From ae2b69fbe9f98d0008b0b2ee00606790ed8df8b1 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 14 Jan 2026 18:38:59 +0100 Subject: [PATCH 1/3] Limit the number of active curl handles Previously, calling queryValidPaths() with a large number (e.g. 100K) of store paths failed because Nix immediately creates a `TransferItem` for each .narinfo, which is then registered as a handle with curl. However curl appears to scale poorly internally: even though only a few downloads are actually started (up to the connections/streams limits), it spends a lot of CPU time dealing with the inactive handles. So the curl thread is sitting at 100% CPU, the active downloads stall and time out, and everything grind to a halt. So now we limit the number of curl handles to http-connections * 5. With this, fetching 100K .narinfo files from localhost succeeds in ~15 seconds. --- src/libstore/filetransfer.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index 7be3389e073..9749eb88540 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -703,6 +703,8 @@ struct curlFileTransfer : public FileTransfer std::thread workerThread; + const size_t maxQueueSize = fileTransferSettings.httpConnections.get() * 5; + curlFileTransfer() : mt19937(rd()) { @@ -832,6 +834,13 @@ struct curlFileTransfer : public FileTransfer { auto state(state_.lock()); while (!state->incoming.empty()) { + /* Limit the number of active curl handles, since curl doesn't scale well. */ + if (items.size() + incoming.size() >= maxQueueSize) { + auto t = now + std::chrono::milliseconds(100); + if (nextWakeup == std::chrono::steady_clock::time_point() || t < nextWakeup) + nextWakeup = t; + break; + } auto item = state->incoming.top(); if (item->embargo <= now) { incoming.push_back(item); From c08722cba51c1f471370b57d9179d5ad0e9e8c6d Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 14 Jan 2026 19:12:09 +0100 Subject: [PATCH 2/3] curlFileTransfer: Lazily create activity and set startTime There can be a long time between the creation of `TransferItem` and the start of the curl download, which can lead to misleading download durations and progress bar status. So now we create the `Activity` and update `startTime` when curl actually starts the download. --- src/libstore/filetransfer.cc | 44 ++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index 9749eb88540..274babcfa9b 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -53,7 +53,7 @@ struct curlFileTransfer : public FileTransfer curlFileTransfer & fileTransfer; FileTransferRequest request; FileTransferResult result; - Activity act; + std::unique_ptr _act; bool done = false; // whether either the success or failure function has been called Callback callback; CURL * req = 0; @@ -98,12 +98,6 @@ struct curlFileTransfer : public FileTransfer Callback && callback) : fileTransfer(fileTransfer) , request(request) - , act(*logger, - lvlTalkative, - actFileTransfer, - fmt("%s '%s'", request.verb(/*continuous=*/true), request.uri), - {request.uri.to_string()}, - request.parentAct) , callback(std::move(callback)) , finalSink([this](std::string_view data) { if (errorSink) { @@ -301,9 +295,29 @@ struct curlFileTransfer : public FileTransfer return ((TransferItem *) userp)->headerCallback(contents, size, nmemb); } + /** + * Lazily start an `Activity`. We don't do this in the `TransferItem` constructor to avoid showing downloads + * that are only enqueued but not actually started. + */ + Activity & act() + { + if (!_act) { + _act = std::make_unique( + *logger, + lvlTalkative, + actFileTransfer, + fmt("%s '%s'", request.verb(/*continuous=*/true), request.uri), + Logger::Fields{request.uri.to_string()}, + request.parentAct); + // Reset the start time to when we actually started the download. + startTime = std::chrono::steady_clock::now(); + } + return *_act; + } + int progressCallback(curl_off_t dltotal, curl_off_t dlnow) noexcept try { - act.progress(dlnow, dltotal); + act().progress(dlnow, dltotal); return getInterrupted(); } catch (nix::Interrupted &) { assert(getInterrupted()); @@ -380,6 +394,13 @@ struct curlFileTransfer : public FileTransfer return ((TransferItem *) clientp)->seekCallback(offset, origin); } + static int resolverCallbackWrapper(void *, void *, void * clientp) noexcept + { + // Create the `Activity` associated with this download. + ((TransferItem *) clientp)->act(); + return 0; + } + void unpause() { /* Unpausing an already unpaused transfer is a no-op. */ @@ -497,6 +518,11 @@ struct curlFileTransfer : public FileTransfer } #endif + // This seems to be the earliest libcurl callback that signals that the download is happening, so we can + // call act(). + curl_easy_setopt(req, CURLOPT_RESOLVER_START_FUNCTION, resolverCallbackWrapper); + curl_easy_setopt(req, CURLOPT_RESOLVER_START_DATA, this); + result.data.clear(); result.bodySize = 0; } @@ -545,7 +571,7 @@ struct curlFileTransfer : public FileTransfer if (httpStatus == 304 && result.etag == "") result.etag = request.expectedETag; - act.progress(result.bodySize, result.bodySize); + act().progress(result.bodySize, result.bodySize); done = true; callback(std::move(result)); } From fa2250a87455d566c04a2b3419c030804d4736a3 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 14 Jan 2026 19:40:53 +0100 Subject: [PATCH 3/3] resolverCallbackWrapper(): Catch exceptions --- src/libstore/filetransfer.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index 274babcfa9b..039f766104d 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -395,10 +395,12 @@ struct curlFileTransfer : public FileTransfer } static int resolverCallbackWrapper(void *, void *, void * clientp) noexcept - { + try { // Create the `Activity` associated with this download. ((TransferItem *) clientp)->act(); return 0; + } catch (...) { + return 1; } void unpause()