diff --git a/changelogs/current.yaml b/changelogs/current.yaml index 2773102575eab..718d9ccd98a0a 100644 --- a/changelogs/current.yaml +++ b/changelogs/current.yaml @@ -402,6 +402,13 @@ bug_fixes: - area: http2 change: | Apply nghttp2 CVE-2026-27135 patch. +- area: shared_pool + change: | + Fixed a leak in ``ObjectSharedPool`` when the last reference to a pooled object is released + on a non-main thread after the main dispatcher has started teardown. The queued cross-thread + deletion now carries ownership, so the object is freed whether or not the dispatcher runs + the queued callback. Most visibly this fixes a LeakSanitizer leak in + ``StrictDnsClusterImpl`` involving the ``Locality`` shared pool during server shutdown. removed_config_or_runtime: # *Normally occurs at the end of the* :ref:`deprecation period ` diff --git a/source/common/shared_pool/shared_pool.h b/source/common/shared_pool/shared_pool.h index 898dc82dc0728..024237c2ad321 100644 --- a/source/common/shared_pool/shared_pool.h +++ b/source/common/shared_pool/shared_pool.h @@ -89,7 +89,15 @@ class ObjectSharedPool auto this_shared_ptr = this->shared_from_this(); // Used for testing to simulate some race condition scenarios sync_.syncPoint(DeleteObjectOnMainThread); - dispatcher_.post([ptr, this_shared_ptr] { this_shared_ptr->deleteObjectOnMainThread(ptr); }); + // Transfer ownership of `ptr` into a std::unique_ptr captured by the posted lambda. + // If the main dispatcher is torn down before the post runs (e.g. a worker thread + // releases the last shared_ptr during server shutdown), the dispatcher destroys the + // queued callback; the captured unique_ptr's destructor then frees the object, avoiding + // a leak. If the lambda does run, release() hands the raw pointer to the main-thread + // cleanup path, which performs the object_pool_ bookkeeping and deletes the object. + dispatcher_.post([owned_ptr = std::unique_ptr(ptr), this_shared_ptr]() mutable { + this_shared_ptr->deleteObjectOnMainThread(owned_ptr.release()); + }); } } diff --git a/source/extensions/common/dynamic_forward_proxy/dns_cache_impl.cc b/source/extensions/common/dynamic_forward_proxy/dns_cache_impl.cc index 9cbcdb8ecde12..4188e5abba034 100644 --- a/source/extensions/common/dynamic_forward_proxy/dns_cache_impl.cc +++ b/source/extensions/common/dynamic_forward_proxy/dns_cache_impl.cc @@ -293,7 +293,7 @@ void DnsCacheImpl::onReResolveAlarm(const std::string& host) { auto last_used_time = primary_host.host_info_->lastUsedTime(); ENVOY_LOG(debug, "host='{}' TTL check: now={} last_used={} TTL {}", host, now_duration.count(), last_used_time.count(), host_ttl_.count()); - if ((now_duration - last_used_time) > host_ttl_) { + if ((now_duration - last_used_time) >= host_ttl_) { ENVOY_LOG(debug, "host='{}' TTL expired, removing", host); removeHost(host, primary_host, true); } else { @@ -568,19 +568,26 @@ void DnsCacheImpl::finishResolve(const std::string& host, // which can exceed host_ttl by a large margin. const auto now = main_thread_dispatcher_.timeSource().monotonicTime().time_since_epoch(); const auto elapsed = now - primary_host_info->host_info_->lastUsedTime(); - std::chrono::milliseconds refresh_interval( + const std::chrono::milliseconds raw_backoff_ms( primary_host_info->failure_backoff_strategy_->nextBackOffMs()); + std::chrono::milliseconds refresh_interval(raw_backoff_ms); if (elapsed >= host_ttl_) { - refresh_interval = std::chrono::milliseconds(1); + refresh_interval = std::chrono::milliseconds(0); } else { const auto until_eviction = - std::chrono::duration_cast(host_ttl_ - elapsed) + - std::chrono::milliseconds(1); + std::chrono::duration_cast(host_ttl_ - elapsed); refresh_interval = std::min(refresh_interval, until_eviction); } + // Floor the result at min_refresh_interval_ (dns_min_refresh_rate) to prevent arming + // a ms-scale alarm that can kick rapid-fire resolves and race with dispatcher/resolver + // teardown in integration tests (observed as a LeakSanitizer leak in + // proxy_filter_integration_test DoubleResolution). + refresh_interval = + std::max(refresh_interval, + std::chrono::duration_cast(min_refresh_interval_)); primary_host_info->refresh_timer_->enableTimer(refresh_interval); - ENVOY_LOG(debug, "DNS refresh rate reset for host '{}', (failure) refresh rate {} ms", host, - refresh_interval.count()); + ENVOY_LOG(debug, "DNS refresh rate reset for host '{}', (failure) raw={} ms armed={} ms", + host, raw_backoff_ms.count(), refresh_interval.count()); } } } diff --git a/test/common/shared_pool/shared_pool_test.cc b/test/common/shared_pool/shared_pool_test.cc index e11d33233a130..7d90ed31c8745 100644 --- a/test/common/shared_pool/shared_pool_test.cc +++ b/test/common/shared_pool/shared_pool_test.cc @@ -1,3 +1,4 @@ +#include #include #include "source/common/event/timer_impl.h" @@ -13,6 +14,21 @@ namespace Envoy { namespace SharedPool { +namespace { +struct Counted { + explicit Counted(int v) : v_(v) { ++live; } + Counted(const Counted& o) : v_(o.v_) { ++live; } + Counted(Counted&& o) noexcept : v_(o.v_) { ++live; } + ~Counted() { --live; } + bool operator==(const Counted& o) const { return v_ == o.v_; } + int v_; + inline static std::atomic live{0}; +}; +struct CountedHash { + size_t operator()(const Counted& c) const { return static_cast(c.v_); } +}; +} // namespace + class SharedPoolTest : public testing::Test { protected: SharedPoolTest() @@ -222,5 +238,42 @@ TEST_F(SharedPoolTest, HashCollision) { EXPECT_EQ(0, pool->poolSize()); } +TEST_F(SharedPoolTest, DispatcherTeardownDropsCrossThreadDeleteFreesObject) { + ASSERT_EQ(0, Counted::live.load()); + + testing::NiceMock dispatcher; + std::vector posted_cbs; + EXPECT_CALL(dispatcher, post(testing::_)).WillRepeatedly(testing::Invoke([&](Event::PostCb cb) { + posted_cbs.push_back(std::move(cb)); + })); + + auto pool = std::make_shared>(dispatcher); + auto obj = pool->getObject(Counted{7}); + ASSERT_EQ(1, Counted::live.load()); + ASSERT_TRUE(posted_cbs.empty()); + + // Drop the last shared_ptr from a different thread, triggering the cross-thread delete path. + Thread::ThreadFactory& thread_factory = Thread::threadFactoryForTest(); + auto thread = thread_factory.createThread([&]() { obj.reset(); }); + thread->join(); + + // One callback should have been posted (the deferred delete). + ASSERT_EQ(1u, posted_cbs.size()); + // The object is still alive — held by the captured unique_ptr inside the posted lambda. + EXPECT_EQ(1, Counted::live.load()); + + // Simulate dispatcher teardown: drop the queued callback without running it. + // The captured unique_ptr destructor must free the object. + posted_cbs.clear(); + + // Regression assertion: prior to the fix, the raw T* was captured by value and the + // callback destructor could not free it, causing a leak (live would still be 1). + EXPECT_EQ(0, Counted::live.load()); + + // Release the pool; the object is already destroyed, so this is a no-op for the counter. + pool.reset(); + EXPECT_EQ(0, Counted::live.load()); +} + } // namespace SharedPool } // namespace Envoy diff --git a/test/extensions/common/dynamic_forward_proxy/dns_cache_impl_test.cc b/test/extensions/common/dynamic_forward_proxy/dns_cache_impl_test.cc index 882388daa5edc..4bf75efde53d7 100644 --- a/test/extensions/common/dynamic_forward_proxy/dns_cache_impl_test.cc +++ b/test/extensions/common/dynamic_forward_proxy/dns_cache_impl_test.cc @@ -910,17 +910,153 @@ TEST_F(DnsCacheImplTest, ResolveFailureBackoffCappedByHostTtl) { resolve_timer->invokeCallback(); // DNS fails. The raw failure backoff is 60s (dns_refresh_rate). Without the cap the next alarm - // would fire at T+61s. With the cap, it fires at host_ttl-elapsed+1ms = 1000-100+1 = 901ms. + // would fire at T+61s. The cap reduces it to host_ttl-elapsed = 1000-100 = 900ms, which is + // then floored at dns_min_refresh_rate = 1000ms because the cap kicked in. EXPECT_CALL(*timeout_timer, disableTimer()); EXPECT_CALL(update_callbacks_, onDnsResolutionComplete("foo.com:80", DnsHostInfoEquals("10.0.0.1:80", "foo.com", false), Network::DnsResolver::ResolutionStatus::Failure)); - EXPECT_CALL(*resolve_timer, enableTimer(std::chrono::milliseconds(901), _)); + EXPECT_CALL(*resolve_timer, enableTimer(std::chrono::milliseconds(1000), _)); + resolve_cb(Network::DnsResolver::ResolutionStatus::Failure, "", TestUtility::makeDnsResponse({})); + + // After the floored backoff now-last_used = 1100ms > host_ttl (1000ms): the host is evicted. + simTime().advanceTimeWait(std::chrono::milliseconds(1000)); + EXPECT_CALL(update_callbacks_, onDnsHostRemove("foo.com:80")); + resolve_timer->invokeCallback(); + checkStats(2 /* attempt */, 1 /* success */, 1 /* failure */, 1 /* address changed */, + 1 /* added */, 1 /* removed */, 0 /* num hosts */); +} + +TEST_F(DnsCacheImplTest, ResolveFailureBackoffZeroFlooredByMinRefresh) { + // Stub random() to return 0 so the jittered exponential backoff yields 0 + // (nextBackOffMs() returns random() % base_interval == 0). + ON_CALL(context_.server_context_.api_.random_, random()).WillByDefault(Return(0)); + *config_.mutable_dns_failure_refresh_rate()->mutable_base_interval() = + Protobuf::util::TimeUtil::SecondsToDuration(2); + *config_.mutable_dns_failure_refresh_rate()->mutable_max_interval() = + Protobuf::util::TimeUtil::SecondsToDuration(5); + *config_.mutable_host_ttl() = Protobuf::util::TimeUtil::SecondsToDuration(60); + *config_.mutable_dns_min_refresh_rate() = Protobuf::util::TimeUtil::SecondsToDuration(1); + initialize(); + InSequence s; + + MockLoadDnsCacheEntryCallbacks callbacks; + Network::DnsResolver::ResolveCb resolve_cb; + Event::MockTimer* resolve_timer = new Event::MockTimer(&context_.server_context_.dispatcher_); + Event::MockTimer* timeout_timer = new Event::MockTimer(&context_.server_context_.dispatcher_); + EXPECT_CALL(*timeout_timer, enableTimer(std::chrono::milliseconds(5000), nullptr)); + EXPECT_CALL(*resolver_, resolve("foo.com", _, _)) + .WillOnce(DoAll(SaveArg<2>(&resolve_cb), Return(&resolver_->active_query_))); + auto result = dns_cache_->loadDnsCacheEntry("foo.com", 80, false, callbacks); + EXPECT_EQ(DnsCache::LoadDnsCacheEntryStatus::Loading, result.status_); + + EXPECT_CALL(*timeout_timer, disableTimer()); + EXPECT_CALL( + update_callbacks_, + onDnsHostAddOrUpdate("foo.com:80", DnsHostInfoEquals("10.0.0.1:80", "foo.com", false))); + EXPECT_CALL(callbacks, + onLoadDnsCacheComplete(DnsHostInfoEquals("10.0.0.1:80", "foo.com", false))); + EXPECT_CALL(update_callbacks_, + onDnsResolutionComplete("foo.com:80", + DnsHostInfoEquals("10.0.0.1:80", "foo.com", false), + Network::DnsResolver::ResolutionStatus::Completed)); + EXPECT_CALL(*resolve_timer, enableTimer(std::chrono::milliseconds(dns_ttl_), _)); + resolve_cb(Network::DnsResolver::ResolutionStatus::Completed, "", + TestUtility::makeDnsResponse({"10.0.0.1"})); + + checkStats(1 /* attempt */, 1 /* success */, 0 /* failure */, 1 /* address changed */, + 1 /* added */, 0 /* removed */, 1 /* num hosts */); + + // Advance a small amount so elapsed is well below host_ttl. + simTime().advanceTimeWait(std::chrono::milliseconds(100)); + + EXPECT_CALL(*timeout_timer, enableTimer(std::chrono::milliseconds(5000), nullptr)); + EXPECT_CALL(*resolver_, resolve("foo.com", _, _)) + .WillOnce(DoAll(SaveArg<2>(&resolve_cb), Return(&resolver_->active_query_))); + resolve_timer->invokeCallback(); + + // DNS fails. With random()==0 the jittered backoff returns 0. The TTL cap does not kick in + // (host_ttl=60s >> elapsed=100ms). The floor at dns_min_refresh_rate (1s) lifts it to 1000ms. + EXPECT_CALL(*timeout_timer, disableTimer()); + EXPECT_CALL(update_callbacks_, onDnsHostAddOrUpdate(_, _)).Times(0); + EXPECT_CALL(callbacks, onLoadDnsCacheComplete(_)).Times(0); + EXPECT_CALL(update_callbacks_, + onDnsResolutionComplete("foo.com:80", + DnsHostInfoEquals("10.0.0.1:80", "foo.com", false), + Network::DnsResolver::ResolutionStatus::Failure)); + EXPECT_CALL(*resolve_timer, enableTimer(std::chrono::milliseconds(1000), _)); resolve_cb(Network::DnsResolver::ResolutionStatus::Failure, "", TestUtility::makeDnsResponse({})); + checkStats(2 /* attempt */, 1 /* success */, 1 /* failure */, 1 /* address changed */, + 1 /* added */, 0 /* removed */, 1 /* num hosts */); +} + +TEST_F(DnsCacheImplTest, ResolveFailurePastHostTtlFlooredByMinRefresh) { + *config_.mutable_host_ttl() = Protobuf::util::TimeUtil::SecondsToDuration(1); + *config_.mutable_dns_refresh_rate() = Protobuf::util::TimeUtil::SecondsToDuration(60); + *config_.mutable_dns_min_refresh_rate() = Protobuf::util::TimeUtil::SecondsToDuration(1); + initialize(); + InSequence s; + + MockLoadDnsCacheEntryCallbacks callbacks; + Network::DnsResolver::ResolveCb resolve_cb; + Event::MockTimer* resolve_timer = new Event::MockTimer(&context_.server_context_.dispatcher_); + Event::MockTimer* timeout_timer = new Event::MockTimer(&context_.server_context_.dispatcher_); + EXPECT_CALL(*timeout_timer, enableTimer(std::chrono::milliseconds(5000), nullptr)); + EXPECT_CALL(*resolver_, resolve("foo.com", _, _)) + .WillOnce(DoAll(SaveArg<2>(&resolve_cb), Return(&resolver_->active_query_))); + auto result = dns_cache_->loadDnsCacheEntry("foo.com", 80, false, callbacks); + EXPECT_EQ(DnsCache::LoadDnsCacheEntryStatus::Loading, result.status_); + + EXPECT_CALL(*timeout_timer, disableTimer()); + EXPECT_CALL( + update_callbacks_, + onDnsHostAddOrUpdate("foo.com:80", DnsHostInfoEquals("10.0.0.1:80", "foo.com", false))); + EXPECT_CALL(callbacks, + onLoadDnsCacheComplete(DnsHostInfoEquals("10.0.0.1:80", "foo.com", false))); + EXPECT_CALL(update_callbacks_, + onDnsResolutionComplete("foo.com:80", + DnsHostInfoEquals("10.0.0.1:80", "foo.com", false), + Network::DnsResolver::ResolutionStatus::Completed)); + EXPECT_CALL(*resolve_timer, enableTimer(std::chrono::milliseconds(dns_ttl_), _)); + resolve_cb(Network::DnsResolver::ResolutionStatus::Completed, "", + TestUtility::makeDnsResponse({"10.0.0.1"})); + + checkStats(1 /* attempt */, 1 /* success */, 0 /* failure */, 1 /* address changed */, + 1 /* added */, 0 /* removed */, 1 /* num hosts */); + + // Advance a small amount so the alarm fires before TTL (now - last_used = 100ms < 1000ms = + // host_ttl), which causes onReResolveAlarm to startResolve rather than evict. + simTime().advanceTimeWait(std::chrono::milliseconds(100)); + + EXPECT_CALL(*timeout_timer, enableTimer(std::chrono::milliseconds(5000), nullptr)); + EXPECT_CALL(*resolver_, resolve("foo.com", _, _)) + .WillOnce(DoAll(SaveArg<2>(&resolve_cb), Return(&resolver_->active_query_))); + resolve_timer->invokeCallback(); + + // Advance past the TTL boundary so that elapsed = 1000ms >= host_ttl (1000ms) when + // finishResolve runs. + simTime().advanceTimeWait(std::chrono::milliseconds(900)); - // After the capped backoff now-last_used = 1001ms > host_ttl: the host is evicted. - simTime().advanceTimeWait(std::chrono::milliseconds(901)); + // DNS fails. elapsed >= host_ttl so the past-TTL branch sets refresh_interval to 0. + // The unconditional floor then lifts it to dns_min_refresh_rate (1000ms). + EXPECT_CALL(*timeout_timer, disableTimer()); + EXPECT_CALL(update_callbacks_, onDnsHostAddOrUpdate(_, _)).Times(0); + EXPECT_CALL(callbacks, onLoadDnsCacheComplete(_)).Times(0); + EXPECT_CALL(update_callbacks_, + onDnsResolutionComplete("foo.com:80", + DnsHostInfoEquals("10.0.0.1:80", "foo.com", false), + Network::DnsResolver::ResolutionStatus::Failure)); + // Past-TTL branch: raw backoff is discarded and refresh_interval becomes 0; + // the unconditional floor lifts it to dns_min_refresh_rate (1000ms). + EXPECT_CALL(*resolve_timer, enableTimer(std::chrono::milliseconds(1000), _)); + resolve_cb(Network::DnsResolver::ResolutionStatus::Failure, "", TestUtility::makeDnsResponse({})); + + checkStats(2 /* attempt */, 1 /* success */, 1 /* failure */, 1 /* address changed */, + 1 /* added */, 0 /* removed */, 1 /* num hosts */); + + // After the floored interval now-last_used = 2000ms >= host_ttl (1000ms): the host is evicted. + simTime().advanceTimeWait(std::chrono::milliseconds(1000)); EXPECT_CALL(update_callbacks_, onDnsHostRemove("foo.com:80")); resolve_timer->invokeCallback(); checkStats(2 /* attempt */, 1 /* success */, 1 /* failure */, 1 /* address changed */, @@ -1036,6 +1172,7 @@ TEST_F(DnsCacheImplTest, ResolveFailureWithFailureRefreshRate) { Protobuf::util::TimeUtil::SecondsToDuration(7); *config_.mutable_dns_failure_refresh_rate()->mutable_max_interval() = Protobuf::util::TimeUtil::SecondsToDuration(10); + *config_.mutable_dns_min_refresh_rate() = Protobuf::util::TimeUtil::SecondsToDuration(1); initialize(); InSequence s; @@ -1089,6 +1226,7 @@ TEST_F(DnsCacheImplTest, ResolveFailureAfterResolveSuccessWithFailureRefreshRate Protobuf::util::TimeUtil::SecondsToDuration(2); *config_.mutable_dns_failure_refresh_rate()->mutable_max_interval() = Protobuf::util::TimeUtil::SecondsToDuration(5); + *config_.mutable_dns_min_refresh_rate() = Protobuf::util::TimeUtil::SecondsToDuration(1); initialize(); InSequence s;