From cfd12a5c8645c0ff977fa2fb4a79912b00729d40 Mon Sep 17 00:00:00 2001 From: Raymond Chen Date: Tue, 7 Feb 2023 10:26:57 -0800 Subject: [PATCH] Stack usage reduction in apartment switching, and lifetime fixes The big deal here is that we fix `co_await apartment_context` so that it consumes no stack[1] if you are already in the destination context. This is a big win for loops that do apartment switching. To do this, `impl::resume_apartment` now returns `true` if it wants the caller to resume the coroutine synchronously, rather than doing it directly. If the caller is `await_suspend`, then it can propagate that result to the coroutine infrastructure and avoid stack build-up. Fixed a lifetime issue in `apartment_awaiter`, which copied the context too soon. It needs to copy it in the `await_suspend`, because the coroutine is to resume synchronously in the new apartment, which under the old code would destruct the context while `await_suspend` was still using it. This shifts a refcount update from one place to another, so no net change. We no longer need to extend the lifetime of the `IAsyncAction` now that we detect and defer synchronous resumption. This deletes a refcount update. [1] Older builds of MSVC have code generation issues for `bool await_suspend`, so we deftly avoid them here, in the same way we avoided them for `co_await IAsyncAction`. (Though the existing code in many awaiters such as `final_suspend_awaiter` still use `bool await_suspend`, so maybe we'll just hope everybody migrates to MSVC 16.11 or later?) --- strings/base_coroutine_foundation.h | 8 ++++--- strings/base_coroutine_threadpool.h | 19 +++++++++++---- test/test/await_completed.cpp | 37 +++++++++++++++++++++++++++++ test/test_cpp20/await_completed.cpp | 14 +++++++++++ 4 files changed, 71 insertions(+), 7 deletions(-) diff --git a/strings/base_coroutine_foundation.h b/strings/base_coroutine_foundation.h index 6afeff607..1cfde4230 100644 --- a/strings/base_coroutine_foundation.h +++ b/strings/base_coroutine_foundation.h @@ -135,9 +135,12 @@ namespace winrt::impl } else { - resume_apartment(m_context, std::exchange(m_handle, {}), &m_awaiter->failure); + auto handle = std::exchange(m_handle, {}); + if (!resume_apartment(m_context, handle, &m_awaiter->failure)) + { + handle.resume(); + } } - } }; @@ -182,7 +185,6 @@ namespace winrt::impl private: auto register_completed_callback(coroutine_handle<> handle) { - auto extend_lifetime = async; async.Completed(disconnect_aware_handler(this, handle)); #ifdef _RESUMABLE_FUNCTIONS_SUPPORTED if (!suspending.exchange(false, std::memory_order_acquire)) diff --git a/strings/base_coroutine_threadpool.h b/strings/base_coroutine_threadpool.h index fb23f24c9..aa8a5b2b1 100644 --- a/strings/base_coroutine_threadpool.h +++ b/strings/base_coroutine_threadpool.h @@ -118,19 +118,22 @@ namespace winrt::impl WINRT_ASSERT(context.valid()); if ((context.m_context == nullptr) || (context.m_context == try_capture(WINRT_IMPL_CoGetObjectContext))) { - handle(); + return false; } else if (context.m_context_type == 1 /* APTTYPE_MTA */) { resume_background(handle); + return true; } else if (is_sta_thread()) { resume_apartment_on_threadpool(context.m_context, handle, failure); + return true; } else { resume_apartment_sync(context.m_context, handle, failure); + return true; } } } @@ -315,7 +318,7 @@ namespace winrt::impl { struct apartment_awaiter { - apartment_context context; // make a copy because resuming may destruct the original + apartment_context const& context; int32_t failure = 0; bool await_ready() const noexcept @@ -328,9 +331,17 @@ namespace winrt::impl check_hresult(failure); } - void await_suspend(impl::coroutine_handle<> handle) + auto await_suspend(impl::coroutine_handle<> handle) { - impl::resume_apartment(context.context, handle, &failure); + auto context_copy = context; +#ifdef _RESUMABLE_FUNCTIONS_SUPPORTED + if (!impl::resume_apartment(context_copy.context, handle, &failure)) + { + handle.resume(); + } +#else + return impl::resume_apartment(context_copy.context, handle, &failure); +#endif } }; diff --git a/test/test/await_completed.cpp b/test/test/await_completed.cpp index be33e818f..6af8dfa21 100644 --- a/test/test/await_completed.cpp +++ b/test/test/await_completed.cpp @@ -66,6 +66,38 @@ namespace // MSVC standard-conforming coroutines (as well as gcc and clang coroutines) // support "bool await_suspend" just fine. REQUIRE(consumed == 0); +#endif + } + + // co_await the same apartment context and confirm that stack does not grow. + // This is in await_completed.cpp because it's basically the same thing as awaiting + // an already-completed coroutine, so the test uses the same infrastructure. + IAsyncAction TestApartmentContextNop() + { + winrt::apartment_context same_context; + + uintptr_t initial = approximate_stack_pointer(); + co_await resume_sync_from_await_suspend(); + uintptr_t sync_usage = initial - approximate_stack_pointer(); + + initial = approximate_stack_pointer(); + co_await same_context; + uintptr_t consumed = initial - approximate_stack_pointer(); + +#ifdef _RESUMABLE_FUNCTIONS_SUPPORTED + // This branch is taken only for MSVC prerelease coroutines. + // + // MSVC prerelease coroutines prior to 16.11 do not implement "bool await_suspend" reliably, + // so we can't use it impl::apartment_awaiter. We must resume inline inside await_suspend, + // so there is a small amount of stack usage. (Pre-16.11 and post-16.11 prerelease coroutines + // are interoperable, so we cannot change behavior based on which compiler we are using, + // because that would introduce ODR violations. Our first opportunity to change behavior + // is the ABI breaking change with MSVC standard-conforming coroutines.) + REQUIRE(consumed <= sync_usage); +#else + // MSVC standard-conforming coroutines (as well as gcc and clang coroutines) + // support "bool await_suspend" just fine. + REQUIRE(consumed == 0); #endif } } @@ -73,3 +105,8 @@ TEST_CASE("await_completed_await") { SyncCompletion().get(); } + +TEST_CASE("apartment_context_nop") +{ + TestApartmentContextNop().get(); +} diff --git a/test/test_cpp20/await_completed.cpp b/test/test_cpp20/await_completed.cpp index 2448a3f88..3ae64e25e 100644 --- a/test/test_cpp20/await_completed.cpp +++ b/test/test_cpp20/await_completed.cpp @@ -38,8 +38,22 @@ namespace uintptr_t consumed = initial - approximate_stack_pointer(); REQUIRE(consumed == 0); } + + IAsyncAction TestApartmentContextNop() + { + // co_await the same apartment and confirm that stack does not grow. + winrt::apartment_context same_context; + uintptr_t initial = approximate_stack_pointer(); + co_await same_context; + uintptr_t consumed = initial - approximate_stack_pointer(); + REQUIRE(consumed == 0); + } } TEST_CASE("await_completed_await") { SyncCompletion().get(); +} +TEST_CASE("apartment_context_nop") +{ + TestApartmentContextNop().get(); } \ No newline at end of file