From 074487c327b053c996fb825dfe2f7f7f5bf66482 Mon Sep 17 00:00:00 2001 From: Aaron R Robinson Date: Wed, 11 Feb 2026 15:19:06 -0800 Subject: [PATCH 01/24] Replace HashMap COOP transitions with Epoch-Based Reclamation (EBR) HashMap's async mode used GCX_MAYBE_COOP_NO_THREAD_BROKEN to transition into cooperative GC mode on every operation, preventing the GC from freeing obsolete bucket arrays mid-read. Old bucket arrays were queued via SyncClean::AddHashMap and freed during GC pauses. This caused a deadlock: when HashMap::LookupValue() was called while holding the DebuggerController lock, the COOP transition (which is level-equivalent to taking the ThreadStore lock) violated lock ordering constraints, since ThreadStore must be acquired before DebuggerController. Replace both mechanisms with Epoch-Based Reclamation (EBR), based on Fraser's algorithm from 'Practical Lock-Freedom' (UCAM-CL-TR-579): - EnterCriticalRegion/ExitCriticalRegion are simple atomic flag stores with memory barriers -- they never block or trigger GC transitions - Obsolete bucket arrays are queued for deferred deletion and freed once all threads have passed through a quiescent state - An RAII holder (EbrCriticalRegionHolder) replaces GCX_MAYBE_COOP at all 6 call sites in hash.cpp Changes: - New: src/coreclr/vm/ebr.h, ebr.cpp (EbrCollector, ~340 lines) - hash.cpp: Replace 6 GCX_MAYBE_COOP_NO_THREAD_BROKEN with EBR holders, replace SyncClean::AddHashMap with QueueForDeletion - syncclean.hpp/cpp: Remove HashMap-related members and cleanup code - ceemain.cpp: Init g_HashMapEbr at startup, shutdown at EE shutdown - CrstTypes.def: Add CrstEbrThreadList, CrstEbrPending - crsttypes_generated.h: Regenerated with new Crst types - CMakeLists.txt: Add ebr.cpp, ebr.h to build --- src/coreclr/inc/CrstTypes.def | 9 + src/coreclr/inc/crsttypes_generated.h | 162 +++++----- src/coreclr/vm/CMakeLists.txt | 2 + src/coreclr/vm/ceemain.cpp | 8 + src/coreclr/vm/ebr.cpp | 411 ++++++++++++++++++++++++++ src/coreclr/vm/ebr.h | 152 ++++++++++ src/coreclr/vm/hash.cpp | 57 ++-- src/coreclr/vm/syncclean.cpp | 32 -- src/coreclr/vm/syncclean.hpp | 3 - 9 files changed, 702 insertions(+), 134 deletions(-) create mode 100644 src/coreclr/vm/ebr.cpp create mode 100644 src/coreclr/vm/ebr.h diff --git a/src/coreclr/inc/CrstTypes.def b/src/coreclr/inc/CrstTypes.def index 8cd820022658cd..6685868672ffeb 100644 --- a/src/coreclr/inc/CrstTypes.def +++ b/src/coreclr/inc/CrstTypes.def @@ -186,6 +186,15 @@ Crst DynamicMT AcquiredBefore CodeVersioning End +// EBR (Epoch-Based Reclamation) internal locks. These are leaf locks that +// protect the EBR thread list and pending deletion queues. They must never +// be held across HashMap operations or GC transitions. 
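+// Neither lock declares an AcquiredBefore/AcquiredAfter relationship, so both
+// are generated as level-0 leaf locks (see crsttypes_generated.h below).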
+Crst EbrPending +End + +Crst EbrThreadList +End + Crst EventStore End diff --git a/src/coreclr/inc/crsttypes_generated.h b/src/coreclr/inc/crsttypes_generated.h index 713f6bd5b33f33..02addac55c29a9 100644 --- a/src/coreclr/inc/crsttypes_generated.h +++ b/src/coreclr/inc/crsttypes_generated.h @@ -41,84 +41,86 @@ enum CrstType CrstDebuggerMutex = 23, CrstDynamicIL = 24, CrstDynamicMT = 25, - CrstEtwTypeLogHash = 26, - CrstEventPipe = 27, - CrstEventStore = 28, - CrstException = 29, - CrstExecutableAllocatorLock = 30, - CrstFCall = 31, - CrstFrozenObjectHeap = 32, - CrstFuncPtrStubs = 33, - CrstFusionAppCtx = 34, - CrstGCCover = 35, - CrstGenericDictionaryExpansion = 36, - CrstGlobalStrLiteralMap = 37, - CrstHandleTable = 38, - CrstIJWFixupData = 39, - CrstIJWHash = 40, - CrstILStubGen = 41, - CrstInlineTrackingMap = 42, - CrstInstMethodHashTable = 43, - CrstInterfaceDispatchGlobalLists = 44, - CrstInterop = 45, - CrstInteropData = 46, - CrstIsJMCMethod = 47, - CrstISymUnmanagedReader = 48, - CrstJit = 49, - CrstJitInlineTrackingMap = 50, - CrstJitPatchpoint = 51, - CrstJumpStubCache = 52, - CrstLeafLock = 53, - CrstListLock = 54, - CrstLoaderAllocator = 55, - CrstLoaderAllocatorReferences = 56, - CrstLoaderHeap = 57, - CrstManagedObjectWrapperMap = 58, - CrstMethodDescBackpatchInfoTracker = 59, - CrstMethodTableExposedObject = 60, - CrstModule = 61, - CrstModuleLookupTable = 62, - CrstMulticoreJitHash = 63, - CrstMulticoreJitManager = 64, - CrstNativeImageEagerFixups = 65, - CrstNativeImageLoad = 66, - CrstNotifyGdb = 67, - CrstPEImage = 68, - CrstPendingTypeLoadEntry = 69, - CrstPerfMap = 70, - CrstPgoData = 71, - CrstPinnedByrefValidation = 72, - CrstPinnedHeapHandleTable = 73, - CrstProfilerGCRefDataFreeList = 74, - CrstProfilingAPIStatus = 75, - CrstRCWCache = 76, - CrstRCWCleanupList = 77, - CrstReadyToRunEntryPointToMethodDescMap = 78, - CrstReflection = 79, - CrstReJITGlobalRequest = 80, - CrstSigConvert = 81, - CrstSingleUseLock = 82, - CrstStressLog = 83, - CrstStubCache = 84, - CrstStubDispatchCache = 85, - CrstSyncBlockCache = 86, - CrstSyncHashLock = 87, - CrstSystemDomain = 88, - CrstSystemDomainDelayedUnloadList = 89, - CrstThreadIdDispenser = 90, - CrstThreadLocalStorageLock = 91, - CrstThreadStore = 92, - CrstTieredCompilation = 93, - CrstTypeEquivalenceMap = 94, - CrstTypeIDMap = 95, - CrstUMEntryThunkCache = 96, - CrstUMEntryThunkFreeListLock = 97, - CrstUniqueStack = 98, - CrstUnresolvedClassLock = 99, - CrstUnwindInfoTableLock = 100, - CrstVSDIndirectionCellLock = 101, - CrstWrapperTemplate = 102, - kNumberOfCrstTypes = 103 + CrstEbrPending = 26, + CrstEbrThreadList = 27, + CrstEtwTypeLogHash = 28, + CrstEventPipe = 29, + CrstEventStore = 30, + CrstException = 31, + CrstExecutableAllocatorLock = 32, + CrstFCall = 33, + CrstFrozenObjectHeap = 34, + CrstFuncPtrStubs = 35, + CrstFusionAppCtx = 36, + CrstGCCover = 37, + CrstGenericDictionaryExpansion = 38, + CrstGlobalStrLiteralMap = 39, + CrstHandleTable = 40, + CrstIJWFixupData = 41, + CrstIJWHash = 42, + CrstILStubGen = 43, + CrstInlineTrackingMap = 44, + CrstInstMethodHashTable = 45, + CrstInterfaceDispatchGlobalLists = 46, + CrstInterop = 47, + CrstInteropData = 48, + CrstIsJMCMethod = 49, + CrstISymUnmanagedReader = 50, + CrstJit = 51, + CrstJitInlineTrackingMap = 52, + CrstJitPatchpoint = 53, + CrstJumpStubCache = 54, + CrstLeafLock = 55, + CrstListLock = 56, + CrstLoaderAllocator = 57, + CrstLoaderAllocatorReferences = 58, + CrstLoaderHeap = 59, + CrstManagedObjectWrapperMap = 60, + CrstMethodDescBackpatchInfoTracker 
= 61, + CrstMethodTableExposedObject = 62, + CrstModule = 63, + CrstModuleLookupTable = 64, + CrstMulticoreJitHash = 65, + CrstMulticoreJitManager = 66, + CrstNativeImageEagerFixups = 67, + CrstNativeImageLoad = 68, + CrstNotifyGdb = 69, + CrstPEImage = 70, + CrstPendingTypeLoadEntry = 71, + CrstPerfMap = 72, + CrstPgoData = 73, + CrstPinnedByrefValidation = 74, + CrstPinnedHeapHandleTable = 75, + CrstProfilerGCRefDataFreeList = 76, + CrstProfilingAPIStatus = 77, + CrstRCWCache = 78, + CrstRCWCleanupList = 79, + CrstReadyToRunEntryPointToMethodDescMap = 80, + CrstReflection = 81, + CrstReJITGlobalRequest = 82, + CrstSigConvert = 83, + CrstSingleUseLock = 84, + CrstStressLog = 85, + CrstStubCache = 86, + CrstStubDispatchCache = 87, + CrstSyncBlockCache = 88, + CrstSyncHashLock = 89, + CrstSystemDomain = 90, + CrstSystemDomainDelayedUnloadList = 91, + CrstThreadIdDispenser = 92, + CrstThreadLocalStorageLock = 93, + CrstThreadStore = 94, + CrstTieredCompilation = 95, + CrstTypeEquivalenceMap = 96, + CrstTypeIDMap = 97, + CrstUMEntryThunkCache = 98, + CrstUMEntryThunkFreeListLock = 99, + CrstUniqueStack = 100, + CrstUnresolvedClassLock = 101, + CrstUnwindInfoTableLock = 102, + CrstVSDIndirectionCellLock = 103, + CrstWrapperTemplate = 104, + kNumberOfCrstTypes = 105 }; #endif // __CRST_TYPES_INCLUDED @@ -155,6 +157,8 @@ int g_rgCrstLevelMap[] = 12, // CrstDebuggerMutex 0, // CrstDynamicIL 9, // CrstDynamicMT + 0, // CrstEbrPending + 0, // CrstEbrThreadList 0, // CrstEtwTypeLogHash 19, // CrstEventPipe 0, // CrstEventStore @@ -263,6 +267,8 @@ LPCSTR g_rgCrstNameMap[] = "CrstDebuggerMutex", "CrstDynamicIL", "CrstDynamicMT", + "CrstEbrPending", + "CrstEbrThreadList", "CrstEtwTypeLogHash", "CrstEventPipe", "CrstEventStore", diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index 982ea2ba74831b..3a4c0babdab259 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -70,6 +70,7 @@ set(VM_SOURCES_DAC_AND_WKS_COMMON disassembler.cpp domainassembly.cpp dynamicmethod.cpp + ebr.cpp ecall.cpp eedbginterfaceimpl.cpp eehash.cpp @@ -169,6 +170,7 @@ set(VM_HEADERS_DAC_AND_WKS_COMMON disassembler.h domainassembly.h dynamicmethod.h + ebr.h ecall.h eedbginterfaceimpl.h eedbginterfaceimpl.inl diff --git a/src/coreclr/vm/ceemain.cpp b/src/coreclr/vm/ceemain.cpp index 11a9b8b5e00923..ba62c786a0c868 100644 --- a/src/coreclr/vm/ceemain.cpp +++ b/src/coreclr/vm/ceemain.cpp @@ -123,6 +123,7 @@ #include "clsload.hpp" #include "object.h" #include "hash.h" +#include "ebr.h" #include "ecall.h" #include "ceemain.h" #include "dllimport.h" @@ -786,6 +787,10 @@ void EEStartupHelper() // Cache the (potentially user-overridden) values now so they are accessible from asm routines InitializeSpinConstants(); + // Initialize EBR (Epoch-Based Reclamation) for HashMap's async mode. + // This must be done before any HashMap is initialized with fAsyncMode=TRUE. + g_HashMapEbr.Init(CrstEbrThreadList, CrstEbrPending, 1024 * 1024); + StubManager::InitializeStubManagers(); // Set up the cor handle map. This map is used to load assemblies in @@ -1381,6 +1386,9 @@ void STDMETHODCALLTYPE EEShutDownHelper(BOOL fIsDllUnloading) #ifdef LOGGING ShutdownLogging(); #endif + // Shutdown EBR before GC heap to ensure all deferred deletions are drained. 
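+    // Shutdown unconditionally drains every pending deletion queue, so no
+    // async-mode HashMap may be used after this point.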
+ g_HashMapEbr.Shutdown(); + GCHeapUtilities::GetGCHeap()->Shutdown(); } } diff --git a/src/coreclr/vm/ebr.cpp b/src/coreclr/vm/ebr.cpp new file mode 100644 index 00000000000000..908a5138e42787 --- /dev/null +++ b/src/coreclr/vm/ebr.cpp @@ -0,0 +1,411 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" +#include "ebr.h" + +// ============================================ +// Per-thread EBR state +// ============================================ + +// Bit flag indicating the thread is in a critical region. +// Combined with the epoch value in m_localEpoch for a single atomic field. +static constexpr uint32_t ACTIVE_FLAG = 0x80000000U; + +struct EbrThreadData +{ + EbrCollector* m_pCollector; + + // Local epoch with ACTIVE_FLAG. When the thread is quiescent (outside a + // critical region) ACTIVE_FLAG is cleared and the epoch bits are zero. + Volatile m_localEpoch; + + // Nesting depth for re-entrant critical regions. + uint32_t m_criticalDepth; + + // Intrusive linked list through the collector's thread list. + EbrThreadData* m_pNext; +}; + +// Each thread holds its own EbrThreadData via thread_local. +// We support only a single collector per process; if multiple collectors +// are needed this can be extended to a per-collector TLS map, but for the +// HashMap use-case a single global collector suffices. +static thread_local EbrThreadData* t_pThreadData = nullptr; + +// Global EBR collector for HashMap's async mode. +EbrCollector g_HashMapEbr; + +// ============================================ +// EbrCollector implementation +// ============================================ + +void +EbrCollector::Init(CrstType crstThreadList, CrstType crstPending, size_t memoryBudget) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + + _ASSERTE(!m_initialized); + + m_memoryBudget = memoryBudget; + m_globalEpoch = 0; + m_pendingSize = 0; + m_pThreadListHead = nullptr; + for (uint32_t i = 0; i < EBR_EPOCHS; i++) + m_pPendingHeads[i] = nullptr; + + m_threadListLock.Init(crstThreadList); + m_pendingLock.Init(crstPending); + + m_initialized = true; +} + +void +EbrCollector::Shutdown() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + + _ASSERTE(m_initialized); + + // Drain all pending queues unconditionally. + { + CrstHolder lock(&m_pendingLock); + for (uint32_t i = 0; i < EBR_EPOCHS; i++) + DrainQueue(i); + } + + // Free any remaining thread data entries. Threads should have exited + // their critical regions by now. We walk the list and delete them. + { + CrstHolder lock(&m_threadListLock); + EbrThreadData* pCur = m_pThreadListHead; + while (pCur != nullptr) + { + EbrThreadData* pNext = pCur->m_pNext; + delete pCur; + pCur = pNext; + } + m_pThreadListHead = nullptr; + } + + m_threadListLock.Destroy(); + m_pendingLock.Destroy(); + + m_initialized = false; +} + +// ============================================ +// Thread registration +// ============================================ + +EbrThreadData* +EbrCollector::GetOrCreateThreadData() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + + EbrThreadData* pData = t_pThreadData; + if (pData != nullptr && pData->m_pCollector == this) + return pData; + + // Allocate new per-thread data. 
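+    // Use the non-throwing allocator: this path runs under a NOTHROW contract,
+    // and EnterCriticalRegion tolerates a nullptr result.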
+ pData = new (nothrow) EbrThreadData(); + if (pData == nullptr) + { + _ASSERTE(!"EBR: failed to allocate thread data"); + return nullptr; + } + + pData->m_pCollector = this; + pData->m_localEpoch = 0; + pData->m_criticalDepth = 0; + pData->m_pNext = nullptr; + + // Store in thread_local. + t_pThreadData = pData; + + // Link into the collector's thread list. + { + CrstHolder lock(&m_threadListLock); + pData->m_pNext = m_pThreadListHead; + m_pThreadListHead = pData; + } + + return pData; +} + +void +EbrCollector::UnlinkThreadData(EbrThreadData* pData) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + + if (pData == nullptr) + return; + + CrstHolder lock(&m_threadListLock); + EbrThreadData** pp = &m_pThreadListHead; + while (*pp != nullptr) + { + if (*pp == pData) + { + *pp = pData->m_pNext; + break; + } + pp = &(*pp)->m_pNext; + } +} + +// ============================================ +// Critical region enter/exit +// ============================================ + +void +EbrCollector::EnterCriticalRegion() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + } + CONTRACTL_END; + + _ASSERTE(m_initialized); + + EbrThreadData* pData = GetOrCreateThreadData(); + if (pData == nullptr) + return; + + pData->m_criticalDepth++; + + if (pData->m_criticalDepth == 1) + { + // Outermost entry: observe the global epoch and set ACTIVE_FLAG. + uint32_t epoch = m_globalEpoch; + pData->m_localEpoch = (epoch | ACTIVE_FLAG); + + // Full fence to ensure the epoch observation is visible before any + // reads in the critical region. This pairs with the acquire fence + // in TryAdvanceEpoch's scan. + MemoryBarrier(); + } +} + +void +EbrCollector::ExitCriticalRegion() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + } + CONTRACTL_END; + + _ASSERTE(m_initialized); + + EbrThreadData* pData = t_pThreadData; + _ASSERTE(pData != nullptr && pData->m_pCollector == this); + _ASSERTE(pData->m_criticalDepth > 0); + + pData->m_criticalDepth--; + + if (pData->m_criticalDepth == 0) + { + // Outermost exit: ensure all stores in the critical path are visible + // before clearing the active flag. + MemoryBarrier(); + pData->m_localEpoch = 0; + } +} + +bool +EbrCollector::InCriticalRegion() +{ + LIMITED_METHOD_CONTRACT; + + EbrThreadData* pData = t_pThreadData; + if (pData == nullptr || pData->m_pCollector != this) + return false; + return pData->m_criticalDepth > 0; +} + +// ============================================ +// Epoch advancement +// ============================================ + +bool +EbrCollector::CanAdvanceEpoch() +{ + LIMITED_METHOD_CONTRACT; + + // Caller must hold m_threadListLock. + uint32_t currentEpoch = m_globalEpoch; + + EbrThreadData* pData = m_pThreadListHead; + while (pData != nullptr) + { + uint32_t localEpoch = pData->m_localEpoch; + bool active = (localEpoch & ACTIVE_FLAG) != 0; + + if (active) + { + // If an active thread has not yet observed the current epoch, + // we cannot advance. + if (localEpoch != (currentEpoch | ACTIVE_FLAG)) + return false; + } + + pData = pData->m_pNext; + } + + return true; +} + +bool +EbrCollector::TryAdvanceEpoch() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + + CrstHolder lock(&m_threadListLock); + + // Acquire fence to synchronize with the MemoryBarrier() calls in + // EnterCriticalRegion / ExitCriticalRegion. 
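+    // Without this fence, the scan in CanAdvanceEpoch could read a stale
+    // m_localEpoch and wrongly conclude that every active thread has already
+    // observed the current epoch.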
+ MemoryBarrier(); + + if (!CanAdvanceEpoch()) + return false; + + uint32_t newEpoch = ((uint32_t)m_globalEpoch + 1) % EBR_EPOCHS; + m_globalEpoch = newEpoch; + return true; +} + +// ============================================ +// Deferred deletion +// ============================================ + +size_t +EbrCollector::DrainQueue(uint32_t slot) +{ + LIMITED_METHOD_CONTRACT; + + size_t freedSize = 0; + + EbrPendingEntry* pEntry = m_pPendingHeads[slot]; + m_pPendingHeads[slot] = nullptr; + + while (pEntry != nullptr) + { + EbrPendingEntry* pNext = pEntry->m_pNext; + pEntry->m_pfnDelete(pEntry->m_pObject); + freedSize += pEntry->m_estimatedSize; + delete pEntry; + pEntry = pNext; + } + + return freedSize; +} + +void +EbrCollector::TryReclaim() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + + if (TryAdvanceEpoch()) + { + CrstHolder lock(&m_pendingLock); + + // Objects retired 2 epochs ago are safe to delete. With 3 epochs + // and clock arithmetic, the safe slot is (current + 1) % 3. + uint32_t currentEpoch = m_globalEpoch; + uint32_t safeSlot = (currentEpoch + 1) % EBR_EPOCHS; + + size_t freed = DrainQueue(safeSlot); + if (freed > 0) + { + _ASSERTE((size_t)m_pendingSize >= freed); + m_pendingSize = (size_t)m_pendingSize - freed; + } + } +} + +void +EbrCollector::QueueForDeletion(void* pObject, EbrDeleteFunc pfnDelete, size_t estimatedSize) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + } + CONTRACTL_END; + + _ASSERTE(m_initialized); + _ASSERTE(pObject != nullptr); + _ASSERTE(pfnDelete != nullptr); + + // Must be in a critical region. + EbrThreadData* pData = t_pThreadData; + _ASSERTE(pData != nullptr && pData->m_pCollector == this && pData->m_criticalDepth > 0); + + // Allocate pending entry. + EbrPendingEntry* pEntry = new (nothrow) EbrPendingEntry(); + if (pEntry == nullptr) + { + // If we can't allocate, delete immediately. This is safe because we're + // in a critical region and hold the writer lock, so no readers can be + // referencing pObject (the writer just replaced it atomically). + // This is a degraded fallback, not the normal path. + pfnDelete(pObject); + return; + } + + pEntry->m_pObject = pObject; + pEntry->m_pfnDelete = pfnDelete; + pEntry->m_estimatedSize = estimatedSize; + pEntry->m_pNext = nullptr; + + // Insert into the current epoch's pending list. + { + CrstHolder lock(&m_pendingLock); + + uint32_t slot = m_globalEpoch; + pEntry->m_pNext = m_pPendingHeads[slot]; + m_pPendingHeads[slot] = pEntry; + m_pendingSize = (size_t)m_pendingSize + estimatedSize; + } + + // Try reclamation if over budget. + if ((size_t)m_pendingSize > m_memoryBudget) + TryReclaim(); +} diff --git a/src/coreclr/vm/ebr.h b/src/coreclr/vm/ebr.h new file mode 100644 index 00000000000000..db59c4a15c399b --- /dev/null +++ b/src/coreclr/vm/ebr.h @@ -0,0 +1,152 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// ebr.h - Epoch-Based Reclamation for safe memory reclamation +// +// Implements the EBR algorithm from K. Fraser, "Practical Lock-Freedom" +// (UCAM-CL-TR-579). Provides safe, low-overhead deferred deletion for +// concurrent data structures without requiring GC suspension or COOP +// mode transitions. +// +// Usage: +// // Startup: +// g_HashMapEbr.Init(CrstEbrThreadList, CrstEbrPending, budgetBytes); +// +// // Reader/Writer thread: +// { +// EbrCriticalRegionHolder ebr(&g_HashMapEbr, m_fAsyncMode); +// // ... access shared data safely ... 
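+//       // (Nested holders are fine: critical regions are re-entrant and counted.)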
+// // Objects queued for deletion will not be freed while any thread +// // is in a critical region that observed a prior epoch. +// } +// +// // Writer thread (inside critical region), after replacing shared pointer: +// g_HashMapEbr.QueueForDeletion(pOldData, deleteFn, sizeEstimate); +// +// // Shutdown: +// g_HashMapEbr.Shutdown(); + +#ifndef __EBR_H__ +#define __EBR_H__ + +// Number of epoch slots: current, current-1, current-2 +static constexpr uint32_t EBR_EPOCHS = 3; + +// Callback to delete a retired object. +typedef void (*EbrDeleteFunc)(void* pObject); + +// Forward declarations +struct EbrThreadData; + +// Singly-linked list node for pending deletions. +struct EbrPendingEntry +{ + void* m_pObject; + EbrDeleteFunc m_pfnDelete; + size_t m_estimatedSize; + EbrPendingEntry* m_pNext; +}; + +// EBR Collector - manages epoch-based deferred reclamation. +// +// A single collector instance is typically shared across all threads that +// access a particular set of shared data structures. +class EbrCollector +{ +public: + EbrCollector() = default; + ~EbrCollector() = default; + + // Non-copyable + EbrCollector(const EbrCollector&) = delete; + EbrCollector& operator=(const EbrCollector&) = delete; + + // Initialize the collector. + // crstThreadList: Crst type for the thread list lock + // crstPending: Crst type for the pending deletion queue lock + // memoryBudget: approximate byte threshold of pending deletions before + // attempting reclamation + void Init(CrstType crstThreadList, CrstType crstPending, size_t memoryBudget); + + // Shutdown the collector, draining all pending deletions. + // All threads should have exited their critical regions before calling. + void Shutdown(); + + // Enter a critical region. While in a critical region, objects queued for + // deletion will not be freed. Re-entrant: nested calls are counted. + void EnterCriticalRegion(); + + // Exit a critical region. Must pair with EnterCriticalRegion. + void ExitCriticalRegion(); + + // Queue an object for deferred deletion. Must be called from within a + // critical region. The object will be deleted via pfnDelete once all + // threads have passed through a quiescent state. + // pObject: the object to retire (must not be nullptr) + // pfnDelete: function to call to delete the object + // estimatedSize: approximate size in bytes (for budget tracking) + void QueueForDeletion(void* pObject, EbrDeleteFunc pfnDelete, size_t estimatedSize); + + // Returns true if the calling thread is currently in a critical region. + bool InCriticalRegion(); + +private: + // Thread list management + EbrThreadData* GetOrCreateThreadData(); + void UnlinkThreadData(EbrThreadData* pData); + + // Epoch management + bool CanAdvanceEpoch(); + bool TryAdvanceEpoch(); + + // Reclamation + size_t DrainQueue(uint32_t slot); + void TryReclaim(); + + // Configuration + size_t m_memoryBudget = 0; + bool m_initialized = false; + + // Global epoch counter [0, EBR_EPOCHS-1] + Volatile m_globalEpoch; + + // Registered thread list (protected by m_threadListLock) + CrstStatic m_threadListLock; + EbrThreadData* m_pThreadListHead = nullptr; + + // Pending deletion queues, one per epoch slot (protected by m_pendingLock) + CrstStatic m_pendingLock; + EbrPendingEntry* m_pPendingHeads[EBR_EPOCHS] = {}; + Volatile m_pendingSize; +}; + +// Global EBR collector for HashMap's async mode. +extern EbrCollector g_HashMapEbr; + +// RAII holder for EBR critical regions, analogous to GCX_COOP pattern. +// When fEnable is false, the holder is a no-op. 
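+// HashMap passes m_fAsyncMode as fEnable, so synchronous tables -- which are
+// already serialized by their own lock -- skip EBR entirely.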
+class EbrCriticalRegionHolder +{ +public: + EbrCriticalRegionHolder(EbrCollector* pCollector, bool fEnable) + : m_pCollector(fEnable ? pCollector : nullptr) + { + if (m_pCollector != nullptr) + m_pCollector->EnterCriticalRegion(); + } + + ~EbrCriticalRegionHolder() + { + if (m_pCollector != nullptr) + m_pCollector->ExitCriticalRegion(); + } + + // Non-copyable + EbrCriticalRegionHolder(const EbrCriticalRegionHolder&) = delete; + EbrCriticalRegionHolder& operator=(const EbrCriticalRegionHolder&) = delete; + +private: + EbrCollector* m_pCollector; +}; + +#endif // __EBR_H__ diff --git a/src/coreclr/vm/hash.cpp b/src/coreclr/vm/hash.cpp index 288601b1e7e04e..c3e48e6e8e101f 100644 --- a/src/coreclr/vm/hash.cpp +++ b/src/coreclr/vm/hash.cpp @@ -17,6 +17,7 @@ Module Name: #include "excep.h" #include "syncclean.hpp" +#include "ebr.h" #include "threadsuspend.h" #include "minipal/time.h" @@ -94,6 +95,18 @@ BOOL Bucket::InsertValue(const UPTR key, const UPTR value) #endif // !DACCESS_COMPILE +// Static helper for EBR deferred deletion of obsolete bucket arrays. +static void DeleteObsoleteBuckets(void* p) +{ + LIMITED_METHOD_CONTRACT; + Bucket* pBucket = (Bucket*)p; + while (pBucket) { + Bucket* pNextBucket = NextObsolete(pBucket); + delete [] pBucket; + pBucket = pNextBucket; + } +} + //--------------------------------------------------------------------- // inline Bucket* HashMap::Buckets() // get the pointer to the bucket array @@ -103,7 +116,7 @@ PTR_Bucket HashMap::Buckets() LIMITED_METHOD_DAC_CONTRACT; #if !defined(DACCESS_COMPILE) - _ASSERTE (!g_fEEStarted || !m_fAsyncMode || GetThreadNULLOk() == NULL || GetThread()->PreemptiveGCDisabled() || IsGCThread()); + _ASSERTE (!g_fEEStarted || !m_fAsyncMode || GetThreadNULLOk() == NULL || g_HashMapEbr.InCriticalRegion() || IsGCThread()); #endif return m_rgBuckets + 1; } @@ -477,8 +490,10 @@ void HashMap::InsertValue (UPTR key, UPTR value) _ASSERTE (OwnLock()); - // BROKEN: This is called for the RCWCache on the GC thread - GCX_MAYBE_COOP_NO_THREAD_BROKEN(m_fAsyncMode); + // Enter EBR critical region to protect against concurrent bucket array + // replacement during async mode. Replaces the former COOP transition + // which caused deadlocks with DebuggerController lock ordering. + EbrCriticalRegionHolder ebrHolder(&g_HashMapEbr, m_fAsyncMode); ASSERT(m_rgBuckets != NULL); @@ -542,14 +557,12 @@ UPTR HashMap::LookupValue(UPTR key, UPTR value) #ifndef DACCESS_COMPILE _ASSERTE (m_fAsyncMode || OwnLock()); - // BROKEN: This is called for the RCWCache on the GC thread - // Also called by AppDomain::FindCachedAssembly to resolve AssemblyRef -- this is used by stack walking on the GC thread. - // See comments in GCHeapUtilities::RestartEE (above the call to SyncClean::CleanUp) for reason to enter COOP mode. - // However, if the current thread is the GC thread, we know we're not going to call GCHeapUtilities::RestartEE - // while accessing the HashMap, so it's safe to proceed. - // (m_fAsyncMode && !IsGCThread() is the condition for entering COOP mode. I.e., enable COOP GC only if - // the HashMap is in async mode and this is not a GC thread.) - GCX_MAYBE_COOP_NO_THREAD_BROKEN(m_fAsyncMode && !IsGCThread()); + // Enter EBR critical region to protect against concurrent bucket array + // replacement during async mode. This replaces the former COOP transition + // (GCX_MAYBE_COOP_NO_THREAD_BROKEN) which caused a deadlock when called + // while holding the DebuggerController lock. The GC thread is excluded + // since it cannot race with itself on RestartEE. 
+ EbrCriticalRegionHolder ebrHolder(&g_HashMapEbr, m_fAsyncMode && !IsGCThread()); ASSERT(m_rgBuckets != NULL); // This is necessary in case some other thread @@ -621,8 +634,7 @@ UPTR HashMap::ReplaceValue(UPTR key, UPTR value) _ASSERTE(OwnLock()); - // BROKEN: This is called for the RCWCache on the GC thread - GCX_MAYBE_COOP_NO_THREAD_BROKEN(m_fAsyncMode); + EbrCriticalRegionHolder ebrHolder(&g_HashMapEbr, m_fAsyncMode); ASSERT(m_rgBuckets != NULL); // This is necessary in case some other thread @@ -695,8 +707,7 @@ UPTR HashMap::DeleteValue (UPTR key, UPTR value) _ASSERTE (OwnLock()); - // BROKEN: This is called for the RCWCache on the GC thread - GCX_MAYBE_COOP_NO_THREAD_BROKEN(m_fAsyncMode); + EbrCriticalRegionHolder ebrHolder(&g_HashMapEbr, m_fAsyncMode); // check proper use in synchronous mode SyncAccessHolder holoder(this); //no-op in non DEBUG code @@ -867,10 +878,9 @@ void HashMap::Rehash() STATIC_CONTRACT_GC_NOTRIGGER; STATIC_CONTRACT_FAULT; - // BROKEN: This is called for the RCWCache on the GC thread - GCX_MAYBE_COOP_NO_THREAD_BROKEN(m_fAsyncMode); + EbrCriticalRegionHolder ebrHolder(&g_HashMapEbr, m_fAsyncMode); - _ASSERTE (!g_fEEStarted || !m_fAsyncMode || GetThreadNULLOk() == NULL || GetThread()->PreemptiveGCDisabled()); + _ASSERTE (!g_fEEStarted || !m_fAsyncMode || GetThreadNULLOk() == NULL || g_HashMapEbr.InCriticalRegion()); _ASSERTE (OwnLock()); UPTR newPrimeIndex = NewSize(); @@ -989,8 +999,13 @@ void HashMap::Rehash() if (m_fAsyncMode) { - // If we are allowing asynchronous reads, we must delay bucket cleanup until GC time. - SyncClean::AddHashMap (pObsoleteTables); + // In async mode, readers may still be traversing the old bucket array. + // Queue for deferred deletion via EBR. The buckets will be freed once + // all threads have exited their critical regions. 
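+        // The size estimate accounts for the bucket array plus its leading
+        // size header slot, matching how the array was allocated.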
+ g_HashMapEbr.QueueForDeletion( + pObsoleteTables, + DeleteObsoleteBuckets, + (GetSize(pObsoleteTables) + 1) * sizeof(Bucket)); } else { @@ -1020,7 +1035,7 @@ void HashMap::Compact() _ASSERTE (OwnLock()); // - GCX_MAYBE_COOP_NO_THREAD_BROKEN(m_fAsyncMode); + EbrCriticalRegionHolder ebrHolder(&g_HashMapEbr, m_fAsyncMode); ASSERT(m_rgBuckets != NULL); // Try to resize if that makes sense (reduce the size of the table), but diff --git a/src/coreclr/vm/syncclean.cpp b/src/coreclr/vm/syncclean.cpp index 2723047f0901dc..f0d3bdbd59c165 100644 --- a/src/coreclr/vm/syncclean.cpp +++ b/src/coreclr/vm/syncclean.cpp @@ -12,7 +12,6 @@ #include "interpexec.h" #endif -VolatilePtr SyncClean::m_HashMap = NULL; VolatilePtr SyncClean::m_EEHashTable; void SyncClean::Terminate() @@ -25,26 +24,6 @@ void SyncClean::Terminate() CleanUp(); } -void SyncClean::AddHashMap (Bucket *bucket) -{ - WRAPPER_NO_CONTRACT; - - if (!g_fEEStarted) { - delete [] bucket; - return; - } - - _ASSERTE (GetThreadNULLOk() == NULL || GetThread()->PreemptiveGCDisabled()); - - Bucket * pTempBucket = NULL; - do - { - pTempBucket = (Bucket *)m_HashMap; - NextObsolete (bucket) = pTempBucket; - } - while (InterlockedCompareExchangeT(m_HashMap.GetPointer(), bucket, pTempBucket) != pTempBucket); -} - void SyncClean::AddEEHashTable (EEHashEntry** entry) { WRAPPER_NO_CONTRACT; @@ -73,17 +52,6 @@ void SyncClean::CleanUp () _ASSERTE (IsAtProcessExit() || IsGCSpecialThread() || (GCHeapUtilities::IsGCInProgress() && GetThreadNULLOk() == ThreadSuspend::GetSuspensionThread())); - if (m_HashMap) - { - Bucket * pTempBucket = InterlockedExchangeT(m_HashMap.GetPointer(), NULL); - - while (pTempBucket) - { - Bucket* pNextBucket = NextObsolete (pTempBucket); - delete [] pTempBucket; - pTempBucket = pNextBucket; - } - } if (m_EEHashTable) { diff --git a/src/coreclr/vm/syncclean.hpp b/src/coreclr/vm/syncclean.hpp index c203b7245d103c..cec8ca33508ecc 100644 --- a/src/coreclr/vm/syncclean.hpp +++ b/src/coreclr/vm/syncclean.hpp @@ -9,7 +9,6 @@ // To make this work, we need to make sure that these data are accessed in cooperative GC // mode. 
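+// Note: HashMap bucket arrays are no longer handled here; they are now
+// reclaimed through EBR (see ebr.h).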
-class Bucket; struct EEHashEntry; class Crst; class CrstStatic; @@ -18,12 +17,10 @@ class SyncClean { public: static void Terminate (); - static void AddHashMap (Bucket *bucket); static void AddEEHashTable (EEHashEntry** entry); static void CleanUp (); private: - static VolatilePtr m_HashMap; // Cleanup list for HashMap static VolatilePtr m_EEHashTable; // Cleanup list for EEHashTable }; #endif From 55708d08330af54fb164d19fd2be90f83c41460b Mon Sep 17 00:00:00 2001 From: Aaron R Robinson Date: Wed, 11 Feb 2026 16:41:06 -0800 Subject: [PATCH 02/24] EBR cleanup: naming, scoping, and style improvements - Rename memoryBudget/m_pendingSize to memoryBudgetInBytes/m_pendingSizeInBytes - Mark EbrCollector and EbrCriticalRegionHolder as final - Delete move constructors/assignment operators - Move NextObsolete from hash.h (public) to hash.cpp (file-static) - Reuse DeleteObsoleteBuckets for sync-mode path in Rehash - Trim redundant backstory comments at EBR call sites - Remove unused forward decls from syncclean.hpp --- src/coreclr/vm/ceemain.cpp | 2 +- src/coreclr/vm/ebr.cpp | 14 +++++++------- src/coreclr/vm/ebr.h | 20 +++++++++++--------- src/coreclr/vm/hash.cpp | 27 ++++++++++++++------------- src/coreclr/vm/hash.h | 11 ----------- src/coreclr/vm/syncclean.hpp | 5 ++--- 6 files changed, 35 insertions(+), 44 deletions(-) diff --git a/src/coreclr/vm/ceemain.cpp b/src/coreclr/vm/ceemain.cpp index ba62c786a0c868..b72e7abba11886 100644 --- a/src/coreclr/vm/ceemain.cpp +++ b/src/coreclr/vm/ceemain.cpp @@ -789,7 +789,7 @@ void EEStartupHelper() // Initialize EBR (Epoch-Based Reclamation) for HashMap's async mode. // This must be done before any HashMap is initialized with fAsyncMode=TRUE. - g_HashMapEbr.Init(CrstEbrThreadList, CrstEbrPending, 1024 * 1024); + g_HashMapEbr.Init(CrstEbrThreadList, CrstEbrPending, /* memoryBudgetInBytes */ 1024 * 1024); StubManager::InitializeStubManagers(); diff --git a/src/coreclr/vm/ebr.cpp b/src/coreclr/vm/ebr.cpp index 908a5138e42787..30cbfc94c4b034 100644 --- a/src/coreclr/vm/ebr.cpp +++ b/src/coreclr/vm/ebr.cpp @@ -41,7 +41,7 @@ EbrCollector g_HashMapEbr; // ============================================ void -EbrCollector::Init(CrstType crstThreadList, CrstType crstPending, size_t memoryBudget) +EbrCollector::Init(CrstType crstThreadList, CrstType crstPending, size_t memoryBudgetInBytes) { CONTRACTL { @@ -52,9 +52,9 @@ EbrCollector::Init(CrstType crstThreadList, CrstType crstPending, size_t memoryB _ASSERTE(!m_initialized); - m_memoryBudget = memoryBudget; + m_memoryBudgetInBytes = memoryBudgetInBytes; m_globalEpoch = 0; - m_pendingSize = 0; + m_pendingSizeInBytes = 0; m_pThreadListHead = nullptr; for (uint32_t i = 0; i < EBR_EPOCHS; i++) m_pPendingHeads[i] = nullptr; @@ -353,8 +353,8 @@ EbrCollector::TryReclaim() size_t freed = DrainQueue(safeSlot); if (freed > 0) { - _ASSERTE((size_t)m_pendingSize >= freed); - m_pendingSize = (size_t)m_pendingSize - freed; + _ASSERTE((size_t)m_pendingSizeInBytes >= freed); + m_pendingSizeInBytes = (size_t)m_pendingSizeInBytes - freed; } } } @@ -402,10 +402,10 @@ EbrCollector::QueueForDeletion(void* pObject, EbrDeleteFunc pfnDelete, size_t es uint32_t slot = m_globalEpoch; pEntry->m_pNext = m_pPendingHeads[slot]; m_pPendingHeads[slot] = pEntry; - m_pendingSize = (size_t)m_pendingSize + estimatedSize; + m_pendingSizeInBytes = (size_t)m_pendingSizeInBytes + estimatedSize; } // Try reclamation if over budget. 
- if ((size_t)m_pendingSize > m_memoryBudget) + if ((size_t)m_pendingSizeInBytes > m_memoryBudgetInBytes) TryReclaim(); } diff --git a/src/coreclr/vm/ebr.h b/src/coreclr/vm/ebr.h index db59c4a15c399b..8ae44226516b72 100644 --- a/src/coreclr/vm/ebr.h +++ b/src/coreclr/vm/ebr.h @@ -51,22 +51,23 @@ struct EbrPendingEntry // // A single collector instance is typically shared across all threads that // access a particular set of shared data structures. -class EbrCollector +class EbrCollector final { public: EbrCollector() = default; ~EbrCollector() = default; - // Non-copyable EbrCollector(const EbrCollector&) = delete; EbrCollector& operator=(const EbrCollector&) = delete; + EbrCollector(EbrCollector&&) = delete; + EbrCollector& operator=(EbrCollector&&) = delete; // Initialize the collector. // crstThreadList: Crst type for the thread list lock // crstPending: Crst type for the pending deletion queue lock - // memoryBudget: approximate byte threshold of pending deletions before - // attempting reclamation - void Init(CrstType crstThreadList, CrstType crstPending, size_t memoryBudget); + // memoryBudgetInBytes: approximate byte threshold of pending deletions before + // attempting reclamation + void Init(CrstType crstThreadList, CrstType crstPending, size_t memoryBudgetInBytes); // Shutdown the collector, draining all pending deletions. // All threads should have exited their critical regions before calling. @@ -104,7 +105,7 @@ class EbrCollector void TryReclaim(); // Configuration - size_t m_memoryBudget = 0; + size_t m_memoryBudgetInBytes = 0; bool m_initialized = false; // Global epoch counter [0, EBR_EPOCHS-1] @@ -117,7 +118,7 @@ class EbrCollector // Pending deletion queues, one per epoch slot (protected by m_pendingLock) CrstStatic m_pendingLock; EbrPendingEntry* m_pPendingHeads[EBR_EPOCHS] = {}; - Volatile m_pendingSize; + Volatile m_pendingSizeInBytes; }; // Global EBR collector for HashMap's async mode. @@ -125,7 +126,7 @@ extern EbrCollector g_HashMapEbr; // RAII holder for EBR critical regions, analogous to GCX_COOP pattern. // When fEnable is false, the holder is a no-op. -class EbrCriticalRegionHolder +class EbrCriticalRegionHolder final { public: EbrCriticalRegionHolder(EbrCollector* pCollector, bool fEnable) @@ -141,9 +142,10 @@ class EbrCriticalRegionHolder m_pCollector->ExitCriticalRegion(); } - // Non-copyable EbrCriticalRegionHolder(const EbrCriticalRegionHolder&) = delete; EbrCriticalRegionHolder& operator=(const EbrCriticalRegionHolder&) = delete; + EbrCriticalRegionHolder(EbrCriticalRegionHolder&&) = delete; + EbrCriticalRegionHolder& operator=(EbrCriticalRegionHolder&&) = delete; private: EbrCollector* m_pCollector; diff --git a/src/coreclr/vm/hash.cpp b/src/coreclr/vm/hash.cpp index c3e48e6e8e101f..3246a24219aa6c 100644 --- a/src/coreclr/vm/hash.cpp +++ b/src/coreclr/vm/hash.cpp @@ -95,12 +95,22 @@ BOOL Bucket::InsertValue(const UPTR key, const UPTR value) #endif // !DACCESS_COMPILE +//--------------------------------------------------------------------- +// inline Bucket*& NextObsolete (Bucket* rgBuckets) +// get the next obsolete bucket in the chain +static Bucket*& NextObsolete(Bucket* rgBuckets) +{ + LIMITED_METHOD_CONTRACT; + return *(Bucket**)&((size_t*)rgBuckets)[1]; +} + // Static helper for EBR deferred deletion of obsolete bucket arrays. 
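+// It walks the obsolete chain threaded through each retired array's header
+// (see NextObsolete above) and frees every array in the chain.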
static void DeleteObsoleteBuckets(void* p) { LIMITED_METHOD_CONTRACT; Bucket* pBucket = (Bucket*)p; - while (pBucket) { + while (pBucket) + { Bucket* pNextBucket = NextObsolete(pBucket); delete [] pBucket; pBucket = pNextBucket; @@ -491,8 +501,7 @@ void HashMap::InsertValue (UPTR key, UPTR value) _ASSERTE (OwnLock()); // Enter EBR critical region to protect against concurrent bucket array - // replacement during async mode. Replaces the former COOP transition - // which caused deadlocks with DebuggerController lock ordering. + // replacement during async mode. EbrCriticalRegionHolder ebrHolder(&g_HashMapEbr, m_fAsyncMode); ASSERT(m_rgBuckets != NULL); @@ -558,9 +567,7 @@ UPTR HashMap::LookupValue(UPTR key, UPTR value) _ASSERTE (m_fAsyncMode || OwnLock()); // Enter EBR critical region to protect against concurrent bucket array - // replacement during async mode. This replaces the former COOP transition - // (GCX_MAYBE_COOP_NO_THREAD_BROKEN) which caused a deadlock when called - // while holding the DebuggerController lock. The GC thread is excluded + // replacement during async mode. The GC thread is excluded // since it cannot race with itself on RestartEE. EbrCriticalRegionHolder ebrHolder(&g_HashMapEbr, m_fAsyncMode && !IsGCThread()); @@ -1009,14 +1016,8 @@ void HashMap::Rehash() } else { - Bucket* pBucket = pObsoleteTables; - while (pBucket) { - Bucket* pNextBucket = NextObsolete(pBucket); - delete [] pBucket; - pBucket = pNextBucket; - } + DeleteObsoleteBuckets(pObsoleteTables); } - } //--------------------------------------------------------------------- diff --git a/src/coreclr/vm/hash.h b/src/coreclr/vm/hash.h index a3d1fcd3dcf955..768da9dd07e012 100644 --- a/src/coreclr/vm/hash.h +++ b/src/coreclr/vm/hash.h @@ -793,15 +793,4 @@ class PtrHashMap #endif // DACCESS_COMPILE }; -//--------------------------------------------------------------------- -// inline Bucket*& NextObsolete (Bucket* rgBuckets) -// get the next obsolete bucket in the chain -inline -Bucket*& NextObsolete (Bucket* rgBuckets) -{ - LIMITED_METHOD_CONTRACT; - - return *(Bucket**)&((size_t*)rgBuckets)[1]; -} - #endif // !_HASH_H_ diff --git a/src/coreclr/vm/syncclean.hpp b/src/coreclr/vm/syncclean.hpp index cec8ca33508ecc..7b8761c76c765b 100644 --- a/src/coreclr/vm/syncclean.hpp +++ b/src/coreclr/vm/syncclean.hpp @@ -10,10 +10,9 @@ // mode. struct EEHashEntry; -class Crst; -class CrstStatic; -class SyncClean { +class SyncClean final +{ public: static void Terminate (); From 73168b4eef7959f369b161c6a3ef64b6b86c7a01 Mon Sep 17 00:00:00 2001 From: Aaron R Robinson Date: Wed, 11 Feb 2026 16:55:16 -0800 Subject: [PATCH 03/24] Refactor EbrPendingEntry structure: move definition from ebr.h to ebr.cpp --- src/coreclr/vm/ebr.cpp | 9 +++++++++ src/coreclr/vm/ebr.h | 10 +--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/coreclr/vm/ebr.cpp b/src/coreclr/vm/ebr.cpp index 30cbfc94c4b034..3798b56f1256d5 100644 --- a/src/coreclr/vm/ebr.cpp +++ b/src/coreclr/vm/ebr.cpp @@ -27,6 +27,15 @@ struct EbrThreadData EbrThreadData* m_pNext; }; +// Singly-linked list node for pending deletions. +struct EbrPendingEntry +{ + void* m_pObject; + EbrDeleteFunc m_pfnDelete; + size_t m_estimatedSize; + EbrPendingEntry* m_pNext; +}; + // Each thread holds its own EbrThreadData via thread_local. 
// We support only a single collector per process; if multiple collectors // are needed this can be extended to a per-collector TLS map, but for the diff --git a/src/coreclr/vm/ebr.h b/src/coreclr/vm/ebr.h index 8ae44226516b72..995f3498bd8138 100644 --- a/src/coreclr/vm/ebr.h +++ b/src/coreclr/vm/ebr.h @@ -37,15 +37,7 @@ typedef void (*EbrDeleteFunc)(void* pObject); // Forward declarations struct EbrThreadData; - -// Singly-linked list node for pending deletions. -struct EbrPendingEntry -{ - void* m_pObject; - EbrDeleteFunc m_pfnDelete; - size_t m_estimatedSize; - EbrPendingEntry* m_pNext; -}; +struct EbrPendingEntry; // EBR Collector - manages epoch-based deferred reclamation. // From d6acf4e0ca2a1912385954e361201635bf4f69d0 Mon Sep 17 00:00:00 2001 From: Aaron R Robinson Date: Wed, 11 Feb 2026 17:37:19 -0800 Subject: [PATCH 04/24] Update src/coreclr/vm/hash.cpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/coreclr/vm/hash.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/vm/hash.cpp b/src/coreclr/vm/hash.cpp index 3246a24219aa6c..cec068ed4d85c8 100644 --- a/src/coreclr/vm/hash.cpp +++ b/src/coreclr/vm/hash.cpp @@ -16,7 +16,6 @@ Module Name: #include "excep.h" -#include "syncclean.hpp" #include "ebr.h" #include "threadsuspend.h" From cfedbdfa1c7fa7ce816ac72901a37a43696ada1b Mon Sep 17 00:00:00 2001 From: Aaron R Robinson Date: Thu, 12 Feb 2026 16:35:09 -0800 Subject: [PATCH 05/24] Fix EBR QueueForDeletion OOM safety and Rehash GetSize misuse - QueueForDeletion: leak object on OOM instead of immediate deletion, which could cause use-after-free for concurrent EBR readers. Track leaked count via InterlockedIncrement counter. - Rehash: read obsolete bucket size directly from allocation base instead of calling GetSize with wrong pointer (undefined behavior). --- src/coreclr/vm/ebr.cpp | 13 ++++++++----- src/coreclr/vm/hash.cpp | 3 ++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/coreclr/vm/ebr.cpp b/src/coreclr/vm/ebr.cpp index 3798b56f1256d5..7061c5746a1bda 100644 --- a/src/coreclr/vm/ebr.cpp +++ b/src/coreclr/vm/ebr.cpp @@ -45,6 +45,9 @@ static thread_local EbrThreadData* t_pThreadData = nullptr; // Global EBR collector for HashMap's async mode. EbrCollector g_HashMapEbr; +// Count of objects leaked due to OOM when queuing for deferred deletion. +static LONG s_ebrLeakedDeletionCount = 0; + // ============================================ // EbrCollector implementation // ============================================ @@ -391,11 +394,11 @@ EbrCollector::QueueForDeletion(void* pObject, EbrDeleteFunc pfnDelete, size_t es EbrPendingEntry* pEntry = new (nothrow) EbrPendingEntry(); if (pEntry == nullptr) { - // If we can't allocate, delete immediately. This is safe because we're - // in a critical region and hold the writer lock, so no readers can be - // referencing pObject (the writer just replaced it atomically). - // This is a degraded fallback, not the normal path. - pfnDelete(pObject); + // If we can't allocate, we must not delete pObject immediately, because + // EBR readers in async mode may still be traversing data structures that + // reference it. As a degraded fallback, intentionally leak pObject rather + // than risk a use-after-free. This path should be extremely rare. 
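+        // The counter is never consumed by the runtime; it exists so a
+        // debugger can confirm whether this fallback has ever fired.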
+ InterlockedIncrement(&s_ebrLeakedDeletionCount); return; } diff --git a/src/coreclr/vm/hash.cpp b/src/coreclr/vm/hash.cpp index cec068ed4d85c8..d02920fca6f5e0 100644 --- a/src/coreclr/vm/hash.cpp +++ b/src/coreclr/vm/hash.cpp @@ -1008,10 +1008,11 @@ void HashMap::Rehash() // In async mode, readers may still be traversing the old bucket array. // Queue for deferred deletion via EBR. The buckets will be freed once // all threads have exited their critical regions. + size_t obsoleteSize = ((size_t*)pObsoleteTables)[0]; g_HashMapEbr.QueueForDeletion( pObsoleteTables, DeleteObsoleteBuckets, - (GetSize(pObsoleteTables) + 1) * sizeof(Bucket)); + (obsoleteSize + 1) * sizeof(Bucket)); } else { From c7dfb9be2d338347cff8b8353d14b6fbc0444ffe Mon Sep 17 00:00:00 2001 From: Aaron R Robinson Date: Fri, 13 Feb 2026 10:14:47 -0800 Subject: [PATCH 06/24] Address PR feedback: Shutdown guard, simplified asserts, comment fixes - Shutdown: early-return if !m_initialized instead of asserting - Buckets()/Rehash(): simplify assert to !m_fAsyncMode || InCriticalRegion() - LookupValue: remove GC thread exclusion from EBR critical region - Comment fixes in InsertValue and Rehash deferred deletion --- src/coreclr/vm/ebr.cpp | 3 ++- src/coreclr/vm/hash.cpp | 13 +++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/coreclr/vm/ebr.cpp b/src/coreclr/vm/ebr.cpp index 7061c5746a1bda..1f54642f932d1e 100644 --- a/src/coreclr/vm/ebr.cpp +++ b/src/coreclr/vm/ebr.cpp @@ -87,7 +87,8 @@ EbrCollector::Shutdown() } CONTRACTL_END; - _ASSERTE(m_initialized); + if (!m_initialized) + return; // Drain all pending queues unconditionally. { diff --git a/src/coreclr/vm/hash.cpp b/src/coreclr/vm/hash.cpp index d02920fca6f5e0..c50b087c716787 100644 --- a/src/coreclr/vm/hash.cpp +++ b/src/coreclr/vm/hash.cpp @@ -125,7 +125,7 @@ PTR_Bucket HashMap::Buckets() LIMITED_METHOD_DAC_CONTRACT; #if !defined(DACCESS_COMPILE) - _ASSERTE (!g_fEEStarted || !m_fAsyncMode || GetThreadNULLOk() == NULL || g_HashMapEbr.InCriticalRegion() || IsGCThread()); + _ASSERTE (!m_fAsyncMode || g_HashMapEbr.InCriticalRegion()); #endif return m_rgBuckets + 1; } @@ -500,7 +500,7 @@ void HashMap::InsertValue (UPTR key, UPTR value) _ASSERTE (OwnLock()); // Enter EBR critical region to protect against concurrent bucket array - // replacement during async mode. + // deletion during async mode. EbrCriticalRegionHolder ebrHolder(&g_HashMapEbr, m_fAsyncMode); ASSERT(m_rgBuckets != NULL); @@ -565,10 +565,7 @@ UPTR HashMap::LookupValue(UPTR key, UPTR value) #ifndef DACCESS_COMPILE _ASSERTE (m_fAsyncMode || OwnLock()); - // Enter EBR critical region to protect against concurrent bucket array - // replacement during async mode. The GC thread is excluded - // since it cannot race with itself on RestartEE. - EbrCriticalRegionHolder ebrHolder(&g_HashMapEbr, m_fAsyncMode && !IsGCThread()); + EbrCriticalRegionHolder ebrHolder(&g_HashMapEbr, m_fAsyncMode); ASSERT(m_rgBuckets != NULL); // This is necessary in case some other thread @@ -886,7 +883,7 @@ void HashMap::Rehash() EbrCriticalRegionHolder ebrHolder(&g_HashMapEbr, m_fAsyncMode); - _ASSERTE (!g_fEEStarted || !m_fAsyncMode || GetThreadNULLOk() == NULL || g_HashMapEbr.InCriticalRegion()); + _ASSERTE (!m_fAsyncMode || g_HashMapEbr.InCriticalRegion()); _ASSERTE (OwnLock()); UPTR newPrimeIndex = NewSize(); @@ -1007,7 +1004,7 @@ void HashMap::Rehash() { // In async mode, readers may still be traversing the old bucket array. // Queue for deferred deletion via EBR. 
The buckets will be freed once - // all threads have exited their critical regions. + // all threads have exited their critical regions or later. size_t obsoleteSize = ((size_t*)pObsoleteTables)[0]; g_HashMapEbr.QueueForDeletion( pObsoleteTables, From fc4aece4107a6dd80e48b39bca5d8dd07a00329e Mon Sep 17 00:00:00 2001 From: Aaron R Robinson Date: Fri, 13 Feb 2026 10:43:40 -0800 Subject: [PATCH 07/24] Clean up per-thread EBR state on thread exit Add EbrCollector::ThreadDetach() to unlink and free per-thread EBR data. Call it from ThreadDetaching() in corhost.cpp, following the existing StressLog::ThreadDetach() pattern. This prevents unbounded growth of the EBR thread list in processes with short-lived threads. --- src/coreclr/vm/corhost.cpp | 3 +++ src/coreclr/vm/ebr.cpp | 21 +++++++++++++++++++++ src/coreclr/vm/ebr.h | 4 ++++ 3 files changed, 28 insertions(+) diff --git a/src/coreclr/vm/corhost.cpp b/src/coreclr/vm/corhost.cpp index b3b83121f9ba1c..875ecb36d5e126 100644 --- a/src/coreclr/vm/corhost.cpp +++ b/src/coreclr/vm/corhost.cpp @@ -37,6 +37,7 @@ #endif // !TARGET_UNIX #include "nativelibrary.h" +#include "ebr.h" #ifndef DACCESS_COMPILE @@ -1081,6 +1082,8 @@ void ThreadDetaching() #endif } + g_HashMapEbr.ThreadDetach(); + #ifdef ENABLE_CONTRACTS_IMPL if (t_pClrDebugState != NULL) { diff --git a/src/coreclr/vm/ebr.cpp b/src/coreclr/vm/ebr.cpp index 1f54642f932d1e..7ca06d2ba8090f 100644 --- a/src/coreclr/vm/ebr.cpp +++ b/src/coreclr/vm/ebr.cpp @@ -262,6 +262,27 @@ EbrCollector::InCriticalRegion() return pData->m_criticalDepth > 0; } +void +EbrCollector::ThreadDetach() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + + EbrThreadData* pData = t_pThreadData; + if (pData == nullptr || pData->m_pCollector != this) + return; + + _ASSERTE(!InCriticalRegion()); + + UnlinkThreadData(pData); + t_pThreadData = nullptr; + delete pData; +} + // ============================================ // Epoch advancement // ============================================ diff --git a/src/coreclr/vm/ebr.h b/src/coreclr/vm/ebr.h index 995f3498bd8138..17ee6f4c86311c 100644 --- a/src/coreclr/vm/ebr.h +++ b/src/coreclr/vm/ebr.h @@ -83,6 +83,10 @@ class EbrCollector final // Returns true if the calling thread is currently in a critical region. bool InCriticalRegion(); + // Detach the calling thread from this collector. Unlinks and frees per-thread + // EBR state. Should be called during thread shutdown. + void ThreadDetach(); + private: // Thread list management EbrThreadData* GetOrCreateThreadData(); From 4377813f064650c3b248df699967342ed865d793 Mon Sep 17 00:00:00 2001 From: Aaron R Robinson Date: Fri, 13 Feb 2026 12:02:45 -0800 Subject: [PATCH 08/24] Use thread_local value for EbrThreadData instead of heap allocation Replace thread_local EbrThreadData* with thread_local EbrThreadData value, eliminating the OOM failure path in GetOrCreateThreadData(). This removes the risk of null dereference in ExitCriticalRegion() when the RAII holder unwinds after a failed EnterCriticalRegion(). Shutdown and ThreadDetach now clear the data with = {} instead of deleting heap memory. 
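The core of the change, as it appears in the diff below:

    // Before: lazily heap-allocated, so allocation could fail.
    static thread_local EbrThreadData* t_pThreadData = nullptr;

    // After: zero-initialized value, with no allocation or failure path.
    static thread_local EbrThreadData t_pThreadData = {};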
--- src/coreclr/vm/ebr.cpp | 44 ++++++++++++++---------------------------- 1 file changed, 15 insertions(+), 29 deletions(-) diff --git a/src/coreclr/vm/ebr.cpp b/src/coreclr/vm/ebr.cpp index 7ca06d2ba8090f..5ecc08ab3711f1 100644 --- a/src/coreclr/vm/ebr.cpp +++ b/src/coreclr/vm/ebr.cpp @@ -40,7 +40,7 @@ struct EbrPendingEntry // We support only a single collector per process; if multiple collectors // are needed this can be extended to a per-collector TLS map, but for the // HashMap use-case a single global collector suffices. -static thread_local EbrThreadData* t_pThreadData = nullptr; +static thread_local EbrThreadData t_pThreadData = {}; // Global EBR collector for HashMap's async mode. EbrCollector g_HashMapEbr; @@ -97,15 +97,14 @@ EbrCollector::Shutdown() DrainQueue(i); } - // Free any remaining thread data entries. Threads should have exited - // their critical regions by now. We walk the list and delete them. + // Threads should have exited their critical regions by now. We walk the list and clear them. { CrstHolder lock(&m_threadListLock); EbrThreadData* pCur = m_pThreadListHead; while (pCur != nullptr) { EbrThreadData* pNext = pCur->m_pNext; - delete pCur; + *pCur = {}; // Clear the data to prevent reuse after shutdown. pCur = pNext; } m_pThreadListHead = nullptr; @@ -131,26 +130,15 @@ EbrCollector::GetOrCreateThreadData() } CONTRACTL_END; - EbrThreadData* pData = t_pThreadData; - if (pData != nullptr && pData->m_pCollector == this) + EbrThreadData* pData = &t_pThreadData; + if (pData->m_pCollector == this) return pData; - // Allocate new per-thread data. - pData = new (nothrow) EbrThreadData(); - if (pData == nullptr) - { - _ASSERTE(!"EBR: failed to allocate thread data"); - return nullptr; - } - pData->m_pCollector = this; pData->m_localEpoch = 0; pData->m_criticalDepth = 0; pData->m_pNext = nullptr; - // Store in thread_local. - t_pThreadData = pData; - // Link into the collector's thread list. { CrstHolder lock(&m_threadListLock); @@ -205,8 +193,7 @@ EbrCollector::EnterCriticalRegion() _ASSERTE(m_initialized); EbrThreadData* pData = GetOrCreateThreadData(); - if (pData == nullptr) - return; + _ASSERTE(pData != nullptr); pData->m_criticalDepth++; @@ -236,8 +223,8 @@ EbrCollector::ExitCriticalRegion() _ASSERTE(m_initialized); - EbrThreadData* pData = t_pThreadData; - _ASSERTE(pData != nullptr && pData->m_pCollector == this); + EbrThreadData* pData = &t_pThreadData; + _ASSERTE(pData->m_pCollector == this); _ASSERTE(pData->m_criticalDepth > 0); pData->m_criticalDepth--; @@ -256,8 +243,8 @@ EbrCollector::InCriticalRegion() { LIMITED_METHOD_CONTRACT; - EbrThreadData* pData = t_pThreadData; - if (pData == nullptr || pData->m_pCollector != this) + EbrThreadData* pData = &t_pThreadData; + if (pData->m_pCollector != this) return false; return pData->m_criticalDepth > 0; } @@ -272,15 +259,14 @@ EbrCollector::ThreadDetach() } CONTRACTL_END; - EbrThreadData* pData = t_pThreadData; - if (pData == nullptr || pData->m_pCollector != this) + EbrThreadData* pData = &t_pThreadData; + if (pData->m_pCollector != this) return; _ASSERTE(!InCriticalRegion()); UnlinkThreadData(pData); - t_pThreadData = nullptr; - delete pData; + t_pThreadData = {}; // Clear thread_local to prevent reuse after detach. } // ============================================ @@ -409,8 +395,8 @@ EbrCollector::QueueForDeletion(void* pObject, EbrDeleteFunc pfnDelete, size_t es _ASSERTE(pfnDelete != nullptr); // Must be in a critical region. 
- EbrThreadData* pData = t_pThreadData; - _ASSERTE(pData != nullptr && pData->m_pCollector == this && pData->m_criticalDepth > 0); + EbrThreadData* pData = &t_pThreadData; + _ASSERTE(pData->m_pCollector == this && pData->m_criticalDepth > 0); // Allocate pending entry. EbrPendingEntry* pEntry = new (nothrow) EbrPendingEntry(); From 6aa1f2c0b4732289c2e850ec513d2ea06fcd8967 Mon Sep 17 00:00:00 2001 From: Aaron R Robinson Date: Fri, 13 Feb 2026 14:57:55 -0800 Subject: [PATCH 09/24] Fix contracts --- src/coreclr/vm/ebr.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/vm/ebr.cpp b/src/coreclr/vm/ebr.cpp index 5ecc08ab3711f1..c02f43a0e649ae 100644 --- a/src/coreclr/vm/ebr.cpp +++ b/src/coreclr/vm/ebr.cpp @@ -57,7 +57,7 @@ EbrCollector::Init(CrstType crstThreadList, CrstType crstPending, size_t memoryB { CONTRACTL { - NOTHROW; + THROWS; GC_NOTRIGGER; } CONTRACTL_END; From 5c554da97b6c5ae8fc9517b9f03e4c950a511798 Mon Sep 17 00:00:00 2001 From: Aaron R Robinson Date: Fri, 13 Feb 2026 16:24:52 -0800 Subject: [PATCH 10/24] Centralize bucket allocation with AllocateBuckets/FreeBuckets Introduce static AllocateBuckets and FreeBuckets helpers to ensure consistent BYTE[] allocation and deallocation of bucket arrays. Move GetSize/SetSize from HashMap members to file-static functions. Remove vestigial NextObsolete and chain-traversal loop from DeleteObsoleteBuckets since EBR queues each array independently. --- src/coreclr/vm/hash.cpp | 88 +++++++++++++++++------------------------ src/coreclr/vm/hash.h | 2 - 2 files changed, 37 insertions(+), 53 deletions(-) diff --git a/src/coreclr/vm/hash.cpp b/src/coreclr/vm/hash.cpp index c50b087c716787..5d64efdc82edf4 100644 --- a/src/coreclr/vm/hash.cpp +++ b/src/coreclr/vm/hash.cpp @@ -91,29 +91,48 @@ BOOL Bucket::InsertValue(const UPTR key, const UPTR value) SetCollision(); // otherwise set the collision bit return false; } - #endif // !DACCESS_COMPILE -//--------------------------------------------------------------------- -// inline Bucket*& NextObsolete (Bucket* rgBuckets) -// get the next obsolete bucket in the chain -static Bucket*& NextObsolete(Bucket* rgBuckets) +static DWORD GetSize(PTR_Bucket rgBuckets) +{ + LIMITED_METHOD_DAC_CONTRACT; + PTR_size_t pSize = dac_cast(rgBuckets - 1); + _ASSERTE(FitsIn(pSize[0])); + return static_cast(pSize[0]); +} + +static void SetSize(Bucket* rgBuckets, size_t size) { LIMITED_METHOD_CONTRACT; - return *(Bucket**)&((size_t*)rgBuckets)[1]; + ((size_t*)rgBuckets)[0] = size; +} + +// Allocate a zero-initialized bucket array with space for 'size' buckets +// plus a leading size_t header. +static Bucket* AllocateBuckets(DWORD size) +{ + STATIC_CONTRACT_THROWS; + S_SIZE_T cbAlloc = (S_SIZE_T(size) + S_SIZE_T(1)) * S_SIZE_T(sizeof(Bucket)); + if (cbAlloc.IsOverflow()) + ThrowHR(COR_E_OVERFLOW); + Bucket* rgBuckets = (Bucket*) new BYTE[cbAlloc.Value()]; + memset(rgBuckets, 0, cbAlloc.Value()); + SetSize(rgBuckets, size); + return rgBuckets; +} + +// Free a bucket array allocated by AllocateBuckets. +static void FreeBuckets(Bucket* rgBuckets) +{ + LIMITED_METHOD_CONTRACT; + delete [] (BYTE*)rgBuckets; } // Static helper for EBR deferred deletion of obsolete bucket arrays. 
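+// (Its signature matches EbrDeleteFunc, so Rehash can pass it directly to
+// QueueForDeletion.)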
static void DeleteObsoleteBuckets(void* p) { LIMITED_METHOD_CONTRACT; - Bucket* pBucket = (Bucket*)p; - while (pBucket) - { - Bucket* pNextBucket = NextObsolete(pBucket); - delete [] pBucket; - pBucket = pNextBucket; - } + FreeBuckets((Bucket*)p); } //--------------------------------------------------------------------- @@ -130,19 +149,6 @@ PTR_Bucket HashMap::Buckets() return m_rgBuckets + 1; } -//--------------------------------------------------------------------- -// inline size_t HashMap::GetSize(PTR_Bucket rgBuckets) -// get the number of buckets -inline -DWORD HashMap::GetSize(PTR_Bucket rgBuckets) -{ - LIMITED_METHOD_DAC_CONTRACT; - PTR_size_t pSize = dac_cast(rgBuckets - 1); - _ASSERTE(FitsIn(pSize[0])); - return static_cast(pSize[0]); -} - - //--------------------------------------------------------------------- // inline size_t HashMap::HashFunction(UPTR key, UINT numBuckets, UINT &seed, UINT &incr) // get the first & second hash function. @@ -166,16 +172,6 @@ void HashMap::HashFunction(const UPTR key, const UINT numBuckets, UINT &seed, UI #ifndef DACCESS_COMPILE -//--------------------------------------------------------------------- -// inline void HashMap::SetSize(Bucket *rgBuckets, size_t size) -// set the number of buckets -inline -void HashMap::SetSize(Bucket *rgBuckets, size_t size) -{ - LIMITED_METHOD_CONTRACT; - ((size_t*)rgBuckets)[0] = size; -} - //--------------------------------------------------------------------- // HashMap::HashMap() // constructor, initialize all values @@ -290,10 +286,7 @@ void HashMap::Init(DWORD cbInitialSize, Compare* pCompare, BOOL fAsyncMode, Lock DWORD size = g_rgPrimes[m_iPrimeIndex]; _ASSERTE(size < 0x7fffffff); - m_rgBuckets = new Bucket[size+1]; - - memset (m_rgBuckets, 0, (size+1)*sizeof(Bucket)); - SetSize(m_rgBuckets, size); + m_rgBuckets = AllocateBuckets(size); m_pCompare = pCompare; @@ -376,7 +369,7 @@ void HashMap::Clear() STATIC_CONTRACT_FORBID_FAULT; // free the current table - delete [] m_rgBuckets; + FreeBuckets(m_rgBuckets); m_rgBuckets = NULL; } @@ -902,14 +895,7 @@ void HashMap::Rehash() Bucket* rgBuckets = Buckets(); UPTR cbCurrSize = GetSize(rgBuckets); - S_SIZE_T cbNewBuckets = (S_SIZE_T(cbNewSize) + S_SIZE_T(1)) * S_SIZE_T(sizeof(Bucket)); - - if (cbNewBuckets.IsOverflow()) - ThrowHR(COR_E_OVERFLOW); - - Bucket* rgNewBuckets = (Bucket *) new BYTE[cbNewBuckets.Value()]; - memset (rgNewBuckets, 0, cbNewBuckets.Value()); - SetSize(rgNewBuckets, cbNewSize); + Bucket* rgNewBuckets = AllocateBuckets(cbNewSize); // current valid slots UPTR cbValidSlots = m_cbInserts-m_cbDeletes; @@ -1241,10 +1227,10 @@ void HashMap::LookupPerfTest(HashMap * table, const unsigned int MinThreshold) table->LookupValue(i, i); //cout << "Lookup perf test (1000 * " << MinThreshold << ": " << (t1-t0) << " ms." 
<< endl; #ifdef HASHTABLE_PROFILE - minipal_log_print_info("Lookup perf test time: %d ms table size: %d max failure probe: %d longest collision chain: %d\n", (int) (t1-t0), (int) table->GetSize(table->Buckets()), (int) table->maxFailureProbe, (int) table->m_cbMaxCollisionLength); + minipal_log_print_info("Lookup perf test time: %d ms table size: %d max failure probe: %d longest collision chain: %d\n", (int) (t1-t0), (int) GetSize(table->Buckets()), (int) table->maxFailureProbe, (int) table->m_cbMaxCollisionLength); table->DumpStatistics(); #else // !HASHTABLE_PROFILE - minipal_log_print_info("Lookup perf test time: %d ms table size: %d\n", (int) (t1-t0), table->GetSize(table->Buckets())); + minipal_log_print_info("Lookup perf test time: %d ms table size: %d\n", (int) (t1-t0), GetSize(table->Buckets())); #endif // !HASHTABLE_PROFILE } #endif // !DACCESS_COMPILE diff --git a/src/coreclr/vm/hash.h b/src/coreclr/vm/hash.h index 768da9dd07e012..e4c998dbf43307 100644 --- a/src/coreclr/vm/hash.h +++ b/src/coreclr/vm/hash.h @@ -350,8 +350,6 @@ class HashMap UPTR NewSize(); // create a new bucket array and rehash the non-deleted entries void Rehash(); - static DWORD GetSize(PTR_Bucket rgBuckets); - static void SetSize(Bucket* rgBuckets, size_t size); PTR_Bucket Buckets(); UPTR CompareValues(const UPTR value1, const UPTR value2); From a4f7ee6258c67aa1ba2c1478fe39e5b976862530 Mon Sep 17 00:00:00 2001 From: Aaron R Robinson Date: Fri, 13 Feb 2026 16:35:32 -0800 Subject: [PATCH 11/24] Move free calls outside lock and add EbrPendingEntry constructor Split DrainQueue into DetachQueue (under lock) and DeletePendingEntries (outside lock) so CRT free calls don't hold m_pendingLock. Add EbrPendingEntry constructor to initialize fields at allocation time. --- src/coreclr/vm/ebr.cpp | 81 ++++++++++++++++++++++++++---------------- src/coreclr/vm/ebr.h | 2 +- 2 files changed, 52 insertions(+), 31 deletions(-) diff --git a/src/coreclr/vm/ebr.cpp b/src/coreclr/vm/ebr.cpp index c02f43a0e649ae..bcec5bf995f8a8 100644 --- a/src/coreclr/vm/ebr.cpp +++ b/src/coreclr/vm/ebr.cpp @@ -28,8 +28,15 @@ struct EbrThreadData }; // Singly-linked list node for pending deletions. -struct EbrPendingEntry +struct EbrPendingEntry final { + EbrPendingEntry(void* pObject, EbrDeleteFunc pfnDelete, size_t estimatedSize) + : m_pObject{ pObject } + , m_pfnDelete{ pfnDelete } + , m_estimatedSize{ estimatedSize } + , m_pNext{ nullptr } + {} + void* m_pObject; EbrDeleteFunc m_pfnDelete; size_t m_estimatedSize; @@ -43,6 +50,9 @@ struct EbrPendingEntry static thread_local EbrThreadData t_pThreadData = {}; // Global EBR collector for HashMap's async mode. +// If you want to add another usage for Ebr in the future, please consider +// the tradeoffs between creating multiple collectors or treating this as +// a single shared global collector. EbrCollector g_HashMapEbr; // Count of objects leaked due to OOM when queuing for deferred deletion. @@ -77,6 +87,24 @@ EbrCollector::Init(CrstType crstThreadList, CrstType crstPending, size_t memoryB m_initialized = true; } +// Delete all entries in a detached pending list. Must be called outside m_pendingLock. +// Returns the total estimated size freed. 
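The detach-then-free shape described in the commit message is a generic pattern worth seeing on its own: swap the list head out under the lock, then run the slow frees without it. A minimal standalone sketch with std::mutex; the types and names here are invented for illustration:

#include <mutex>

struct PendingNode
{
    PendingNode* next = nullptr;
};

std::mutex g_pendingLock;
PendingNode* g_pendingHead = nullptr;

// Under the lock: just swap the list head out. O(1), no frees.
PendingNode* DetachAll()
{
    std::lock_guard<std::mutex> hold(g_pendingLock);
    PendingNode* detached = g_pendingHead;
    g_pendingHead = nullptr;
    return detached;
}

// Outside the lock: walk the now-private list and run the frees.
void FreeDetached(PendingNode* node)
{
    while (node != nullptr)
    {
        PendingNode* next = node->next;
        delete node;
        node = next;
    }
}

The payoff is that arbitrary deleter work never extends the critical section protecting the queue.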
+static size_t DeletePendingEntries(EbrPendingEntry* pEntry) +{ + LIMITED_METHOD_CONTRACT; + + size_t freedSize = 0; + while (pEntry != nullptr) + { + EbrPendingEntry* pNext = pEntry->m_pNext; + pEntry->m_pfnDelete(pEntry->m_pObject); + freedSize += pEntry->m_estimatedSize; + delete pEntry; + pEntry = pNext; + } + return freedSize; +} + void EbrCollector::Shutdown() { @@ -91,11 +119,14 @@ EbrCollector::Shutdown() return; // Drain all pending queues unconditionally. + EbrPendingEntry* pDetached[EBR_EPOCHS]; { CrstHolder lock(&m_pendingLock); for (uint32_t i = 0; i < EBR_EPOCHS; i++) - DrainQueue(i); + pDetached[i] = DetachQueue(i); } + for (uint32_t i = 0; i < EBR_EPOCHS; i++) + DeletePendingEntries(pDetached[i]); // Threads should have exited their critical regions by now. We walk the list and clear them. { @@ -329,26 +360,16 @@ EbrCollector::TryAdvanceEpoch() // Deferred deletion // ============================================ -size_t -EbrCollector::DrainQueue(uint32_t slot) +// Detach the pending list for a given slot. Must be called under m_pendingLock. +// Returns the head of the detached list. +EbrPendingEntry* +EbrCollector::DetachQueue(uint32_t slot) { LIMITED_METHOD_CONTRACT; - size_t freedSize = 0; - - EbrPendingEntry* pEntry = m_pPendingHeads[slot]; + EbrPendingEntry* pHead = m_pPendingHeads[slot]; m_pPendingHeads[slot] = nullptr; - - while (pEntry != nullptr) - { - EbrPendingEntry* pNext = pEntry->m_pNext; - pEntry->m_pfnDelete(pEntry->m_pObject); - freedSize += pEntry->m_estimatedSize; - delete pEntry; - pEntry = pNext; - } - - return freedSize; + return pHead; } void @@ -363,14 +384,19 @@ EbrCollector::TryReclaim() if (TryAdvanceEpoch()) { - CrstHolder lock(&m_pendingLock); + EbrPendingEntry* pDetached; + { + CrstHolder lock(&m_pendingLock); + + // Objects retired 2 epochs ago are safe to delete. With 3 epochs + // and clock arithmetic, the safe slot is (current + 1) % 3. + uint32_t currentEpoch = m_globalEpoch; + uint32_t safeSlot = (currentEpoch + 1) % EBR_EPOCHS; - // Objects retired 2 epochs ago are safe to delete. With 3 epochs - // and clock arithmetic, the safe slot is (current + 1) % 3. - uint32_t currentEpoch = m_globalEpoch; - uint32_t safeSlot = (currentEpoch + 1) % EBR_EPOCHS; + pDetached = DetachQueue(safeSlot); + } - size_t freed = DrainQueue(safeSlot); + size_t freed = DeletePendingEntries(pDetached); if (freed > 0) { _ASSERTE((size_t)m_pendingSizeInBytes >= freed); @@ -399,7 +425,7 @@ EbrCollector::QueueForDeletion(void* pObject, EbrDeleteFunc pfnDelete, size_t es _ASSERTE(pData->m_pCollector == this && pData->m_criticalDepth > 0); // Allocate pending entry. - EbrPendingEntry* pEntry = new (nothrow) EbrPendingEntry(); + EbrPendingEntry* pEntry = new (nothrow) EbrPendingEntry(pObject, pfnDelete, estimatedSize); if (pEntry == nullptr) { // If we can't allocate, we must not delete pObject immediately, because @@ -410,11 +436,6 @@ EbrCollector::QueueForDeletion(void* pObject, EbrDeleteFunc pfnDelete, size_t es return; } - pEntry->m_pObject = pObject; - pEntry->m_pfnDelete = pfnDelete; - pEntry->m_estimatedSize = estimatedSize; - pEntry->m_pNext = nullptr; - // Insert into the current epoch's pending list. 
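The safe-slot arithmetic in TryReclaim above can be checked mechanically: with three epochs advancing 0 -> 1 -> 2 -> 0, the slot retired two advances ago is exactly the one that is neither the current slot nor the previous one. A standalone compile-time check, using illustrative constants rather than the runtime's:

#include <cstdint>

constexpr uint32_t kEpochs = 3;

constexpr uint32_t Prev(uint32_t e) { return (e + kEpochs - 1) % kEpochs; }
constexpr uint32_t Safe(uint32_t e) { return (e + 1) % kEpochs; }

// The safe slot must be the one slot that is neither the current epoch's
// (still receiving retirees) nor the previous epoch's (stragglers may
// still be reading). With three slots that pins it uniquely.
static_assert(Safe(0) != 0 && Safe(0) != Prev(0), "current == 0");
static_assert(Safe(1) != 1 && Safe(1) != Prev(1), "current == 1");
static_assert(Safe(2) != 2 && Safe(2) != Prev(2), "current == 2");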
{ CrstHolder lock(&m_pendingLock); diff --git a/src/coreclr/vm/ebr.h b/src/coreclr/vm/ebr.h index 17ee6f4c86311c..2c458785d8c3b5 100644 --- a/src/coreclr/vm/ebr.h +++ b/src/coreclr/vm/ebr.h @@ -97,7 +97,7 @@ class EbrCollector final bool TryAdvanceEpoch(); // Reclamation - size_t DrainQueue(uint32_t slot); + EbrPendingEntry* DetachQueue(uint32_t slot); void TryReclaim(); // Configuration From 98852e919b01b7057d7f5ce18089dd0f8edd9ce1 Mon Sep 17 00:00:00 2001 From: Aaron R Robinson Date: Fri, 13 Feb 2026 16:51:57 -0800 Subject: [PATCH 12/24] Move EBR reclamation to finalizer thread Remove hardcoded memory budget and inline reclamation from QueueForDeletion. Instead, hook into the finalizer thread via HaveExtraWorkForFinalizer/DoExtraWorkForFinalizer to perform epoch advancement and deferred deletion on the background thread, following the existing runtime cleanup pattern. --- src/coreclr/vm/ceemain.cpp | 2 +- src/coreclr/vm/ebr.cpp | 14 ++++++++------ src/coreclr/vm/ebr.h | 16 +++++++++------- src/coreclr/vm/finalizerthread.cpp | 9 ++++++++- 4 files changed, 26 insertions(+), 15 deletions(-) diff --git a/src/coreclr/vm/ceemain.cpp b/src/coreclr/vm/ceemain.cpp index b72e7abba11886..b138168cd1645c 100644 --- a/src/coreclr/vm/ceemain.cpp +++ b/src/coreclr/vm/ceemain.cpp @@ -789,7 +789,7 @@ void EEStartupHelper() // Initialize EBR (Epoch-Based Reclamation) for HashMap's async mode. // This must be done before any HashMap is initialized with fAsyncMode=TRUE. - g_HashMapEbr.Init(CrstEbrThreadList, CrstEbrPending, /* memoryBudgetInBytes */ 1024 * 1024); + g_HashMapEbr.Init(CrstEbrThreadList, CrstEbrPending); StubManager::InitializeStubManagers(); diff --git a/src/coreclr/vm/ebr.cpp b/src/coreclr/vm/ebr.cpp index bcec5bf995f8a8..bdaed23094781e 100644 --- a/src/coreclr/vm/ebr.cpp +++ b/src/coreclr/vm/ebr.cpp @@ -63,7 +63,7 @@ static LONG s_ebrLeakedDeletionCount = 0; // ============================================ void -EbrCollector::Init(CrstType crstThreadList, CrstType crstPending, size_t memoryBudgetInBytes) +EbrCollector::Init(CrstType crstThreadList, CrstType crstPending) { CONTRACTL { @@ -74,7 +74,6 @@ EbrCollector::Init(CrstType crstThreadList, CrstType crstPending, size_t memoryB _ASSERTE(!m_initialized); - m_memoryBudgetInBytes = memoryBudgetInBytes; m_globalEpoch = 0; m_pendingSizeInBytes = 0; m_pThreadListHead = nullptr; @@ -405,6 +404,13 @@ EbrCollector::TryReclaim() } } +bool +EbrCollector::CleanupRequested() +{ + LIMITED_METHOD_CONTRACT; + return m_initialized && (size_t)m_pendingSizeInBytes > 0; +} + void EbrCollector::QueueForDeletion(void* pObject, EbrDeleteFunc pfnDelete, size_t estimatedSize) { @@ -445,8 +451,4 @@ EbrCollector::QueueForDeletion(void* pObject, EbrDeleteFunc pfnDelete, size_t es m_pPendingHeads[slot] = pEntry; m_pendingSizeInBytes = (size_t)m_pendingSizeInBytes + estimatedSize; } - - // Try reclamation if over budget. - if ((size_t)m_pendingSizeInBytes > m_memoryBudgetInBytes) - TryReclaim(); } diff --git a/src/coreclr/vm/ebr.h b/src/coreclr/vm/ebr.h index 2c458785d8c3b5..326ba268761c5b 100644 --- a/src/coreclr/vm/ebr.h +++ b/src/coreclr/vm/ebr.h @@ -57,9 +57,7 @@ class EbrCollector final // Initialize the collector. 
// crstThreadList: Crst type for the thread list lock // crstPending: Crst type for the pending deletion queue lock - // memoryBudgetInBytes: approximate byte threshold of pending deletions before - // attempting reclamation - void Init(CrstType crstThreadList, CrstType crstPending, size_t memoryBudgetInBytes); + void Init(CrstType crstThreadList, CrstType crstPending); // Shutdown the collector, draining all pending deletions. // All threads should have exited their critical regions before calling. @@ -77,7 +75,7 @@ class EbrCollector final // threads have passed through a quiescent state. // pObject: the object to retire (must not be nullptr) // pfnDelete: function to call to delete the object - // estimatedSize: approximate size in bytes (for budget tracking) + // estimatedSize: approximate size in bytes (for tracking) void QueueForDeletion(void* pObject, EbrDeleteFunc pfnDelete, size_t estimatedSize); // Returns true if the calling thread is currently in a critical region. @@ -87,6 +85,12 @@ class EbrCollector final // EBR state. Should be called during thread shutdown. void ThreadDetach(); + // Returns true if there are pending deletions that may be reclaimable. + bool CleanupRequested(); + + // Attempt to advance the epoch and reclaim safe pending deletions. + void TryReclaim(); + private: // Thread list management EbrThreadData* GetOrCreateThreadData(); @@ -98,10 +102,8 @@ class EbrCollector final // Reclamation EbrPendingEntry* DetachQueue(uint32_t slot); - void TryReclaim(); - // Configuration - size_t m_memoryBudgetInBytes = 0; + // State bool m_initialized = false; // Global epoch counter [0, EBR_EPOCHS-1] diff --git a/src/coreclr/vm/finalizerthread.cpp b/src/coreclr/vm/finalizerthread.cpp index 38ccdae6713a19..abdc14b67a9f5a 100644 --- a/src/coreclr/vm/finalizerthread.cpp +++ b/src/coreclr/vm/finalizerthread.cpp @@ -9,6 +9,7 @@ #include "jithost.h" #include "genanalysis.h" #include "eventpipeadapter.h" +#include "ebr.h" #include "dn-stdio.h" #ifdef FEATURE_COMINTEROP @@ -151,7 +152,8 @@ bool FinalizerThread::HaveExtraWorkForFinalizer() || Thread::CleanupNeededForFinalizedThread() || YieldProcessorNormalization::IsMeasurementScheduled() || HasDelayedDynamicMethod() - || ThreadStore::s_pThreadStore->ShouldTriggerGCForDeadThreads(); + || ThreadStore::s_pThreadStore->ShouldTriggerGCForDeadThreads() + || g_HashMapEbr.CleanupRequested(); #endif // TARGET_WASM } @@ -201,6 +203,11 @@ static void DoExtraWorkForFinalizer(Thread* finalizerThread) GCX_PREEMP(); CleanupDelayedDynamicMethods(); } + + if (g_HashMapEbr.CleanupRequested()) + { + g_HashMapEbr.TryReclaim(); + } } OBJECTREF FinalizerThread::GetNextFinalizableObject() From 958ce3aaede5b4820d931fdcccb223007afbe2d5 Mon Sep 17 00:00:00 2001 From: Aaron R Robinson Date: Tue, 17 Feb 2026 09:59:47 -0800 Subject: [PATCH 13/24] Add STANDARD_VM_CONTRACT to NativeImage::Initialize and update Crst initialization in ReadyToRunInfo --- src/coreclr/vm/nativeimage.cpp | 2 ++ src/coreclr/vm/readytoruninfo.cpp | 10 ++-------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/coreclr/vm/nativeimage.cpp b/src/coreclr/vm/nativeimage.cpp index 1d6a0593d09897..2be7199c172029 100644 --- a/src/coreclr/vm/nativeimage.cpp +++ b/src/coreclr/vm/nativeimage.cpp @@ -63,6 +63,8 @@ NativeImage::NativeImage(AssemblyBinder *pAssemblyBinder, ReadyToRunLoadedImage void NativeImage::Initialize(READYTORUN_HEADER *pHeader, LoaderAllocator *pLoaderAllocator, AllocMemTracker *pamTracker) { + STANDARD_VM_CONTRACT; + LoaderHeap *pHeap = 
pLoaderAllocator->GetHighFrequencyHeap(); m_pReadyToRunInfo = new ReadyToRunInfo(/*pModule*/ NULL, pLoaderAllocator, pHeader, this, m_pImageLayout, pamTracker); diff --git a/src/coreclr/vm/readytoruninfo.cpp b/src/coreclr/vm/readytoruninfo.cpp index 3cd42e18b8ff5a..cb5c93fd792b06 100644 --- a/src/coreclr/vm/readytoruninfo.cpp +++ b/src/coreclr/vm/readytoruninfo.cpp @@ -386,18 +386,12 @@ void ReadyToRunInfo::SetMethodDescForEntryPointInNativeImage(PCODE entryPoint, M { CONTRACTL { + STANDARD_VM_CHECK; PRECONDITION(!m_isComponentAssembly); } CONTRACTL_END; - // We are entering coop mode here so that we don't do it later inside LookupMap while we are already holding the Crst. - // Doing it in the other order can block the debugger from running func-evals. For example thread A would acquire the Crst, - // then block at the coop transition inside LookupMap waiting for the debugger to resume from a break state. The debugger then - // requests thread B to run a funceval, the funceval tries to load some R2R method calling in here, then it blocks because - // thread A is holding the Crst. - GCX_COOP(); CrstHolder ch(&m_Crst); - if ((TADDR)m_entryPointToMethodDescMap.LookupValue(PCODEToPINSTR(entryPoint), (LPVOID)PCODEToPINSTR(entryPoint)) == (TADDR)INVALIDENTRY) { m_entryPointToMethodDescMap.InsertValue(PCODEToPINSTR(entryPoint), methodDesc); @@ -774,7 +768,7 @@ ReadyToRunInfo::ReadyToRunInfo(Module * pModule, LoaderAllocator* pLoaderAllocat m_pHeader(pHeader), m_pNativeImage(pModule != NULL ? pNativeImage: NULL), // m_pNativeImage is only set for composite image components, not the composite R2R info itself m_readyToRunCodeDisabled(FALSE), - m_Crst(CrstReadyToRunEntryPointToMethodDescMap, CRST_UNSAFE_COOPGC), + m_Crst(CrstReadyToRunEntryPointToMethodDescMap), m_pPersistentInlineTrackingMap(NULL), m_pNextR2RForUnrelatedCode(NULL) { From 6657f0c587b2f722009b6a51d93bc2b5a88be658 Mon Sep 17 00:00:00 2001 From: Aaron R Robinson Date: Tue, 17 Feb 2026 14:10:36 -0800 Subject: [PATCH 14/24] Refactor EBR shutdown handling and improve bucket management in HashMap --- src/coreclr/vm/ceemain.cpp | 2 - src/coreclr/vm/ebr.cpp | 54 ++-------------- src/coreclr/vm/ebr.h | 13 ++-- src/coreclr/vm/hash.cpp | 129 +++++++++++++++++++++---------------- src/coreclr/vm/hash.h | 2 +- 5 files changed, 85 insertions(+), 115 deletions(-) diff --git a/src/coreclr/vm/ceemain.cpp b/src/coreclr/vm/ceemain.cpp index b138168cd1645c..85e143601d266f 100644 --- a/src/coreclr/vm/ceemain.cpp +++ b/src/coreclr/vm/ceemain.cpp @@ -1386,8 +1386,6 @@ void STDMETHODCALLTYPE EEShutDownHelper(BOOL fIsDllUnloading) #ifdef LOGGING ShutdownLogging(); #endif - // Shutdown EBR before GC heap to ensure all deferred deletions are drained. - g_HashMapEbr.Shutdown(); GCHeapUtilities::GetGCHeap()->Shutdown(); } diff --git a/src/coreclr/vm/ebr.cpp b/src/coreclr/vm/ebr.cpp index bdaed23094781e..df5f267741a225 100644 --- a/src/coreclr/vm/ebr.cpp +++ b/src/coreclr/vm/ebr.cpp @@ -55,9 +55,6 @@ static thread_local EbrThreadData t_pThreadData = {}; // a single shared global collector. EbrCollector g_HashMapEbr; -// Count of objects leaked due to OOM when queuing for deferred deletion. 
-static LONG s_ebrLeakedDeletionCount = 0; - // ============================================ // EbrCollector implementation // ============================================ @@ -104,48 +101,6 @@ static size_t DeletePendingEntries(EbrPendingEntry* pEntry) return freedSize; } -void -EbrCollector::Shutdown() -{ - CONTRACTL - { - NOTHROW; - GC_NOTRIGGER; - } - CONTRACTL_END; - - if (!m_initialized) - return; - - // Drain all pending queues unconditionally. - EbrPendingEntry* pDetached[EBR_EPOCHS]; - { - CrstHolder lock(&m_pendingLock); - for (uint32_t i = 0; i < EBR_EPOCHS; i++) - pDetached[i] = DetachQueue(i); - } - for (uint32_t i = 0; i < EBR_EPOCHS; i++) - DeletePendingEntries(pDetached[i]); - - // Threads should have exited their critical regions by now. We walk the list and clear them. - { - CrstHolder lock(&m_threadListLock); - EbrThreadData* pCur = m_pThreadListHead; - while (pCur != nullptr) - { - EbrThreadData* pNext = pCur->m_pNext; - *pCur = {}; // Clear the data to prevent reuse after shutdown. - pCur = pNext; - } - m_pThreadListHead = nullptr; - } - - m_threadListLock.Destroy(); - m_pendingLock.Destroy(); - - m_initialized = false; -} - // ============================================ // Thread registration // ============================================ @@ -411,7 +366,7 @@ EbrCollector::CleanupRequested() return m_initialized && (size_t)m_pendingSizeInBytes > 0; } -void +bool EbrCollector::QueueForDeletion(void* pObject, EbrDeleteFunc pfnDelete, size_t estimatedSize) { CONTRACTL @@ -436,10 +391,8 @@ EbrCollector::QueueForDeletion(void* pObject, EbrDeleteFunc pfnDelete, size_t es { // If we can't allocate, we must not delete pObject immediately, because // EBR readers in async mode may still be traversing data structures that - // reference it. As a degraded fallback, intentionally leak pObject rather - // than risk a use-after-free. This path should be extremely rare. - InterlockedIncrement(&s_ebrLeakedDeletionCount); - return; + // reference it. + return false; } // Insert into the current epoch's pending list. @@ -451,4 +404,5 @@ EbrCollector::QueueForDeletion(void* pObject, EbrDeleteFunc pfnDelete, size_t es m_pPendingHeads[slot] = pEntry; m_pendingSizeInBytes = (size_t)m_pendingSizeInBytes + estimatedSize; } + return true; } diff --git a/src/coreclr/vm/ebr.h b/src/coreclr/vm/ebr.h index 326ba268761c5b..4eb479a08debe4 100644 --- a/src/coreclr/vm/ebr.h +++ b/src/coreclr/vm/ebr.h @@ -24,7 +24,8 @@ // g_HashMapEbr.QueueForDeletion(pOldData, deleteFn, sizeEstimate); // // // Shutdown: -// g_HashMapEbr.Shutdown(); +// The EBR collector doesn't support a shutdown feature. CoreCLR doesn't support +// clean shutdown. #ifndef __EBR_H__ #define __EBR_H__ @@ -59,10 +60,6 @@ class EbrCollector final // crstPending: Crst type for the pending deletion queue lock void Init(CrstType crstThreadList, CrstType crstPending); - // Shutdown the collector, draining all pending deletions. - // All threads should have exited their critical regions before calling. - void Shutdown(); - // Enter a critical region. While in a critical region, objects queued for // deletion will not be freed. Re-entrant: nested calls are counted. 
void EnterCriticalRegion(); @@ -76,7 +73,11 @@ class EbrCollector final // pObject: the object to retire (must not be nullptr) // pfnDelete: function to call to delete the object // estimatedSize: approximate size in bytes (for tracking) - void QueueForDeletion(void* pObject, EbrDeleteFunc pfnDelete, size_t estimatedSize); + // Returns true if the object was successfully queued for deletion, false if + // the queue allocation failed (in which case the object was not queued and will not be deleted). + // Note: if queuing fails, the caller is responsible for ensuring the object is eventually deleted, + // either by retrying the queue or by deleting it directly if safe to do so. + bool QueueForDeletion(void* pObject, EbrDeleteFunc pfnDelete, size_t estimatedSize); // Returns true if the calling thread is currently in a critical region. bool InCriticalRegion(); diff --git a/src/coreclr/vm/hash.cpp b/src/coreclr/vm/hash.cpp index 5d64efdc82edf4..4c565da2b6f78a 100644 --- a/src/coreclr/vm/hash.cpp +++ b/src/coreclr/vm/hash.cpp @@ -135,6 +135,15 @@ static void DeleteObsoleteBuckets(void* p) FreeBuckets((Bucket*)p); } +// The +1 is because entries are 1 based since the first entry is a size field, not a bucket. +// See Buckets() method that works with the member variable m_rgBuckets. +// See GetSize() and SetSize() for how the size field is stored. +static PTR_Bucket GetBucketPointer(PTR_Bucket rgBuckets) +{ + LIMITED_METHOD_DAC_CONTRACT; + return rgBuckets + 1; +} + //--------------------------------------------------------------------- // inline Bucket* HashMap::Buckets() // get the pointer to the bucket array @@ -146,7 +155,7 @@ PTR_Bucket HashMap::Buckets() #if !defined(DACCESS_COMPILE) _ASSERTE (!m_fAsyncMode || g_HashMapEbr.InCriticalRegion()); #endif - return m_rgBuckets + 1; + return GetBucketPointer(m_rgBuckets); } //--------------------------------------------------------------------- @@ -832,7 +841,7 @@ UPTR HashMap::PutEntry (Bucket* rgBuckets, UPTR key, UPTR value) // compute the new size based on the number of free slots // inline -UPTR HashMap::NewSize() +UPTR HashMap::NewSize() const { STATIC_CONTRACT_NOTHROW; STATIC_CONTRACT_GC_NOTRIGGER; @@ -888,39 +897,31 @@ void HashMap::Rehash() return; } - m_iPrimeIndex = newPrimeIndex; - - DWORD cbNewSize = g_rgPrimes[m_iPrimeIndex]; - - Bucket* rgBuckets = Buckets(); - UPTR cbCurrSize = GetSize(rgBuckets); + // Collect the current bucket state. + Bucket* rgCurrentBuckets = Buckets(); + DWORD currentBucketsSize = GetSize(rgCurrentBuckets); + // Allocate a new array of buckets. + const DWORD cbNewSize = g_rgPrimes[newPrimeIndex]; Bucket* rgNewBuckets = AllocateBuckets(cbNewSize); - // current valid slots - UPTR cbValidSlots = m_cbInserts-m_cbDeletes; - m_cbInserts = cbValidSlots; // reset insert count to the new valid count - m_cbPrevSlotsInUse = cbValidSlots; // track the previous delete count - m_cbDeletes = 0; // reset delete count - // rehash table into it - + // Rehash table into new buckets. 
+    UPTR cbValidSlots = m_cbInserts - m_cbDeletes;
+    const UPTR cbValidSlotsInit = cbValidSlots;
     if (cbValidSlots) // if there are valid slots to be rehashed
     {
-        for (unsigned int nb = 0; nb < cbCurrSize; nb++)
+        for (DWORD nb = 0; nb < currentBucketsSize; nb++)
         {
-            for (unsigned int i = 0; i < SLOTS_PER_BUCKET; i++)
+            for (DWORD i = 0; i < SLOTS_PER_BUCKET; i++)
             {
-                UPTR key =rgBuckets[nb].m_rgKeys[i];
+                UPTR key = rgCurrentBuckets[nb].m_rgKeys[i];
                 if (key > DELETED)
                 {
+                    UPTR ntry = PutEntry(GetBucketPointer(rgNewBuckets), key, rgCurrentBuckets[nb].GetValue (i));
 #ifdef HASHTABLE_PROFILE
-                    UPTR ntry =
-#endif
-                    PutEntry (rgNewBuckets+1, key, rgBuckets[nb].GetValue (i));
-                    #ifdef HASHTABLE_PROFILE
-                        if(ntry >=8)
-                            m_cbInsertProbesGt8++;
-                    #endif // HASHTABLE_PROFILE
+                    if(ntry >=8)
+                        m_cbInsertProbesGt8++;
+#endif // HASHTABLE_PROFILE
 
                     // check if we can bail out
                     if (--cbValidSlots == 0)
@@ -930,41 +931,67 @@
         } //for all buckets
     }
 
-LDone:
-
-    Bucket* pObsoleteTables = m_rgBuckets;
+    // Capture the current buckets pointer for later deletion if needed.
+    // See the Buckets() APIs for why the field is used directly.
+    void* pObsoleteBucketsAlloc = m_rgBuckets;
+    if (m_fAsyncMode)
+    {
+        // In async mode, readers may still be traversing the old bucket array.
+        // Queue it for deferred deletion via EBR; the buckets are freed once
+        // all threads have passed through a quiescent state. If queuing fails,
+        // fail the rehash rather than risk a use-after-free.
+        size_t obsoleteSize = currentBucketsSize;
+        if (!g_HashMapEbr.QueueForDeletion(
+            pObsoleteBucketsAlloc,
+            DeleteObsoleteBuckets,
+            (obsoleteSize + 1) * sizeof(Bucket))) // See AllocateBuckets for +1
+        {
+            // Free the new allocation before throwing OOM.
+            FreeBuckets(rgNewBuckets);
+            ThrowOutOfMemory();
+        }
+    }
+
+    // Move the old bucket state to names that make its obsolescence explicit.
+    Bucket* obsoleteBuckets = rgCurrentBuckets;
+    DWORD obsoleteBucketsSize = currentBucketsSize;
+    rgCurrentBuckets = NULL;
+    currentBucketsSize = 0;
 
     // memory barrier, to replace the pointer to array of bucket
     MemoryBarrier();
 
-    // replace the old array with the new one.
+    // Update the HashMap state
     m_rgBuckets = rgNewBuckets;
+    m_iPrimeIndex = newPrimeIndex;
+    m_cbInserts = cbValidSlotsInit;        // reset insert count to the new valid count
+    m_cbPrevSlotsInUse = cbValidSlotsInit; // track the previous delete count
+    m_cbDeletes = 0;                       // reset delete count
 
-    #ifdef HASHTABLE_PROFILE
-        m_cbRehash++;
-        m_cbRehashSlots+=m_cbInserts;
-        m_cbObsoleteTables++; // track statistics
-        m_cbTotalBuckets += (cbNewSize+1);
-    #endif // HASHTABLE_PROFILE
+#ifdef HASHTABLE_PROFILE
+    m_cbRehash++;
+    m_cbRehashSlots += m_cbInserts;
+    m_cbObsoleteTables++; // track statistics
+    m_cbTotalBuckets += (cbNewSize + 1); // +1 for the size field. See AllocateBuckets for details.
+#endif // HASHTABLE_PROFILE
 
 #ifdef _DEBUG
-
-    unsigned nb;
+    DWORD nb;
     if (m_fAsyncMode)
     {
         // for all non deleted keys in the old table, make sure the corresponding values
         // are in the new lookup table
-
-        for (nb = 1; nb <= ((size_t*)pObsoleteTables)[0]; nb++)
+        for (nb = 0; nb < obsoleteBucketsSize; nb++)
        {
-            for (unsigned int i =0; i < SLOTS_PER_BUCKET; i++)
+            for (DWORD i = 0; i < SLOTS_PER_BUCKET; i++)
             {
-                if (pObsoleteTables[nb].m_rgKeys[i] > DELETED)
+                if (obsoleteBuckets[nb].m_rgKeys[i] > DELETED)
                 {
-                    UPTR value = pObsoleteTables[nb].GetValue (i);
+                    UPTR value = obsoleteBuckets[nb].GetValue (i);
                     // make sure the value is present in the new table
-                    ASSERT (m_pCompare != NULL || value == LookupValue (pObsoleteTables[nb].m_rgKeys[i], value));
+                    ASSERT (m_pCompare != NULL || value == LookupValue (obsoleteBuckets[nb].m_rgKeys[i], value));
                 }
             }
         }
@@ -974,7 +1001,7 @@
     // if the compare function provided is null, then keys must be unique
     for (nb = 0; nb < cbNewSize; nb++)
     {
-        for (unsigned int i = 0; i < SLOTS_PER_BUCKET; i++)
+        for (DWORD i = 0; i < SLOTS_PER_BUCKET; i++)
         {
             UPTR keyv = Buckets()[nb].m_rgKeys[i];
             ASSERT (keyv != DELETED);
@@ -986,20 +1013,10 @@
     }
 #endif // _DEBUG
 
-    if (m_fAsyncMode)
-    {
-        // In async mode, readers may still be traversing the old bucket array.
-        // Queue for deferred deletion via EBR. The buckets will be freed once
-        // all threads have exited their critical regions or later.
-        size_t obsoleteSize = ((size_t*)pObsoleteTables)[0];
-        g_HashMapEbr.QueueForDeletion(
-            pObsoleteTables,
-            DeleteObsoleteBuckets,
-            (obsoleteSize + 1) * sizeof(Bucket));
-    }
-    else
+    // In non-async mode we can delete the old buckets immediately, since no
+    // readers can be traversing them.
+    if (!m_fAsyncMode)
     {
-        DeleteObsoleteBuckets(pObsoleteTables);
+        DeleteObsoleteBuckets(pObsoleteBucketsAlloc);
     }
 }
 
diff --git a/src/coreclr/vm/hash.h b/src/coreclr/vm/hash.h
index e4c998dbf43307..99a6a640c87a6c 100644
--- a/src/coreclr/vm/hash.h
+++ b/src/coreclr/vm/hash.h
@@ -347,7 +347,7 @@ class HashMap
 
     // compute the new size, based on the number of free slots
     // available, compact or expand
-    UPTR NewSize();
+    UPTR NewSize() const;
     // create a new bucket array and rehash the non-deleted entries
     void Rehash();
     PTR_Bucket Buckets();

From ae8446860ed2fafe14bf53c2afb7bc91111e0fd3 Mon Sep 17 00:00:00 2001
From: Aaron R Robinson
Date: Tue, 17 Feb 2026 14:49:58 -0800
Subject: [PATCH 15/24] Enhance EBR thread detachment handling and add
 preemption for reclamation in finalizer thread

---
 src/coreclr/vm/ceemain.cpp         | 1 -
 src/coreclr/vm/corhost.cpp         | 4 ++--
 src/coreclr/vm/finalizerthread.cpp | 1 +
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/coreclr/vm/ceemain.cpp b/src/coreclr/vm/ceemain.cpp
index 85e143601d266f..95f670dbcd6151 100644
--- a/src/coreclr/vm/ceemain.cpp
+++ b/src/coreclr/vm/ceemain.cpp
@@ -1386,7 +1386,6 @@ void STDMETHODCALLTYPE EEShutDownHelper(BOOL fIsDllUnloading)
 #ifdef LOGGING
         ShutdownLogging();
 #endif
-        GCHeapUtilities::GetGCHeap()->Shutdown();
     }
 }
 
diff --git a/src/coreclr/vm/corhost.cpp b/src/coreclr/vm/corhost.cpp
index 875ecb36d5e126..303db8cb5f83b0 100644
--- a/src/coreclr/vm/corhost.cpp
+++ b/src/coreclr/vm/corhost.cpp
@@ -1073,6 +1073,8 @@ void ThreadDetaching()
     // 2. When a fiber is destroyed, or OS calls FlsCallback after DLL_THREAD_DETACH process.
     // We will null the FLS and TLS entry if it matches the deleted one.
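Stepping back to the Rehash flow finalized above, the order of operations is the load-bearing part: retire the old array first (so a failure can abort before any mutation), then publish the new pointer behind a barrier. A standalone sketch of that publish step, using std::atomic release ordering in place of MemoryBarrier(); Table, Publish, and retire are invented names:

#include <atomic>

struct Table { /* bucket storage elided */ };

std::atomic<Table*> g_table{nullptr};

// 'retire' models QueueForDeletion: it may fail, and it must be attempted
// before the swap so a failure leaves the old table fully intact.
bool Publish(Table* fresh, bool (*retire)(Table*))
{
    Table* old = g_table.load(std::memory_order_relaxed);
    if (old != nullptr && !retire(old))
        return false; // nothing published, nothing lost
    // Release ordering plays the MemoryBarrier() role: the fully built
    // contents of 'fresh' become visible no later than the pointer itself.
    g_table.store(fresh, std::memory_order_release);
    return true;
}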
+    g_HashMapEbr.ThreadDetach();
+
     if (StressLog::t_pCurrentThreadLog != NULL)
     {
 #ifdef STRESS_LOG
@@ -1082,8 +1084,6 @@ void ThreadDetaching()
 #endif
     }
 
-    g_HashMapEbr.ThreadDetach();
-
 #ifdef ENABLE_CONTRACTS_IMPL
     if (t_pClrDebugState != NULL)
     {

diff --git a/src/coreclr/vm/finalizerthread.cpp b/src/coreclr/vm/finalizerthread.cpp
index abdc14b67a9f5a..287baf16b862be 100644
--- a/src/coreclr/vm/finalizerthread.cpp
+++ b/src/coreclr/vm/finalizerthread.cpp
@@ -206,6 +206,7 @@ static void DoExtraWorkForFinalizer(Thread* finalizerThread)
 
     if (g_HashMapEbr.CleanupRequested())
     {
+        GCX_PREEMP();
         g_HashMapEbr.TryReclaim();
     }
 }

From a3040dbd5b85738feeddf25daba70c1daa81f657 Mon Sep 17 00:00:00 2001
From: Aaron R Robinson
Date: Wed, 18 Feb 2026 11:33:31 -0800
Subject: [PATCH 16/24] Make EBR thread list insert lock-free with lazy
 deletion

Replace the CrstHolder in GetOrCreateThreadData with a lock-free CAS push
to fix a contract violation: callers like VSD_ResolveWorker run in
GC_NOTRIGGER + MODE_COOPERATIVE where taking a Crst is forbidden.

- Add Volatile<BOOL> m_detached to EbrThreadData for lazy logical deletion
- GetOrCreateThreadData: lock-free CAS push (InterlockedCompareExchangeT)
- UnlinkThreadData: set m_detached = TRUE instead of physical unlink
- CanAdvanceEpoch: skip detached nodes, use VolatileLoad for head
- TryAdvanceEpoch: serialize scan + advance + prune under m_threadListLock
- Rename TryReclaim/CleanupRequested to CleanUpPending/CleanUpRequested
- Add finalizer thread assert in CleanUpPending
- Add OwnedByCurrentThread assert in DetachQueue
- Fix stale comments in ebr.h

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/coreclr/vm/ebr.cpp             | 81 ++++++++++++++++++------------
 src/coreclr/vm/ebr.h               | 12 ++---
 src/coreclr/vm/finalizerthread.cpp |  6 +--
 3 files changed, 56 insertions(+), 43 deletions(-)

diff --git a/src/coreclr/vm/ebr.cpp b/src/coreclr/vm/ebr.cpp
index df5f267741a225..498fdb15a86b9c 100644
--- a/src/coreclr/vm/ebr.cpp
+++ b/src/coreclr/vm/ebr.cpp
@@ -3,6 +3,7 @@
 
 #include "common.h"
 #include "ebr.h"
+#include "finalizerthread.h"
 
 // ============================================
 // Per-thread EBR state
@@ -23,6 +24,9 @@ struct EbrThreadData
     // Nesting depth for re-entrant critical regions.
     uint32_t m_criticalDepth;
 
+    // Logically deleted from the thread list.
+    Volatile<BOOL> m_detached;
+
     // Intrusive linked list through the collector's thread list.
     EbrThreadData* m_pNext;
 };
@@ -122,14 +126,17 @@ EbrCollector::GetOrCreateThreadData()
     pData->m_pCollector = this;
     pData->m_localEpoch = 0;
     pData->m_criticalDepth = 0;
+    pData->m_detached = FALSE;
     pData->m_pNext = nullptr;
 
     // Link into the collector's thread list.
+    // See TryAdvanceEpoch for the removal semantics of detached nodes.
+ EbrThreadData* pHead; + do { - CrstHolder lock(&m_threadListLock); - pData->m_pNext = m_pThreadListHead; - m_pThreadListHead = pData; - } + pHead = VolatileLoad(&m_pThreadListHead); + pData->m_pNext = pHead; + } while (InterlockedCompareExchangeT(&m_pThreadListHead, pData, pHead) != pHead); return pData; } @@ -137,27 +144,13 @@ EbrCollector::GetOrCreateThreadData() void EbrCollector::UnlinkThreadData(EbrThreadData* pData) { - CONTRACTL - { - NOTHROW; - GC_NOTRIGGER; - } - CONTRACTL_END; + LIMITED_METHOD_CONTRACT; if (pData == nullptr) return; - CrstHolder lock(&m_threadListLock); - EbrThreadData** pp = &m_pThreadListHead; - while (*pp != nullptr) - { - if (*pp == pData) - { - *pp = pData->m_pNext; - break; - } - pp = &(*pp)->m_pNext; - } + // Logically detach: the epoch scanner skips detached nodes. + pData->m_detached = TRUE; } // ============================================ @@ -263,21 +256,22 @@ EbrCollector::CanAdvanceEpoch() { LIMITED_METHOD_CONTRACT; - // Caller must hold m_threadListLock. uint32_t currentEpoch = m_globalEpoch; - EbrThreadData* pData = m_pThreadListHead; + EbrThreadData* pData = VolatileLoad(&m_pThreadListHead); while (pData != nullptr) { - uint32_t localEpoch = pData->m_localEpoch; - bool active = (localEpoch & ACTIVE_FLAG) != 0; - - if (active) + if (!pData->m_detached) { - // If an active thread has not yet observed the current epoch, - // we cannot advance. - if (localEpoch != (currentEpoch | ACTIVE_FLAG)) - return false; + uint32_t localEpoch = pData->m_localEpoch; + bool active = (localEpoch & ACTIVE_FLAG) != 0; + if (active) + { + // If an active thread has not yet observed the current epoch, + // we cannot advance. + if (localEpoch != (currentEpoch | ACTIVE_FLAG)) + return false; + } } pData = pData->m_pNext; @@ -295,7 +289,11 @@ EbrCollector::TryAdvanceEpoch() GC_NOTRIGGER; } CONTRACTL_END; + _ASSERTE(FinalizerThread::IsCurrentThreadFinalizer()); + // Serialize scan, epoch advance, and pruning under the lock. This prevents + // two concurrent callers from double-advancing the epoch and ensures the + // CanAdvanceEpoch result is still valid when we act on it. CrstHolder lock(&m_threadListLock); // Acquire fence to synchronize with the MemoryBarrier() calls in @@ -307,6 +305,19 @@ EbrCollector::TryAdvanceEpoch() uint32_t newEpoch = ((uint32_t)m_globalEpoch + 1) % EBR_EPOCHS; m_globalEpoch = newEpoch; + + // Physically prune detached nodes. New nodes are only ever CAS-pushed at + // the head, so unlinking interior nodes here is safe without interfering + // with concurrent inserts. + EbrThreadData** pp = &m_pThreadListHead; + while (*pp != nullptr) + { + if ((*pp)->m_detached) + *pp = (*pp)->m_pNext; + else + pp = &(*pp)->m_pNext; + } + return true; } @@ -314,12 +325,13 @@ EbrCollector::TryAdvanceEpoch() // Deferred deletion // ============================================ -// Detach the pending list for a given slot. Must be called under m_pendingLock. +// Detach the pending list for a given slot. // Returns the head of the detached list. 
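The CAS push in GetOrCreateThreadData above is the classic lock-free stack insert. A standalone rendering with std::atomic, assuming the invented ThreadNode/Push names:

#include <atomic>

struct ThreadNode
{
    ThreadNode* next = nullptr;
};

std::atomic<ThreadNode*> g_head{nullptr};

// Treiber-style push: point the new node at the observed head, then CAS the
// head forward; on contention, compare_exchange_weak refreshes 'head' and
// the loop retries. No lock is taken, which is what makes this legal from
// contexts where blocking on a Crst is forbidden.
void Push(ThreadNode* node)
{
    ThreadNode* head = g_head.load(std::memory_order_relaxed);
    do
    {
        node->next = head;
    } while (!g_head.compare_exchange_weak(head, node,
                                           std::memory_order_release,
                                           std::memory_order_relaxed));
}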
EbrPendingEntry* EbrCollector::DetachQueue(uint32_t slot) { LIMITED_METHOD_CONTRACT; + _ASSERTE(m_pendingLock.OwnedByCurrentThread()); EbrPendingEntry* pHead = m_pPendingHeads[slot]; m_pPendingHeads[slot] = nullptr; @@ -327,7 +339,7 @@ EbrCollector::DetachQueue(uint32_t slot) } void -EbrCollector::TryReclaim() +EbrCollector::CleanUpPending() { CONTRACTL { @@ -335,6 +347,7 @@ EbrCollector::TryReclaim() GC_NOTRIGGER; } CONTRACTL_END; + _ASSERTE(FinalizerThread::IsCurrentThreadFinalizer()); if (TryAdvanceEpoch()) { @@ -360,7 +373,7 @@ EbrCollector::TryReclaim() } bool -EbrCollector::CleanupRequested() +EbrCollector::CleanUpRequested() { LIMITED_METHOD_CONTRACT; return m_initialized && (size_t)m_pendingSizeInBytes > 0; diff --git a/src/coreclr/vm/ebr.h b/src/coreclr/vm/ebr.h index 4eb479a08debe4..a6369886629058 100644 --- a/src/coreclr/vm/ebr.h +++ b/src/coreclr/vm/ebr.h @@ -10,7 +10,7 @@ // // Usage: // // Startup: -// g_HashMapEbr.Init(CrstEbrThreadList, CrstEbrPending, budgetBytes); +// g_HashMapEbr.Init(CrstEbrThreadList, CrstEbrPending); // // // Reader/Writer thread: // { @@ -82,15 +82,15 @@ class EbrCollector final // Returns true if the calling thread is currently in a critical region. bool InCriticalRegion(); - // Detach the calling thread from this collector. Unlinks and frees per-thread - // EBR state. Should be called during thread shutdown. + // Detach the calling thread from this collector. Marks per-thread EBR state + // for deferred cleanup. Should be called during thread shutdown. void ThreadDetach(); // Returns true if there are pending deletions that may be reclaimable. - bool CleanupRequested(); + bool CleanUpRequested(); // Attempt to advance the epoch and reclaim safe pending deletions. - void TryReclaim(); + void CleanUpPending(); private: // Thread list management @@ -110,7 +110,7 @@ class EbrCollector final // Global epoch counter [0, EBR_EPOCHS-1] Volatile m_globalEpoch; - // Registered thread list (protected by m_threadListLock) + // Registered thread list (lock-free CAS insert; m_threadListLock used only for pruning) CrstStatic m_threadListLock; EbrThreadData* m_pThreadListHead = nullptr; diff --git a/src/coreclr/vm/finalizerthread.cpp b/src/coreclr/vm/finalizerthread.cpp index 287baf16b862be..e9f701b169316c 100644 --- a/src/coreclr/vm/finalizerthread.cpp +++ b/src/coreclr/vm/finalizerthread.cpp @@ -153,7 +153,7 @@ bool FinalizerThread::HaveExtraWorkForFinalizer() || YieldProcessorNormalization::IsMeasurementScheduled() || HasDelayedDynamicMethod() || ThreadStore::s_pThreadStore->ShouldTriggerGCForDeadThreads() - || g_HashMapEbr.CleanupRequested(); + || g_HashMapEbr.CleanUpRequested(); #endif // TARGET_WASM } @@ -204,10 +204,10 @@ static void DoExtraWorkForFinalizer(Thread* finalizerThread) CleanupDelayedDynamicMethods(); } - if (g_HashMapEbr.CleanupRequested()) + if (g_HashMapEbr.CleanUpRequested()) { GCX_PREEMP(); - g_HashMapEbr.TryReclaim(); + g_HashMapEbr.CleanUpPending(); } } From 07473cc8cdc02bdd17e9d69832debab3d5bb0c44 Mon Sep 17 00:00:00 2001 From: Aaron R Robinson Date: Wed, 18 Feb 2026 12:52:03 -0800 Subject: [PATCH 17/24] Heap-allocate EbrThreadData with TLS destructor for cleanup EbrThreadData was stored directly as a thread_local struct, but the node remains linked in the collector's thread list after the thread exits. When C++ TLS tears down the storage, CanAdvanceEpoch/TryAdvanceEpoch would chase a dangling pointer. Additionally, ThreadDetaching() in corhost.cpp only fires for threads with a runtime Thread object. 
Threads that used EBR without one would never get cleaned up. - Heap-allocate EbrThreadData so the node outlives the thread - Add EbrTlsDestructor to call ThreadDetach for all threads - TryAdvanceEpoch now deletes pruned nodes - Remove g_HashMapEbr.ThreadDetach() from corhost.cpp - Inline UnlinkThreadData logic into ThreadDetach TODO: The heap allocation in GetOrCreateThreadData (new EbrThreadData) runs on the GC_NOTRIGGER + MODE_COOPERATIVE hot path. This allocation should be moved to a safer location (e.g. thread setup) to avoid potential OOM on the critical dispatch path. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/vm/corhost.cpp | 2 - src/coreclr/vm/ebr.cpp | 92 ++++++++++++++++++++++---------------- src/coreclr/vm/ebr.h | 3 +- 3 files changed, 56 insertions(+), 41 deletions(-) diff --git a/src/coreclr/vm/corhost.cpp b/src/coreclr/vm/corhost.cpp index 303db8cb5f83b0..6ddeb52ab906c2 100644 --- a/src/coreclr/vm/corhost.cpp +++ b/src/coreclr/vm/corhost.cpp @@ -1073,8 +1073,6 @@ void ThreadDetaching() // 2. When a fiber is destroyed, or OS calls FlsCallback after DLL_THREAD_DETACH process. // We will null the FLS and TLS entry if it matches the deleted one. - g_HashMapEbr.ThreadDetach(); - if (StressLog::t_pCurrentThreadLog != NULL) { #ifdef STRESS_LOG diff --git a/src/coreclr/vm/ebr.cpp b/src/coreclr/vm/ebr.cpp index 498fdb15a86b9c..4ba77fd9ff5430 100644 --- a/src/coreclr/vm/ebr.cpp +++ b/src/coreclr/vm/ebr.cpp @@ -15,20 +15,20 @@ static constexpr uint32_t ACTIVE_FLAG = 0x80000000U; struct EbrThreadData { - EbrCollector* m_pCollector; + EbrCollector* m_pCollector = nullptr; // Local epoch with ACTIVE_FLAG. When the thread is quiescent (outside a // critical region) ACTIVE_FLAG is cleared and the epoch bits are zero. Volatile m_localEpoch; // Nesting depth for re-entrant critical regions. - uint32_t m_criticalDepth; + uint32_t m_criticalDepth = 0; // Logically deleted from the thread list. Volatile m_detached; // Intrusive linked list through the collector's thread list. - EbrThreadData* m_pNext; + EbrThreadData* m_pNext = nullptr; }; // Singly-linked list node for pending deletions. @@ -47,11 +47,28 @@ struct EbrPendingEntry final EbrPendingEntry* m_pNext; }; -// Each thread holds its own EbrThreadData via thread_local. -// We support only a single collector per process; if multiple collectors -// are needed this can be extended to a per-collector TLS map, but for the -// HashMap use-case a single global collector suffices. -static thread_local EbrThreadData t_pThreadData = {}; +// Each thread holds a pointer to its heap-allocated EbrThreadData. The node +// must outlive the thread because it remains linked in the collector's thread +// list until TryAdvanceEpoch physically prunes and deletes it. +static thread_local EbrThreadData* t_pThreadData = nullptr; + +// Destructor that runs when the thread's C++ thread_local storage is torn +// down. This ensures EBR cleanup happens for *all* threads that entered a +// critical region, not only threads that have a runtime Thread object (which +// is required for the RuntimeThreadShutdown / ThreadDetaching path). +struct EbrTlsDestructor final +{ + ~EbrTlsDestructor() + { + EbrThreadData* pData = t_pThreadData; + if (pData != nullptr && pData->m_pCollector != nullptr) + { + pData->m_pCollector->ThreadDetach(); + t_pThreadData = nullptr; + } + } +}; +static thread_local EbrTlsDestructor t_ebrTlsDestructor; // Global EBR collector for HashMap's async mode. 
// If you want to add another usage for Ebr in the future, please consider @@ -119,15 +136,18 @@ EbrCollector::GetOrCreateThreadData() } CONTRACTL_END; - EbrThreadData* pData = &t_pThreadData; - if (pData->m_pCollector == this) + EbrThreadData* pData = t_pThreadData; + if (pData != nullptr && pData->m_pCollector == this) return pData; + // Heap-allocate so the node outlives the thread. It will be freed + // when TryAdvanceEpoch prunes detached nodes. + pData = new (nothrow) EbrThreadData(); + if (pData == nullptr) + return nullptr; + pData->m_pCollector = this; - pData->m_localEpoch = 0; - pData->m_criticalDepth = 0; - pData->m_detached = FALSE; - pData->m_pNext = nullptr; + t_pThreadData = pData; // Link into the collector's thread list. // See TryAdvanceEpoch for the removal semantics of detached nodes. @@ -141,18 +161,6 @@ EbrCollector::GetOrCreateThreadData() return pData; } -void -EbrCollector::UnlinkThreadData(EbrThreadData* pData) -{ - LIMITED_METHOD_CONTRACT; - - if (pData == nullptr) - return; - - // Logically detach: the epoch scanner skips detached nodes. - pData->m_detached = TRUE; -} - // ============================================ // Critical region enter/exit // ============================================ @@ -201,7 +209,8 @@ EbrCollector::ExitCriticalRegion() _ASSERTE(m_initialized); - EbrThreadData* pData = &t_pThreadData; + EbrThreadData* pData = t_pThreadData; + _ASSERTE(pData != nullptr); _ASSERTE(pData->m_pCollector == this); _ASSERTE(pData->m_criticalDepth > 0); @@ -221,8 +230,8 @@ EbrCollector::InCriticalRegion() { LIMITED_METHOD_CONTRACT; - EbrThreadData* pData = &t_pThreadData; - if (pData->m_pCollector != this) + EbrThreadData* pData = t_pThreadData; + if (pData == nullptr || pData->m_pCollector != this) return false; return pData->m_criticalDepth > 0; } @@ -237,14 +246,15 @@ EbrCollector::ThreadDetach() } CONTRACTL_END; - EbrThreadData* pData = &t_pThreadData; - if (pData->m_pCollector != this) + EbrThreadData* pData = t_pThreadData; + if (pData == nullptr || pData->m_pCollector != this) return; _ASSERTE(!InCriticalRegion()); - UnlinkThreadData(pData); - t_pThreadData = {}; // Clear thread_local to prevent reuse after detach. + // Logically detach: the epoch scanner skips detached nodes. + // The heap-allocated node is freed later by TryAdvanceEpoch. + pData->m_detached = TRUE; } // ============================================ @@ -306,16 +316,22 @@ EbrCollector::TryAdvanceEpoch() uint32_t newEpoch = ((uint32_t)m_globalEpoch + 1) % EBR_EPOCHS; m_globalEpoch = newEpoch; - // Physically prune detached nodes. New nodes are only ever CAS-pushed at - // the head, so unlinking interior nodes here is safe without interfering - // with concurrent inserts. + // Physically prune and delete detached nodes. New nodes are only ever + // CAS-pushed at the head, so unlinking interior nodes here is safe + // without interfering with concurrent inserts. EbrThreadData** pp = &m_pThreadListHead; while (*pp != nullptr) { if ((*pp)->m_detached) - *pp = (*pp)->m_pNext; + { + EbrThreadData* pDetached = *pp; + *pp = pDetached->m_pNext; + delete pDetached; + } else + { pp = &(*pp)->m_pNext; + } } return true; @@ -395,7 +411,7 @@ EbrCollector::QueueForDeletion(void* pObject, EbrDeleteFunc pfnDelete, size_t es _ASSERTE(pfnDelete != nullptr); // Must be in a critical region. - EbrThreadData* pData = &t_pThreadData; + EbrThreadData* pData = t_pThreadData; _ASSERTE(pData->m_pCollector == this && pData->m_criticalDepth > 0); // Allocate pending entry. 
diff --git a/src/coreclr/vm/ebr.h b/src/coreclr/vm/ebr.h index a6369886629058..0435fa7360864f 100644 --- a/src/coreclr/vm/ebr.h +++ b/src/coreclr/vm/ebr.h @@ -95,7 +95,6 @@ class EbrCollector final private: // Thread list management EbrThreadData* GetOrCreateThreadData(); - void UnlinkThreadData(EbrThreadData* pData); // Epoch management bool CanAdvanceEpoch(); @@ -131,12 +130,14 @@ class EbrCriticalRegionHolder final EbrCriticalRegionHolder(EbrCollector* pCollector, bool fEnable) : m_pCollector(fEnable ? pCollector : nullptr) { + WRAPPER_NO_CONTRACT; if (m_pCollector != nullptr) m_pCollector->EnterCriticalRegion(); } ~EbrCriticalRegionHolder() { + WRAPPER_NO_CONTRACT; if (m_pCollector != nullptr) m_pCollector->ExitCriticalRegion(); } From 434ed7903dc75b1aac74ee2a16170f8187a9650a Mon Sep 17 00:00:00 2001 From: Aaron R Robinson Date: Wed, 18 Feb 2026 14:14:29 -0800 Subject: [PATCH 18/24] Refactor EbrThreadData management to use thread-local instance and simplify thread detachment logic --- src/coreclr/vm/ebr.cpp | 111 +++++++++++++++++------------------------ src/coreclr/vm/ebr.h | 2 +- 2 files changed, 47 insertions(+), 66 deletions(-) diff --git a/src/coreclr/vm/ebr.cpp b/src/coreclr/vm/ebr.cpp index 4ba77fd9ff5430..0bb1ff9b272b18 100644 --- a/src/coreclr/vm/ebr.cpp +++ b/src/coreclr/vm/ebr.cpp @@ -24,9 +24,6 @@ struct EbrThreadData // Nesting depth for re-entrant critical regions. uint32_t m_criticalDepth = 0; - // Logically deleted from the thread list. - Volatile m_detached; - // Intrusive linked list through the collector's thread list. EbrThreadData* m_pNext = nullptr; }; @@ -47,10 +44,8 @@ struct EbrPendingEntry final EbrPendingEntry* m_pNext; }; -// Each thread holds a pointer to its heap-allocated EbrThreadData. The node -// must outlive the thread because it remains linked in the collector's thread -// list until TryAdvanceEpoch physically prunes and deletes it. -static thread_local EbrThreadData* t_pThreadData = nullptr; +// Each thread has a thread_local EbrThreadData instance. +static thread_local EbrThreadData t_pThreadData{}; // Destructor that runs when the thread's C++ thread_local storage is torn // down. This ensures EBR cleanup happens for *all* threads that entered a @@ -60,11 +55,11 @@ struct EbrTlsDestructor final { ~EbrTlsDestructor() { - EbrThreadData* pData = t_pThreadData; - if (pData != nullptr && pData->m_pCollector != nullptr) + EbrThreadData* pData = &t_pThreadData; + if (pData->m_pCollector != nullptr) { pData->m_pCollector->ThreadDetach(); - t_pThreadData = nullptr; + *pData = {}; } } }; @@ -136,21 +131,14 @@ EbrCollector::GetOrCreateThreadData() } CONTRACTL_END; - EbrThreadData* pData = t_pThreadData; - if (pData != nullptr && pData->m_pCollector == this) + EbrThreadData* pData = &t_pThreadData; + if (pData->m_pCollector == this) return pData; - // Heap-allocate so the node outlives the thread. It will be freed - // when TryAdvanceEpoch prunes detached nodes. - pData = new (nothrow) EbrThreadData(); - if (pData == nullptr) - return nullptr; - pData->m_pCollector = this; - t_pThreadData = pData; // Link into the collector's thread list. - // See TryAdvanceEpoch for the removal semantics of detached nodes. + // See ThreadDetach() for the removal semantics of detached nodes. EbrThreadData* pHead; do { @@ -190,7 +178,7 @@ EbrCollector::EnterCriticalRegion() pData->m_localEpoch = (epoch | ACTIVE_FLAG); // Full fence to ensure the epoch observation is visible before any - // reads in the critical region. 
This pairs with the acquire fence
+    // reads in the critical region. This pairs with the full fence
     // in TryAdvanceEpoch's scan.
     MemoryBarrier();
 }
@@ -209,8 +197,7 @@ EbrCollector::ExitCriticalRegion()
 
     _ASSERTE(m_initialized);
 
-    EbrThreadData* pData = t_pThreadData;
-    _ASSERTE(pData != nullptr);
+    EbrThreadData* pData = &t_pThreadData;
     _ASSERTE(pData->m_pCollector == this);
     _ASSERTE(pData->m_criticalDepth > 0);
@@ -230,8 +217,8 @@ EbrCollector::InCriticalRegion()
 {
     LIMITED_METHOD_CONTRACT;
 
-    EbrThreadData* pData = t_pThreadData;
-    if (pData == nullptr || pData->m_pCollector != this)
+    EbrThreadData* pData = &t_pThreadData;
+    if (pData->m_pCollector != this)
         return false;
     return pData->m_criticalDepth > 0;
 }
@@ -245,16 +232,25 @@ EbrCollector::ThreadDetach()
     }
     CONTRACTL_END;
+    _ASSERTE(!InCriticalRegion());
 
-    EbrThreadData* pData = t_pThreadData;
-    if (pData == nullptr || pData->m_pCollector != this)
+    EbrThreadData* pData = &t_pThreadData;
+    if (pData->m_pCollector != this)
         return;
 
-    _ASSERTE(!InCriticalRegion());
+    CrstHolder lock(&m_threadListLock);
 
-    // Logically detach: the epoch scanner skips detached nodes.
-    // The heap-allocated node is freed later by TryAdvanceEpoch.
-    pData->m_detached = TRUE;
+    // Physically unlink this thread's node. New nodes are only ever CAS-pushed
+    // at the head, so unlinking an interior node here is safe without
+    // interfering with concurrent inserts.
+    EbrThreadData** pp = &m_pThreadListHead;
+    while (*pp != nullptr)
+    {
+        if ((*pp) == pData)
+            *pp = (*pp)->m_pNext;
+        else
+            pp = &(*pp)->m_pNext;
+    }
 }
 
 // ============================================
@@ -264,24 +260,27 @@ bool
 EbrCollector::CanAdvanceEpoch()
 {
-    LIMITED_METHOD_CONTRACT;
+    CONTRACTL
+    {
+        NOTHROW;
+        GC_NOTRIGGER;
+    }
+    CONTRACTL_END;
+    _ASSERTE(m_threadListLock.OwnedByCurrentThread());
 
     uint32_t currentEpoch = m_globalEpoch;
 
     EbrThreadData* pData = VolatileLoad(&m_pThreadListHead);
     while (pData != nullptr)
     {
-        if (!pData->m_detached)
+        uint32_t localEpoch = pData->m_localEpoch;
+        bool active = (localEpoch & ACTIVE_FLAG) != 0;
+        if (active)
         {
-            uint32_t localEpoch = pData->m_localEpoch;
-            bool active = (localEpoch & ACTIVE_FLAG) != 0;
-            if (active)
-            {
-                // If an active thread has not yet observed the current epoch,
-                // we cannot advance.
-                if (localEpoch != (currentEpoch | ACTIVE_FLAG))
-                    return false;
-            }
+            // If an active thread has not yet observed the current epoch,
+            // we cannot advance.
+            if (localEpoch != (currentEpoch | ACTIVE_FLAG))
+                return false;
         }
 
         pData = pData->m_pNext;
@@ -301,12 +300,12 @@ EbrCollector::TryAdvanceEpoch()
     CONTRACTL_END;
     _ASSERTE(FinalizerThread::IsCurrentThreadFinalizer());
 
-    // Serialize scan, epoch advance, and pruning under the lock. This prevents
-    // two concurrent callers from double-advancing the epoch and ensures the
-    // CanAdvanceEpoch result is still valid when we act on it.
+    // Epoch advance under the lock. This prevents two concurrent callers
+    // from double-advancing the epoch and ensures the CanAdvanceEpoch result
+    // is still valid when we act on it.
     CrstHolder lock(&m_threadListLock);
 
-    // Acquire fence to synchronize with the MemoryBarrier() calls in
+    // Full fence to synchronize with the MemoryBarrier() calls in
     // EnterCriticalRegion / ExitCriticalRegion.
     MemoryBarrier();
 
     uint32_t newEpoch = ((uint32_t)m_globalEpoch + 1) % EBR_EPOCHS;
     m_globalEpoch = newEpoch;
 
-    // Physically prune and delete detached nodes.
New nodes are only ever - // CAS-pushed at the head, so unlinking interior nodes here is safe - // without interfering with concurrent inserts. - EbrThreadData** pp = &m_pThreadListHead; - while (*pp != nullptr) - { - if ((*pp)->m_detached) - { - EbrThreadData* pDetached = *pp; - *pp = pDetached->m_pNext; - delete pDetached; - } - else - { - pp = &(*pp)->m_pNext; - } - } - return true; } @@ -411,7 +392,7 @@ EbrCollector::QueueForDeletion(void* pObject, EbrDeleteFunc pfnDelete, size_t es _ASSERTE(pfnDelete != nullptr); // Must be in a critical region. - EbrThreadData* pData = t_pThreadData; + EbrThreadData* pData = &t_pThreadData; _ASSERTE(pData->m_pCollector == this && pData->m_criticalDepth > 0); // Allocate pending entry. diff --git a/src/coreclr/vm/ebr.h b/src/coreclr/vm/ebr.h index 0435fa7360864f..15bf8db2f867ba 100644 --- a/src/coreclr/vm/ebr.h +++ b/src/coreclr/vm/ebr.h @@ -109,7 +109,7 @@ class EbrCollector final // Global epoch counter [0, EBR_EPOCHS-1] Volatile m_globalEpoch; - // Registered thread list (lock-free CAS insert; m_threadListLock used only for pruning) + // Registered thread list (m_threadListLock used for pruning and epoch scanning) CrstStatic m_threadListLock; EbrThreadData* m_pThreadListHead = nullptr; From 48add32a543c9dc0b3625e685907d0f94c3b0fb7 Mon Sep 17 00:00:00 2001 From: Aaron R Robinson Date: Wed, 18 Feb 2026 16:27:43 -0800 Subject: [PATCH 19/24] Apply suggestions from code review --- src/coreclr/vm/ebr.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/vm/ebr.cpp b/src/coreclr/vm/ebr.cpp index 0bb1ff9b272b18..93bb81a377ecdf 100644 --- a/src/coreclr/vm/ebr.cpp +++ b/src/coreclr/vm/ebr.cpp @@ -45,7 +45,7 @@ struct EbrPendingEntry final }; // Each thread has a thread_local EbrThreadData instance. -static thread_local EbrThreadData t_pThreadData{}; +static thread_local EbrThreadData t_pThreadData; // Destructor that runs when the thread's C++ thread_local storage is torn // down. This ensures EBR cleanup happens for *all* threads that entered a From b575e0226bd243e94bcf45dca3c63f647fbbe5ca Mon Sep 17 00:00:00 2001 From: Aaron R Robinson Date: Wed, 18 Feb 2026 16:29:14 -0800 Subject: [PATCH 20/24] Remove unused header --- src/coreclr/vm/corhost.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/vm/corhost.cpp b/src/coreclr/vm/corhost.cpp index 6ddeb52ab906c2..b3b83121f9ba1c 100644 --- a/src/coreclr/vm/corhost.cpp +++ b/src/coreclr/vm/corhost.cpp @@ -37,7 +37,6 @@ #endif // !TARGET_UNIX #include "nativelibrary.h" -#include "ebr.h" #ifndef DACCESS_COMPILE From cdafcc1d1af14fbd158aa16785949c1b79d68f5b Mon Sep 17 00:00:00 2001 From: Aaron R Robinson Date: Wed, 18 Feb 2026 16:57:09 -0800 Subject: [PATCH 21/24] Fix EBR comments and remove Ebr Crst types - Fix comment on t_pThreadData: it is a thread_local value, not a heap-allocated pointer, and ThreadDetach prunes it (not TryAdvanceEpoch). - Fix m_threadListLock comment: used for pruning and epoch scanning, not only pruning. - Fix fence comments: MemoryBarrier() is a full fence, not an acquire fence. - Remove CrstEbrPending and CrstEbrThreadList from CrstTypes.def and regenerate crsttypes_generated.h. 
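The fence-comment fix called out in the bullet list above is worth spelling out. EBR needs a store-then-load constraint: the reader's epoch announcement must reach memory before its subsequent reads of shared data, and acquire/release ordering never constrains a store against a later load. A standalone sketch of the pairing, with std::atomic_thread_fence standing in for MemoryBarrier(); all names illustrative:

#include <atomic>

std::atomic<unsigned> g_localEpoch{0};
std::atomic<int> g_shared{0};

int ReaderEnter(unsigned epoch)
{
    g_localEpoch.store(epoch, std::memory_order_relaxed); // announce
    std::atomic_thread_fence(std::memory_order_seq_cst);  // full fence
    // Without the full fence, this load could complete before the store
    // above becomes visible, letting the collector's scan miss a thread
    // that is already reading shared state.
    return g_shared.load(std::memory_order_relaxed);
}

The collector side mirrors this: a matching full fence before it scans the per-thread epochs, so the two fences order each side's store against the other side's load.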
From cdafcc1d1af14fbd158aa16785949c1b79d68f5b Mon Sep 17 00:00:00 2001
From: Aaron R Robinson
Date: Wed, 18 Feb 2026 16:57:09 -0800
Subject: [PATCH 21/24] Fix EBR comments and remove Ebr Crst types

- Fix comment on t_pThreadData: it is a thread_local value, not a
  heap-allocated pointer, and ThreadDetach prunes it (not TryAdvanceEpoch).
- Fix m_threadListLock comment: used for pruning and epoch scanning, not
  only pruning.
- Fix fence comments: MemoryBarrier() is a full fence, not an acquire fence.
- Remove CrstEbrPending and CrstEbrThreadList from CrstTypes.def and
  regenerate crsttypes_generated.h.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/coreclr/inc/CrstTypes.def         |   9 --
 src/coreclr/inc/crsttypes_generated.h | 162 +++++++++++++-------------
 src/coreclr/vm/ceemain.cpp            |   2 +-
 src/coreclr/vm/ebr.cpp                |  10 +-
 src/coreclr/vm/ebr.h                  |   6 +-
 5 files changed, 88 insertions(+), 101 deletions(-)

diff --git a/src/coreclr/inc/CrstTypes.def b/src/coreclr/inc/CrstTypes.def
index 6685868672ffeb..8cd820022658cd 100644
--- a/src/coreclr/inc/CrstTypes.def
+++ b/src/coreclr/inc/CrstTypes.def
@@ -186,15 +186,6 @@ Crst DynamicMT
     AcquiredBefore CodeVersioning
 End

-// EBR (Epoch-Based Reclamation) internal locks. These are leaf locks that
-// protect the EBR thread list and pending deletion queues. They must never
-// be held across HashMap operations or GC transitions.
-Crst EbrPending
-End
-
-Crst EbrThreadList
-End
-
 Crst EventStore
 End

diff --git a/src/coreclr/inc/crsttypes_generated.h b/src/coreclr/inc/crsttypes_generated.h
index 02addac55c29a9..713f6bd5b33f33 100644
--- a/src/coreclr/inc/crsttypes_generated.h
+++ b/src/coreclr/inc/crsttypes_generated.h
@@ -41,86 +41,84 @@ enum CrstType
     CrstDebuggerMutex = 23,
     CrstDynamicIL = 24,
     CrstDynamicMT = 25,
-    CrstEbrPending = 26,
-    CrstEbrThreadList = 27,
-    CrstEtwTypeLogHash = 28,
-    CrstEventPipe = 29,
-    CrstEventStore = 30,
-    CrstException = 31,
-    CrstExecutableAllocatorLock = 32,
-    CrstFCall = 33,
-    CrstFrozenObjectHeap = 34,
-    CrstFuncPtrStubs = 35,
-    CrstFusionAppCtx = 36,
-    CrstGCCover = 37,
-    CrstGenericDictionaryExpansion = 38,
-    CrstGlobalStrLiteralMap = 39,
-    CrstHandleTable = 40,
-    CrstIJWFixupData = 41,
-    CrstIJWHash = 42,
-    CrstILStubGen = 43,
-    CrstInlineTrackingMap = 44,
-    CrstInstMethodHashTable = 45,
-    CrstInterfaceDispatchGlobalLists = 46,
-    CrstInterop = 47,
-    CrstInteropData = 48,
-    CrstIsJMCMethod = 49,
-    CrstISymUnmanagedReader = 50,
-    CrstJit = 51,
-    CrstJitInlineTrackingMap = 52,
-    CrstJitPatchpoint = 53,
-    CrstJumpStubCache = 54,
-    CrstLeafLock = 55,
-    CrstListLock = 56,
-    CrstLoaderAllocator = 57,
-    CrstLoaderAllocatorReferences = 58,
-    CrstLoaderHeap = 59,
-    CrstManagedObjectWrapperMap = 60,
-    CrstMethodDescBackpatchInfoTracker = 61,
-    CrstMethodTableExposedObject = 62,
-    CrstModule = 63,
-    CrstModuleLookupTable = 64,
-    CrstMulticoreJitHash = 65,
-    CrstMulticoreJitManager = 66,
-    CrstNativeImageEagerFixups = 67,
-    CrstNativeImageLoad = 68,
-    CrstNotifyGdb = 69,
-    CrstPEImage = 70,
-    CrstPendingTypeLoadEntry = 71,
-    CrstPerfMap = 72,
-    CrstPgoData = 73,
-    CrstPinnedByrefValidation = 74,
-    CrstPinnedHeapHandleTable = 75,
-    CrstProfilerGCRefDataFreeList = 76,
-    CrstProfilingAPIStatus = 77,
-    CrstRCWCache = 78,
-    CrstRCWCleanupList = 79,
-    CrstReadyToRunEntryPointToMethodDescMap = 80,
-    CrstReflection = 81,
-    CrstReJITGlobalRequest = 82,
-    CrstSigConvert = 83,
-    CrstSingleUseLock = 84,
-    CrstStressLog = 85,
-    CrstStubCache = 86,
-    CrstStubDispatchCache = 87,
-    CrstSyncBlockCache = 88,
-    CrstSyncHashLock = 89,
-    CrstSystemDomain = 90,
-    CrstSystemDomainDelayedUnloadList = 91,
-    CrstThreadIdDispenser = 92,
-    CrstThreadLocalStorageLock = 93,
-    CrstThreadStore = 94,
-    CrstTieredCompilation = 95,
-    CrstTypeEquivalenceMap = 96,
-    CrstTypeIDMap = 97,
-    CrstUMEntryThunkCache = 98,
-    CrstUMEntryThunkFreeListLock = 99,
-    CrstUniqueStack = 100,
-    CrstUnresolvedClassLock = 101,
-    CrstUnwindInfoTableLock = 102,
-    CrstVSDIndirectionCellLock = 103,
-    CrstWrapperTemplate = 104,
-    kNumberOfCrstTypes = 105
+    CrstEtwTypeLogHash = 26,
+    CrstEventPipe = 27,
+    CrstEventStore = 28,
+    CrstException = 29,
+    CrstExecutableAllocatorLock = 30,
+    CrstFCall = 31,
+    CrstFrozenObjectHeap = 32,
+    CrstFuncPtrStubs = 33,
+    CrstFusionAppCtx = 34,
+    CrstGCCover = 35,
+    CrstGenericDictionaryExpansion = 36,
+    CrstGlobalStrLiteralMap = 37,
+    CrstHandleTable = 38,
+    CrstIJWFixupData = 39,
+    CrstIJWHash = 40,
+    CrstILStubGen = 41,
+    CrstInlineTrackingMap = 42,
+    CrstInstMethodHashTable = 43,
+    CrstInterfaceDispatchGlobalLists = 44,
+    CrstInterop = 45,
+    CrstInteropData = 46,
+    CrstIsJMCMethod = 47,
+    CrstISymUnmanagedReader = 48,
+    CrstJit = 49,
+    CrstJitInlineTrackingMap = 50,
+    CrstJitPatchpoint = 51,
+    CrstJumpStubCache = 52,
+    CrstLeafLock = 53,
+    CrstListLock = 54,
+    CrstLoaderAllocator = 55,
+    CrstLoaderAllocatorReferences = 56,
+    CrstLoaderHeap = 57,
+    CrstManagedObjectWrapperMap = 58,
+    CrstMethodDescBackpatchInfoTracker = 59,
+    CrstMethodTableExposedObject = 60,
+    CrstModule = 61,
+    CrstModuleLookupTable = 62,
+    CrstMulticoreJitHash = 63,
+    CrstMulticoreJitManager = 64,
+    CrstNativeImageEagerFixups = 65,
+    CrstNativeImageLoad = 66,
+    CrstNotifyGdb = 67,
+    CrstPEImage = 68,
+    CrstPendingTypeLoadEntry = 69,
+    CrstPerfMap = 70,
+    CrstPgoData = 71,
+    CrstPinnedByrefValidation = 72,
+    CrstPinnedHeapHandleTable = 73,
+    CrstProfilerGCRefDataFreeList = 74,
+    CrstProfilingAPIStatus = 75,
+    CrstRCWCache = 76,
+    CrstRCWCleanupList = 77,
+    CrstReadyToRunEntryPointToMethodDescMap = 78,
+    CrstReflection = 79,
+    CrstReJITGlobalRequest = 80,
+    CrstSigConvert = 81,
+    CrstSingleUseLock = 82,
+    CrstStressLog = 83,
+    CrstStubCache = 84,
+    CrstStubDispatchCache = 85,
+    CrstSyncBlockCache = 86,
+    CrstSyncHashLock = 87,
+    CrstSystemDomain = 88,
+    CrstSystemDomainDelayedUnloadList = 89,
+    CrstThreadIdDispenser = 90,
+    CrstThreadLocalStorageLock = 91,
+    CrstThreadStore = 92,
+    CrstTieredCompilation = 93,
+    CrstTypeEquivalenceMap = 94,
+    CrstTypeIDMap = 95,
+    CrstUMEntryThunkCache = 96,
+    CrstUMEntryThunkFreeListLock = 97,
+    CrstUniqueStack = 98,
+    CrstUnresolvedClassLock = 99,
+    CrstUnwindInfoTableLock = 100,
+    CrstVSDIndirectionCellLock = 101,
+    CrstWrapperTemplate = 102,
+    kNumberOfCrstTypes = 103
 };

 #endif // __CRST_TYPES_INCLUDED
@@ -157,8 +155,6 @@ int g_rgCrstLevelMap[] =
     12,         // CrstDebuggerMutex
     0,          // CrstDynamicIL
     9,          // CrstDynamicMT
-    0,          // CrstEbrPending
-    0,          // CrstEbrThreadList
     0,          // CrstEtwTypeLogHash
     19,         // CrstEventPipe
     0,          // CrstEventStore
@@ -267,8 +263,6 @@ LPCSTR g_rgCrstNameMap[] =
     "CrstDebuggerMutex",
     "CrstDynamicIL",
     "CrstDynamicMT",
-    "CrstEbrPending",
-    "CrstEbrThreadList",
     "CrstEtwTypeLogHash",
     "CrstEventPipe",
     "CrstEventStore",
diff --git a/src/coreclr/vm/ceemain.cpp b/src/coreclr/vm/ceemain.cpp
index 95f670dbcd6151..8e3add45fb3f27 100644
--- a/src/coreclr/vm/ceemain.cpp
+++ b/src/coreclr/vm/ceemain.cpp
@@ -789,7 +789,7 @@ void EEStartupHelper()

     // Initialize EBR (Epoch-Based Reclamation) for HashMap's async mode.
     // This must be done before any HashMap is initialized with fAsyncMode=TRUE.
-    g_HashMapEbr.Init(CrstEbrThreadList, CrstEbrPending);
+    g_HashMapEbr.Init();

     StubManager::InitializeStubManagers();

diff --git a/src/coreclr/vm/ebr.cpp b/src/coreclr/vm/ebr.cpp
index 93bb81a377ecdf..125d8230f0c4fe 100644
--- a/src/coreclr/vm/ebr.cpp
+++ b/src/coreclr/vm/ebr.cpp
@@ -76,7 +76,7 @@ EbrCollector g_HashMapEbr;
 // ============================================

 void
-EbrCollector::Init(CrstType crstThreadList, CrstType crstPending)
+EbrCollector::Init()
 {
     CONTRACTL
     {
@@ -93,8 +93,12 @@ EbrCollector::Init(CrstType crstThreadList, CrstType crstPending)
     for (uint32_t i = 0; i < EBR_EPOCHS; i++)
         m_pPendingHeads[i] = nullptr;

-    m_threadListLock.Init(crstThreadList);
-    m_pendingLock.Init(crstPending);
+    m_threadListLock.Init(CrstLeafLock);
+
+    // The pending lock is a leaf that can be taken in any mode.
+    // It is not expected to interact with the GC in any way.
+    // The QueueForDeletion() operation can occur at inconvenient times.
+    m_pendingLock.Init(CrstLeafLock, CrstFlags::CRST_UNSAFE_ANYMODE);

     m_initialized = true;
 }
diff --git a/src/coreclr/vm/ebr.h b/src/coreclr/vm/ebr.h
index 15bf8db2f867ba..b3d7891af35cee 100644
--- a/src/coreclr/vm/ebr.h
+++ b/src/coreclr/vm/ebr.h
@@ -10,7 +10,7 @@
 //
 // Usage:
 //  // Startup:
-//  g_HashMapEbr.Init(CrstEbrThreadList, CrstEbrPending);
+//  g_HashMapEbr.Init();
 //
 //  // Reader/Writer thread:
 //  {
@@ -56,9 +56,7 @@ class EbrCollector final
     EbrCollector& operator=(EbrCollector&&) = delete;

     // Initialize the collector.
-    // crstThreadList: Crst type for the thread list lock
-    // crstPending: Crst type for the pending deletion queue lock
-    void Init(CrstType crstThreadList, CrstType crstPending);
+    void Init();

     // Enter a critical region. While in a critical region, objects queued for
     // deletion will not be freed. Re-entrant: nested calls are counted.
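
With Init() narrowed above, the collector's whole client surface is visible in this patch. A hedged sketch of the intended call pattern — ReplaceBucketArray, DeleteBucketArray, and EstimateSize are hypothetical HashMap-side helpers, and pairing TryAdvanceEpoch with CleanUpPending on the finalizer thread is one plausible arrangement rather than a line quoted from the series:

    // Startup (EEStartupHelper):
    g_HashMapEbr.Init();

    // A reader/writer thread:
    g_HashMapEbr.EnterCriticalRegion();   // plain stores plus a fence; no lock
    Bucket* pOld = ReplaceBucketArray();  // hypothetical: swap in a grown table
    g_HashMapEbr.QueueForDeletion(pOld, &DeleteBucketArray, EstimateSize(pOld));
    g_HashMapEbr.ExitCriticalRegion();    // pOld is freed epochs later, not here

    // Finalizer thread, periodically:
    if (g_HashMapEbr.TryAdvanceEpoch())   // only if every active thread caught up
        g_HashMapEbr.CleanUpPending();    // drains the slot retired two epochs ago
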
From 2367317f2774a77322a6f2b4a13774904b93e16c Mon Sep 17 00:00:00 2001
From: Aaron R Robinson
Date: Thu, 19 Feb 2026 15:19:18 -0800
Subject: [PATCH 22/24] Remove unnecessary full fence; make all loads/stores
 explicit.

---
 src/coreclr/vm/ebr.cpp | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/src/coreclr/vm/ebr.cpp b/src/coreclr/vm/ebr.cpp
index 125d8230f0c4fe..9f1fa1c20aa795 100644
--- a/src/coreclr/vm/ebr.cpp
+++ b/src/coreclr/vm/ebr.cpp
@@ -87,7 +87,7 @@ EbrCollector::Init()

     _ASSERTE(!m_initialized);

-    m_globalEpoch = 0;
+    m_globalEpoch.Store(0);
     m_pendingSizeInBytes = 0;
     m_pThreadListHead = nullptr;
     for (uint32_t i = 0; i < EBR_EPOCHS; i++)
@@ -178,8 +178,8 @@ EbrCollector::EnterCriticalRegion()
     if (pData->m_criticalDepth == 1)
     {
         // Outermost entry: observe the global epoch and set ACTIVE_FLAG.
-        uint32_t epoch = m_globalEpoch;
-        pData->m_localEpoch = (epoch | ACTIVE_FLAG);
+        uint32_t epoch = m_globalEpoch.Load();
+        pData->m_localEpoch.Store(epoch | ACTIVE_FLAG);

         // Full fence to ensure the epoch observation is visible before any
         // reads in the critical region. This pairs with the full fence
@@ -211,8 +211,7 @@ EbrCollector::ExitCriticalRegion()
     {
         // Outermost exit: ensure all stores in the critical path are visible
         // before clearing the active flag.
-        MemoryBarrier();
-        pData->m_localEpoch = 0;
+        pData->m_localEpoch.Store(0);
     }
 }
@@ -272,12 +271,12 @@ EbrCollector::CanAdvanceEpoch()
     CONTRACTL_END;

     _ASSERTE(m_threadListLock.OwnedByCurrentThread());

-    uint32_t currentEpoch = m_globalEpoch;
+    uint32_t currentEpoch = m_globalEpoch.Load();

     EbrThreadData* pData = VolatileLoad(&m_pThreadListHead);
     while (pData != nullptr)
     {
-        uint32_t localEpoch = pData->m_localEpoch;
+        uint32_t localEpoch = pData->m_localEpoch.Load();
         bool active = (localEpoch & ACTIVE_FLAG) != 0;
         if (active)
         {
@@ -309,15 +308,14 @@ EbrCollector::TryAdvanceEpoch()
     // is still valid when we act on it.
     CrstHolder lock(&m_threadListLock);

-    // Full fence to synchronize with the MemoryBarrier() calls in
-    // EnterCriticalRegion / ExitCriticalRegion.
+    // Full fence to synchronize with EnterCriticalRegion / ExitCriticalRegion.
     MemoryBarrier();

     if (!CanAdvanceEpoch())
         return false;

-    uint32_t newEpoch = ((uint32_t)m_globalEpoch + 1) % EBR_EPOCHS;
-    m_globalEpoch = newEpoch;
+    uint32_t newEpoch = (m_globalEpoch.Load() + 1) % EBR_EPOCHS;
+    m_globalEpoch.Store(newEpoch);

     return true;
 }
@@ -358,7 +356,7 @@ EbrCollector::CleanUpPending()

     // Objects retired 2 epochs ago are safe to delete. With 3 epochs
     // and clock arithmetic, the safe slot is (current + 1) % 3.
-    uint32_t currentEpoch = m_globalEpoch;
+    uint32_t currentEpoch = m_globalEpoch.Load();
     uint32_t safeSlot = (currentEpoch + 1) % EBR_EPOCHS;

     pDetached = DetachQueue(safeSlot);
@@ -413,7 +411,7 @@ EbrCollector::QueueForDeletion(void* pObject, EbrDeleteFunc pfnDelete, size_t es
     {
         CrstHolder lock(&m_pendingLock);

-        uint32_t slot = m_globalEpoch;
+        uint32_t slot = m_globalEpoch.Load();
         pEntry->m_pNext = m_pPendingHeads[slot];
         m_pPendingHeads[slot] = pEntry;
         m_pendingSizeInBytes = (size_t)m_pendingSizeInBytes + estimatedSize;
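
Patch 22 keeps the CleanUpPending comment about clock arithmetic; since the (current + 1) % 3 step reads backwards at first glance, here is the arithmetic worked out as a standalone sketch (not patch code): the slot one step ahead of the current epoch, modulo 3, is exactly the slot that was current two advances ago.

    #include <cstdint>

    constexpr uint32_t EBR_EPOCHS = 3;

    constexpr uint32_t SafeSlot(uint32_t current)
    {
        // (current + 1) % 3 is the same slot as (current - 2) mod 3: the queue
        // populated two advances ago, which no active reader can still observe.
        return (current + 1) % EBR_EPOCHS;
    }

    static_assert(SafeSlot(0) == 1, "current 0 -> drain slot 1 (current two advances ago)");
    static_assert(SafeSlot(1) == 2, "current 1 -> drain slot 2");
    static_assert(SafeSlot(2) == 0, "current 2 -> drain slot 0");
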
From a6e1d92bdc57b4cc342f5cd2c9cff327e67feac2 Mon Sep 17 00:00:00 2001
From: Aaron R Robinson
Date: Thu, 19 Feb 2026 16:40:46 -0800
Subject: [PATCH 23/24] Fix CrstFlags usage and reset thread EBR data in
 ThreadDetach

---
 src/coreclr/vm/ebr.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/coreclr/vm/ebr.cpp b/src/coreclr/vm/ebr.cpp
index 9f1fa1c20aa795..9a0ecdfa53b8f4 100644
--- a/src/coreclr/vm/ebr.cpp
+++ b/src/coreclr/vm/ebr.cpp
@@ -98,7 +98,7 @@ EbrCollector::Init()
     // The pending lock is a leaf that can be taken in any mode.
     // It is not expected to interact with the GC in any way.
     // The QueueForDeletion() operation can occur at inconvenient times.
-    m_pendingLock.Init(CrstLeafLock, CrstFlags::CRST_UNSAFE_ANYMODE);
+    m_pendingLock.Init(CrstLeafLock, CRST_UNSAFE_ANYMODE);

     m_initialized = true;
 }
@@ -254,6 +254,9 @@ EbrCollector::ThreadDetach()
         else
             pp = &(*pp)->m_pNext;
     }
+
+    // Reset the thread's EBR data.
+    *pData = {};
 }

 // ============================================
@@ -365,6 +368,7 @@ EbrCollector::CleanUpPending()
     size_t freed = DeletePendingEntries(pDetached);
     if (freed > 0)
     {
+        CrstHolder lock(&m_pendingLock);
         _ASSERTE((size_t)m_pendingSizeInBytes >= freed);
         m_pendingSizeInBytes = (size_t)m_pendingSizeInBytes - freed;
     }
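
The CrstHolder that patch 23 adds in CleanUpPending is about a lost update, not visibility: the byte-count decrement is a read-modify-write that raced the already-locked increment in QueueForDeletion, so two concurrent updates could both read the same old value and one would be silently dropped. The shape of the fix in miniature, with std::mutex standing in for the Crst (illustrative names, not the runtime's):

    #include <cstddef>
    #include <mutex>

    std::mutex g_pendingLock;         // stands in for m_pendingLock
    size_t g_pendingSizeInBytes = 0;

    // Enqueue side was already serialized; the free side now takes the same
    // lock so the two read-modify-writes cannot interleave.
    void OnQueued(size_t estimatedSize)
    {
        std::lock_guard<std::mutex> hold(g_pendingLock);
        g_pendingSizeInBytes += estimatedSize;
    }

    void OnFreed(size_t freed)
    {
        std::lock_guard<std::mutex> hold(g_pendingLock);
        g_pendingSizeInBytes -= freed;
    }
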
From 83f217670e093c933dca54a1a87a12bbf0ac1577 Mon Sep 17 00:00:00 2001
From: Aaron R Robinson
Date: Thu, 19 Feb 2026 19:50:10 -0800
Subject: [PATCH 24/24] Enhance EbrCollector: Add DetachedCollector, improve
 thread detach handling, and refine pending size management

---
 src/coreclr/vm/ebr.cpp | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/coreclr/vm/ebr.cpp b/src/coreclr/vm/ebr.cpp
index 9a0ecdfa53b8f4..425d8b068556ec 100644
--- a/src/coreclr/vm/ebr.cpp
+++ b/src/coreclr/vm/ebr.cpp
@@ -13,6 +13,8 @@
 // Combined with the epoch value in m_localEpoch for a single atomic field.
 static constexpr uint32_t ACTIVE_FLAG = 0x80000000U;

+static EbrCollector* const DetachedCollector = (EbrCollector*)-1;
+
 struct EbrThreadData
 {
     EbrCollector* m_pCollector = nullptr;
@@ -59,7 +61,6 @@ struct EbrTlsDestructor final
         if (pData->m_pCollector != nullptr)
         {
             pData->m_pCollector->ThreadDetach();
-            *pData = {};
         }
     }
 };
@@ -139,6 +140,8 @@ EbrCollector::GetOrCreateThreadData()
     if (pData->m_pCollector == this)
         return pData;

+    _ASSERTE_ALL_BUILDS(pData->m_pCollector != DetachedCollector && "Attempt to reattach detached thread.");
+
     pData->m_pCollector = this;

     // Link into the collector's thread list.
@@ -255,8 +258,9 @@ EbrCollector::ThreadDetach()
             pp = &(*pp)->m_pNext;
     }

-    // Reset the thread's EBR data.
+    // Reset and then poison the thread's EBR data.
     *pData = {};
+    pData->m_pCollector = DetachedCollector;
 }

 // ============================================
@@ -412,13 +416,22 @@ EbrCollector::QueueForDeletion(void* pObject, EbrDeleteFunc pfnDelete, size_t es
     }

     // Insert into the current epoch's pending list.
+    size_t oldPending;
     {
         CrstHolder lock(&m_pendingLock);

+        oldPending = (size_t)m_pendingSizeInBytes;
         uint32_t slot = m_globalEpoch.Load();
         pEntry->m_pNext = m_pPendingHeads[slot];
         m_pPendingHeads[slot] = pEntry;
-        m_pendingSizeInBytes = (size_t)m_pendingSizeInBytes + estimatedSize;
+        m_pendingSizeInBytes = oldPending + estimatedSize;
+    }
+
+    const size_t threshold = 128 * 1024; // 128KB is an arbitrary threshold for enabling finalization. Tune as needed.
+    if (oldPending < threshold && threshold <= (size_t)m_pendingSizeInBytes)
+    {
+        FinalizerThread::EnableFinalization();
     }
+
     return true;
 }
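
One observation on the final patch's wake-up condition: oldPending < threshold && threshold <= newPending is an edge trigger, so the finalizer is poked once when the pending bytes cross 128KB from below, not on every enqueue while already above it. Isolated as a standalone check (hypothetical helper name, not from the patch):

    #include <cassert>
    #include <cstddef>

    // True only on the transition from below-threshold to at-or-above.
    static bool CrossedThreshold(size_t oldPending, size_t newPending, size_t threshold)
    {
        return oldPending < threshold && threshold <= newPending;
    }

    int main()
    {
        const size_t kThreshold = 128 * 1024;
        assert(CrossedThreshold(120 * 1024, 130 * 1024, kThreshold));   // crossed: wake finalizer
        assert(!CrossedThreshold(130 * 1024, 140 * 1024, kThreshold));  // already above: stay quiet
        assert(!CrossedThreshold(60 * 1024, 100 * 1024, kThreshold));   // still below: stay quiet
        return 0;
    }

Once CleanUpPending drains the accounting back under the threshold, the next upward crossing fires the trigger again.
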