diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index 982ea2ba74831b..3a4c0babdab259 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -70,6 +70,7 @@ set(VM_SOURCES_DAC_AND_WKS_COMMON disassembler.cpp domainassembly.cpp dynamicmethod.cpp + ebr.cpp ecall.cpp eedbginterfaceimpl.cpp eehash.cpp @@ -169,6 +170,7 @@ set(VM_HEADERS_DAC_AND_WKS_COMMON disassembler.h domainassembly.h dynamicmethod.h + ebr.h ecall.h eedbginterfaceimpl.h eedbginterfaceimpl.inl diff --git a/src/coreclr/vm/ceemain.cpp b/src/coreclr/vm/ceemain.cpp index 11a9b8b5e00923..8e3add45fb3f27 100644 --- a/src/coreclr/vm/ceemain.cpp +++ b/src/coreclr/vm/ceemain.cpp @@ -123,6 +123,7 @@ #include "clsload.hpp" #include "object.h" #include "hash.h" +#include "ebr.h" #include "ecall.h" #include "ceemain.h" #include "dllimport.h" @@ -786,6 +787,10 @@ void EEStartupHelper() // Cache the (potentially user-overridden) values now so they are accessible from asm routines InitializeSpinConstants(); + // Initialize EBR (Epoch-Based Reclamation) for HashMap's async mode. + // This must be done before any HashMap is initialized with fAsyncMode=TRUE. + g_HashMapEbr.Init(); + StubManager::InitializeStubManagers(); // Set up the cor handle map. This map is used to load assemblies in diff --git a/src/coreclr/vm/ebr.cpp b/src/coreclr/vm/ebr.cpp new file mode 100644 index 00000000000000..425d8b068556ec --- /dev/null +++ b/src/coreclr/vm/ebr.cpp @@ -0,0 +1,437 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" +#include "ebr.h" +#include "finalizerthread.h" + +// ============================================ +// Per-thread EBR state +// ============================================ + +// Bit flag indicating the thread is in a critical region. +// Combined with the epoch value in m_localEpoch for a single atomic field. +static constexpr uint32_t ACTIVE_FLAG = 0x80000000U; + +static EbrCollector* const DetachedCollector = (EbrCollector*)-1; + +struct EbrThreadData +{ + EbrCollector* m_pCollector = nullptr; + + // Local epoch with ACTIVE_FLAG. When the thread is quiescent (outside a + // critical region) ACTIVE_FLAG is cleared and the epoch bits are zero. + Volatile m_localEpoch; + + // Nesting depth for re-entrant critical regions. + uint32_t m_criticalDepth = 0; + + // Intrusive linked list through the collector's thread list. + EbrThreadData* m_pNext = nullptr; +}; + +// Singly-linked list node for pending deletions. +struct EbrPendingEntry final +{ + EbrPendingEntry(void* pObject, EbrDeleteFunc pfnDelete, size_t estimatedSize) + : m_pObject{ pObject } + , m_pfnDelete{ pfnDelete } + , m_estimatedSize{ estimatedSize } + , m_pNext{ nullptr } + {} + + void* m_pObject; + EbrDeleteFunc m_pfnDelete; + size_t m_estimatedSize; + EbrPendingEntry* m_pNext; +}; + +// Each thread has a thread_local EbrThreadData instance. +static thread_local EbrThreadData t_pThreadData; + +// Destructor that runs when the thread's C++ thread_local storage is torn +// down. This ensures EBR cleanup happens for *all* threads that entered a +// critical region, not only threads that have a runtime Thread object (which +// is required for the RuntimeThreadShutdown / ThreadDetaching path). 
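+//
+// Illustrative example (WorkerBody is hypothetical, not part of this change):
+//
+//     void WorkerBody()
+//     {
+//         g_HashMapEbr.EnterCriticalRegion();   // attaches this thread, sets ACTIVE_FLAG
+//         // ... read shared state protected by EBR ...
+//         g_HashMapEbr.ExitCriticalRegion();    // clears ACTIVE_FLAG (quiescent)
+//     }
+//     // When the OS thread later exits, ~EbrTlsDestructor runs during
+//     // thread_local teardown and calls ThreadDetach(), so an exited thread
+//     // can never stall epoch advancement.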
+struct EbrTlsDestructor final +{ + ~EbrTlsDestructor() + { + EbrThreadData* pData = &t_pThreadData; + if (pData->m_pCollector != nullptr) + { + pData->m_pCollector->ThreadDetach(); + } + } +}; +static thread_local EbrTlsDestructor t_ebrTlsDestructor; + +// Global EBR collector for HashMap's async mode. +// If you want to add another usage for Ebr in the future, please consider +// the tradeoffs between creating multiple collectors or treating this as +// a single shared global collector. +EbrCollector g_HashMapEbr; + +// ============================================ +// EbrCollector implementation +// ============================================ + +void +EbrCollector::Init() +{ + CONTRACTL + { + THROWS; + GC_NOTRIGGER; + } + CONTRACTL_END; + + _ASSERTE(!m_initialized); + + m_globalEpoch.Store(0); + m_pendingSizeInBytes = 0; + m_pThreadListHead = nullptr; + for (uint32_t i = 0; i < EBR_EPOCHS; i++) + m_pPendingHeads[i] = nullptr; + + m_threadListLock.Init(CrstLeafLock); + + // The pending lock is a leaf that can be taken in any mode. + // It is not expected to interact with the GC in any way. + // The QueueForDeletion() operation can occur at inconvenient times. + m_pendingLock.Init(CrstLeafLock, CRST_UNSAFE_ANYMODE); + + m_initialized = true; +} + +// Delete all entries in a detached pending list. Must be called outside m_pendingLock. +// Returns the total estimated size freed. +static size_t DeletePendingEntries(EbrPendingEntry* pEntry) +{ + LIMITED_METHOD_CONTRACT; + + size_t freedSize = 0; + while (pEntry != nullptr) + { + EbrPendingEntry* pNext = pEntry->m_pNext; + pEntry->m_pfnDelete(pEntry->m_pObject); + freedSize += pEntry->m_estimatedSize; + delete pEntry; + pEntry = pNext; + } + return freedSize; +} + +// ============================================ +// Thread registration +// ============================================ + +EbrThreadData* +EbrCollector::GetOrCreateThreadData() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + + EbrThreadData* pData = &t_pThreadData; + if (pData->m_pCollector == this) + return pData; + + _ASSERTE_ALL_BUILDS(pData->m_pCollector != DetachedCollector && "Attempt to reattach detached thread."); + + pData->m_pCollector = this; + + // Link into the collector's thread list. + // See ThreadDetach() for the removal semantics of detached nodes. + EbrThreadData* pHead; + do + { + pHead = VolatileLoad(&m_pThreadListHead); + pData->m_pNext = pHead; + } while (InterlockedCompareExchangeT(&m_pThreadListHead, pData, pHead) != pHead); + + return pData; +} + +// ============================================ +// Critical region enter/exit +// ============================================ + +void +EbrCollector::EnterCriticalRegion() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + } + CONTRACTL_END; + + _ASSERTE(m_initialized); + + EbrThreadData* pData = GetOrCreateThreadData(); + _ASSERTE(pData != nullptr); + + pData->m_criticalDepth++; + + if (pData->m_criticalDepth == 1) + { + // Outermost entry: observe the global epoch and set ACTIVE_FLAG. + uint32_t epoch = m_globalEpoch.Load(); + pData->m_localEpoch.Store(epoch | ACTIVE_FLAG); + + // Full fence to ensure the epoch observation is visible before any + // reads in the critical region. This pairs with the full fence + // in TryAdvanceEpoch's scan. 
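+        //
+        // Illustrative scenario the fence rules out: if the store of
+        // (epoch | ACTIVE_FLAG) could become visible after this thread's
+        // first load of a shared pointer, TryAdvanceEpoch's scan could see
+        // the thread as quiescent, advance the epoch twice, and reclaim the
+        // memory that load is about to dereference. With both fences in
+        // place, either the scan observes ACTIVE_FLAG here or this thread
+        // observes the already-advanced global epoch.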
+ MemoryBarrier(); + } +} + +void +EbrCollector::ExitCriticalRegion() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + } + CONTRACTL_END; + + _ASSERTE(m_initialized); + + EbrThreadData* pData = &t_pThreadData; + _ASSERTE(pData->m_pCollector == this); + _ASSERTE(pData->m_criticalDepth > 0); + + pData->m_criticalDepth--; + + if (pData->m_criticalDepth == 0) + { + // Outermost exit: ensure all stores in the critical path are visible + // before clearing the active flag. + pData->m_localEpoch.Store(0); + } +} + +bool +EbrCollector::InCriticalRegion() +{ + LIMITED_METHOD_CONTRACT; + + EbrThreadData* pData = &t_pThreadData; + if (pData->m_pCollector != this) + return false; + return pData->m_criticalDepth > 0; +} + +void +EbrCollector::ThreadDetach() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + _ASSERTE(!InCriticalRegion()); + + EbrThreadData* pData = &t_pThreadData; + if (pData->m_pCollector != this) + return; + + CrstHolder lock(&m_threadListLock); + + // Physically prune detached nodes. New nodes are only ever CAS-pushed at + // the head, so unlinking interior nodes here is safe without interfering + // with concurrent inserts. + EbrThreadData** pp = &m_pThreadListHead; + while (*pp != nullptr) + { + if ((*pp) == pData) + *pp = (*pp)->m_pNext; + else + pp = &(*pp)->m_pNext; + } + + // Reset and then poison the thread's EBR data. + *pData = {}; + pData->m_pCollector = DetachedCollector; +} + +// ============================================ +// Epoch advancement +// ============================================ + +bool +EbrCollector::CanAdvanceEpoch() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + _ASSERTE(m_threadListLock.OwnedByCurrentThread()); + + uint32_t currentEpoch = m_globalEpoch.Load(); + + EbrThreadData* pData = VolatileLoad(&m_pThreadListHead); + while (pData != nullptr) + { + uint32_t localEpoch = pData->m_localEpoch.Load(); + bool active = (localEpoch & ACTIVE_FLAG) != 0; + if (active) + { + // If an active thread has not yet observed the current epoch, + // we cannot advance. + if (localEpoch != (currentEpoch | ACTIVE_FLAG)) + return false; + } + + pData = pData->m_pNext; + } + + return true; +} + +bool +EbrCollector::TryAdvanceEpoch() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + _ASSERTE(FinalizerThread::IsCurrentThreadFinalizer()); + + // Epoch advance under the lock. This prevents two concurrent callers + // from double-advancing the epoch and ensures the CanAdvanceEpoch result + // is still valid when we act on it. + CrstHolder lock(&m_threadListLock); + + // Full fence to synchronize with EnterCriticalRegion / ExitCriticalRegion. + MemoryBarrier(); + + if (!CanAdvanceEpoch()) + return false; + + uint32_t newEpoch = (m_globalEpoch.Load() + 1) % EBR_EPOCHS; + m_globalEpoch.Store(newEpoch); + + return true; +} + +// ============================================ +// Deferred deletion +// ============================================ + +// Detach the pending list for a given slot. +// Returns the head of the detached list. 
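+//
+// Worked example of the slot arithmetic: with EBR_EPOCHS == 3 and the global
+// epoch currently 2, the safe slot is (2 + 1) % 3 == 0, which holds objects
+// retired while the epoch was 0 -- i.e. retired two epochs ago, which is
+// exactly what CleanUpPending is allowed to free.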
+EbrPendingEntry* +EbrCollector::DetachQueue(uint32_t slot) +{ + LIMITED_METHOD_CONTRACT; + _ASSERTE(m_pendingLock.OwnedByCurrentThread()); + + EbrPendingEntry* pHead = m_pPendingHeads[slot]; + m_pPendingHeads[slot] = nullptr; + return pHead; +} + +void +EbrCollector::CleanUpPending() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + _ASSERTE(FinalizerThread::IsCurrentThreadFinalizer()); + + if (TryAdvanceEpoch()) + { + EbrPendingEntry* pDetached; + { + CrstHolder lock(&m_pendingLock); + + // Objects retired 2 epochs ago are safe to delete. With 3 epochs + // and clock arithmetic, the safe slot is (current + 1) % 3. + uint32_t currentEpoch = m_globalEpoch.Load(); + uint32_t safeSlot = (currentEpoch + 1) % EBR_EPOCHS; + + pDetached = DetachQueue(safeSlot); + } + + size_t freed = DeletePendingEntries(pDetached); + if (freed > 0) + { + CrstHolder lock(&m_pendingLock); + _ASSERTE((size_t)m_pendingSizeInBytes >= freed); + m_pendingSizeInBytes = (size_t)m_pendingSizeInBytes - freed; + } + } +} + +bool +EbrCollector::CleanUpRequested() +{ + LIMITED_METHOD_CONTRACT; + return m_initialized && (size_t)m_pendingSizeInBytes > 0; +} + +bool +EbrCollector::QueueForDeletion(void* pObject, EbrDeleteFunc pfnDelete, size_t estimatedSize) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + } + CONTRACTL_END; + + _ASSERTE(m_initialized); + _ASSERTE(pObject != nullptr); + _ASSERTE(pfnDelete != nullptr); + + // Must be in a critical region. + EbrThreadData* pData = &t_pThreadData; + _ASSERTE(pData->m_pCollector == this && pData->m_criticalDepth > 0); + + // Allocate pending entry. + EbrPendingEntry* pEntry = new (nothrow) EbrPendingEntry(pObject, pfnDelete, estimatedSize); + if (pEntry == nullptr) + { + // If we can't allocate, we must not delete pObject immediately, because + // EBR readers in async mode may still be traversing data structures that + // reference it. + return false; + } + + // Insert into the current epoch's pending list. + size_t oldPending; + { + CrstHolder lock(&m_pendingLock); + + oldPending = (size_t)m_pendingSizeInBytes; + uint32_t slot = m_globalEpoch.Load(); + pEntry->m_pNext = m_pPendingHeads[slot]; + m_pPendingHeads[slot] = pEntry; + m_pendingSizeInBytes = oldPending + estimatedSize; + } + + const size_t threshold = 128 * 1024; // 128KB is an arbitrary threshold for enabling finalization. Tune as needed. + if (oldPending < threshold && threshold <= (size_t)m_pendingSizeInBytes) + { + FinalizerThread::EnableFinalization(); + } + + return true; +} diff --git a/src/coreclr/vm/ebr.h b/src/coreclr/vm/ebr.h new file mode 100644 index 00000000000000..b3d7891af35cee --- /dev/null +++ b/src/coreclr/vm/ebr.h @@ -0,0 +1,152 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// ebr.h - Epoch-Based Reclamation for safe memory reclamation +// +// Implements the EBR algorithm from K. Fraser, "Practical Lock-Freedom" +// (UCAM-CL-TR-579). Provides safe, low-overhead deferred deletion for +// concurrent data structures without requiring GC suspension or COOP +// mode transitions. +// +// Usage: +// // Startup: +// g_HashMapEbr.Init(); +// +// // Reader/Writer thread: +// { +// EbrCriticalRegionHolder ebr(&g_HashMapEbr, m_fAsyncMode); +// // ... access shared data safely ... +// // Objects queued for deletion will not be freed while any thread +// // is in a critical region that observed a prior epoch. 
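+// //     (Critical regions are re-entrant: nested holders on the same thread
+// //     are fine, and only the outermost enter/exit updates the epoch word.)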
+// } +// +// // Writer thread (inside critical region), after replacing shared pointer: +// g_HashMapEbr.QueueForDeletion(pOldData, deleteFn, sizeEstimate); +// +// // Shutdown: +// The EBR collector doesn't support a shutdown feature. CoreCLR doesn't support +// clean shutdown. + +#ifndef __EBR_H__ +#define __EBR_H__ + +// Number of epoch slots: current, current-1, current-2 +static constexpr uint32_t EBR_EPOCHS = 3; + +// Callback to delete a retired object. +typedef void (*EbrDeleteFunc)(void* pObject); + +// Forward declarations +struct EbrThreadData; +struct EbrPendingEntry; + +// EBR Collector - manages epoch-based deferred reclamation. +// +// A single collector instance is typically shared across all threads that +// access a particular set of shared data structures. +class EbrCollector final +{ +public: + EbrCollector() = default; + ~EbrCollector() = default; + + EbrCollector(const EbrCollector&) = delete; + EbrCollector& operator=(const EbrCollector&) = delete; + EbrCollector(EbrCollector&&) = delete; + EbrCollector& operator=(EbrCollector&&) = delete; + + // Initialize the collector. + void Init(); + + // Enter a critical region. While in a critical region, objects queued for + // deletion will not be freed. Re-entrant: nested calls are counted. + void EnterCriticalRegion(); + + // Exit a critical region. Must pair with EnterCriticalRegion. + void ExitCriticalRegion(); + + // Queue an object for deferred deletion. Must be called from within a + // critical region. The object will be deleted via pfnDelete once all + // threads have passed through a quiescent state. + // pObject: the object to retire (must not be nullptr) + // pfnDelete: function to call to delete the object + // estimatedSize: approximate size in bytes (for tracking) + // Returns true if the object was successfully queued for deletion, false if + // the queue allocation failed (in which case the object was not queued and will not be deleted). + // Note: if queuing fails, the caller is responsible for ensuring the object is eventually deleted, + // either by retrying the queue or by deleting it directly if safe to do so. + bool QueueForDeletion(void* pObject, EbrDeleteFunc pfnDelete, size_t estimatedSize); + + // Returns true if the calling thread is currently in a critical region. + bool InCriticalRegion(); + + // Detach the calling thread from this collector. Marks per-thread EBR state + // for deferred cleanup. Should be called during thread shutdown. + void ThreadDetach(); + + // Returns true if there are pending deletions that may be reclaimable. + bool CleanUpRequested(); + + // Attempt to advance the epoch and reclaim safe pending deletions. + void CleanUpPending(); + +private: + // Thread list management + EbrThreadData* GetOrCreateThreadData(); + + // Epoch management + bool CanAdvanceEpoch(); + bool TryAdvanceEpoch(); + + // Reclamation + EbrPendingEntry* DetachQueue(uint32_t slot); + + // State + bool m_initialized = false; + + // Global epoch counter [0, EBR_EPOCHS-1] + Volatile m_globalEpoch; + + // Registered thread list (m_threadListLock used for pruning and epoch scanning) + CrstStatic m_threadListLock; + EbrThreadData* m_pThreadListHead = nullptr; + + // Pending deletion queues, one per epoch slot (protected by m_pendingLock) + CrstStatic m_pendingLock; + EbrPendingEntry* m_pPendingHeads[EBR_EPOCHS] = {}; + Volatile m_pendingSizeInBytes; +}; + +// Global EBR collector for HashMap's async mode. 
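+// Defined in ebr.cpp and initialized from EEStartupHelper() before any
+// HashMap can be created with fAsyncMode=TRUE.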
+extern EbrCollector g_HashMapEbr; + +// RAII holder for EBR critical regions, analogous to GCX_COOP pattern. +// When fEnable is false, the holder is a no-op. +class EbrCriticalRegionHolder final +{ +public: + EbrCriticalRegionHolder(EbrCollector* pCollector, bool fEnable) + : m_pCollector(fEnable ? pCollector : nullptr) + { + WRAPPER_NO_CONTRACT; + if (m_pCollector != nullptr) + m_pCollector->EnterCriticalRegion(); + } + + ~EbrCriticalRegionHolder() + { + WRAPPER_NO_CONTRACT; + if (m_pCollector != nullptr) + m_pCollector->ExitCriticalRegion(); + } + + EbrCriticalRegionHolder(const EbrCriticalRegionHolder&) = delete; + EbrCriticalRegionHolder& operator=(const EbrCriticalRegionHolder&) = delete; + EbrCriticalRegionHolder(EbrCriticalRegionHolder&&) = delete; + EbrCriticalRegionHolder& operator=(EbrCriticalRegionHolder&&) = delete; + +private: + EbrCollector* m_pCollector; +}; + +#endif // __EBR_H__ diff --git a/src/coreclr/vm/finalizerthread.cpp b/src/coreclr/vm/finalizerthread.cpp index 38ccdae6713a19..e9f701b169316c 100644 --- a/src/coreclr/vm/finalizerthread.cpp +++ b/src/coreclr/vm/finalizerthread.cpp @@ -9,6 +9,7 @@ #include "jithost.h" #include "genanalysis.h" #include "eventpipeadapter.h" +#include "ebr.h" #include "dn-stdio.h" #ifdef FEATURE_COMINTEROP @@ -151,7 +152,8 @@ bool FinalizerThread::HaveExtraWorkForFinalizer() || Thread::CleanupNeededForFinalizedThread() || YieldProcessorNormalization::IsMeasurementScheduled() || HasDelayedDynamicMethod() - || ThreadStore::s_pThreadStore->ShouldTriggerGCForDeadThreads(); + || ThreadStore::s_pThreadStore->ShouldTriggerGCForDeadThreads() + || g_HashMapEbr.CleanUpRequested(); #endif // TARGET_WASM } @@ -201,6 +203,12 @@ static void DoExtraWorkForFinalizer(Thread* finalizerThread) GCX_PREEMP(); CleanupDelayedDynamicMethods(); } + + if (g_HashMapEbr.CleanUpRequested()) + { + GCX_PREEMP(); + g_HashMapEbr.CleanUpPending(); + } } OBJECTREF FinalizerThread::GetNextFinalizableObject() diff --git a/src/coreclr/vm/hash.cpp b/src/coreclr/vm/hash.cpp index 288601b1e7e04e..4c565da2b6f78a 100644 --- a/src/coreclr/vm/hash.cpp +++ b/src/coreclr/vm/hash.cpp @@ -16,7 +16,7 @@ Module Name: #include "excep.h" -#include "syncclean.hpp" +#include "ebr.h" #include "threadsuspend.h" #include "minipal/time.h" @@ -91,9 +91,59 @@ BOOL Bucket::InsertValue(const UPTR key, const UPTR value) SetCollision(); // otherwise set the collision bit return false; } - #endif // !DACCESS_COMPILE +static DWORD GetSize(PTR_Bucket rgBuckets) +{ + LIMITED_METHOD_DAC_CONTRACT; + PTR_size_t pSize = dac_cast(rgBuckets - 1); + _ASSERTE(FitsIn(pSize[0])); + return static_cast(pSize[0]); +} + +static void SetSize(Bucket* rgBuckets, size_t size) +{ + LIMITED_METHOD_CONTRACT; + ((size_t*)rgBuckets)[0] = size; +} + +// Allocate a zero-initialized bucket array with space for 'size' buckets +// plus a leading size_t header. +static Bucket* AllocateBuckets(DWORD size) +{ + STATIC_CONTRACT_THROWS; + S_SIZE_T cbAlloc = (S_SIZE_T(size) + S_SIZE_T(1)) * S_SIZE_T(sizeof(Bucket)); + if (cbAlloc.IsOverflow()) + ThrowHR(COR_E_OVERFLOW); + Bucket* rgBuckets = (Bucket*) new BYTE[cbAlloc.Value()]; + memset(rgBuckets, 0, cbAlloc.Value()); + SetSize(rgBuckets, size); + return rgBuckets; +} + +// Free a bucket array allocated by AllocateBuckets. +static void FreeBuckets(Bucket* rgBuckets) +{ + LIMITED_METHOD_CONTRACT; + delete [] (BYTE*)rgBuckets; +} + +// Static helper for EBR deferred deletion of obsolete bucket arrays. 
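+// Matches the EbrDeleteFunc signature so Rehash() can pass it, together with
+// the retired allocation, to g_HashMapEbr.QueueForDeletion().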
+static void DeleteObsoleteBuckets(void* p) +{ + LIMITED_METHOD_CONTRACT; + FreeBuckets((Bucket*)p); +} + +// The +1 is because entries are 1 based since the first entry is a size field, not a bucket. +// See Buckets() method that works with the member variable m_rgBuckets. +// See GetSize() and SetSize() for how the size field is stored. +static PTR_Bucket GetBucketPointer(PTR_Bucket rgBuckets) +{ + LIMITED_METHOD_DAC_CONTRACT; + return rgBuckets + 1; +} + //--------------------------------------------------------------------- // inline Bucket* HashMap::Buckets() // get the pointer to the bucket array @@ -103,24 +153,11 @@ PTR_Bucket HashMap::Buckets() LIMITED_METHOD_DAC_CONTRACT; #if !defined(DACCESS_COMPILE) - _ASSERTE (!g_fEEStarted || !m_fAsyncMode || GetThreadNULLOk() == NULL || GetThread()->PreemptiveGCDisabled() || IsGCThread()); + _ASSERTE (!m_fAsyncMode || g_HashMapEbr.InCriticalRegion()); #endif - return m_rgBuckets + 1; -} - -//--------------------------------------------------------------------- -// inline size_t HashMap::GetSize(PTR_Bucket rgBuckets) -// get the number of buckets -inline -DWORD HashMap::GetSize(PTR_Bucket rgBuckets) -{ - LIMITED_METHOD_DAC_CONTRACT; - PTR_size_t pSize = dac_cast(rgBuckets - 1); - _ASSERTE(FitsIn(pSize[0])); - return static_cast(pSize[0]); + return GetBucketPointer(m_rgBuckets); } - //--------------------------------------------------------------------- // inline size_t HashMap::HashFunction(UPTR key, UINT numBuckets, UINT &seed, UINT &incr) // get the first & second hash function. @@ -144,16 +181,6 @@ void HashMap::HashFunction(const UPTR key, const UINT numBuckets, UINT &seed, UI #ifndef DACCESS_COMPILE -//--------------------------------------------------------------------- -// inline void HashMap::SetSize(Bucket *rgBuckets, size_t size) -// set the number of buckets -inline -void HashMap::SetSize(Bucket *rgBuckets, size_t size) -{ - LIMITED_METHOD_CONTRACT; - ((size_t*)rgBuckets)[0] = size; -} - //--------------------------------------------------------------------- // HashMap::HashMap() // constructor, initialize all values @@ -268,10 +295,7 @@ void HashMap::Init(DWORD cbInitialSize, Compare* pCompare, BOOL fAsyncMode, Lock DWORD size = g_rgPrimes[m_iPrimeIndex]; _ASSERTE(size < 0x7fffffff); - m_rgBuckets = new Bucket[size+1]; - - memset (m_rgBuckets, 0, (size+1)*sizeof(Bucket)); - SetSize(m_rgBuckets, size); + m_rgBuckets = AllocateBuckets(size); m_pCompare = pCompare; @@ -354,7 +378,7 @@ void HashMap::Clear() STATIC_CONTRACT_FORBID_FAULT; // free the current table - delete [] m_rgBuckets; + FreeBuckets(m_rgBuckets); m_rgBuckets = NULL; } @@ -477,8 +501,9 @@ void HashMap::InsertValue (UPTR key, UPTR value) _ASSERTE (OwnLock()); - // BROKEN: This is called for the RCWCache on the GC thread - GCX_MAYBE_COOP_NO_THREAD_BROKEN(m_fAsyncMode); + // Enter EBR critical region to protect against concurrent bucket array + // deletion during async mode. + EbrCriticalRegionHolder ebrHolder(&g_HashMapEbr, m_fAsyncMode); ASSERT(m_rgBuckets != NULL); @@ -542,14 +567,7 @@ UPTR HashMap::LookupValue(UPTR key, UPTR value) #ifndef DACCESS_COMPILE _ASSERTE (m_fAsyncMode || OwnLock()); - // BROKEN: This is called for the RCWCache on the GC thread - // Also called by AppDomain::FindCachedAssembly to resolve AssemblyRef -- this is used by stack walking on the GC thread. - // See comments in GCHeapUtilities::RestartEE (above the call to SyncClean::CleanUp) for reason to enter COOP mode. 
- // However, if the current thread is the GC thread, we know we're not going to call GCHeapUtilities::RestartEE - // while accessing the HashMap, so it's safe to proceed. - // (m_fAsyncMode && !IsGCThread() is the condition for entering COOP mode. I.e., enable COOP GC only if - // the HashMap is in async mode and this is not a GC thread.) - GCX_MAYBE_COOP_NO_THREAD_BROKEN(m_fAsyncMode && !IsGCThread()); + EbrCriticalRegionHolder ebrHolder(&g_HashMapEbr, m_fAsyncMode); ASSERT(m_rgBuckets != NULL); // This is necessary in case some other thread @@ -621,8 +639,7 @@ UPTR HashMap::ReplaceValue(UPTR key, UPTR value) _ASSERTE(OwnLock()); - // BROKEN: This is called for the RCWCache on the GC thread - GCX_MAYBE_COOP_NO_THREAD_BROKEN(m_fAsyncMode); + EbrCriticalRegionHolder ebrHolder(&g_HashMapEbr, m_fAsyncMode); ASSERT(m_rgBuckets != NULL); // This is necessary in case some other thread @@ -695,8 +712,7 @@ UPTR HashMap::DeleteValue (UPTR key, UPTR value) _ASSERTE (OwnLock()); - // BROKEN: This is called for the RCWCache on the GC thread - GCX_MAYBE_COOP_NO_THREAD_BROKEN(m_fAsyncMode); + EbrCriticalRegionHolder ebrHolder(&g_HashMapEbr, m_fAsyncMode); // check proper use in synchronous mode SyncAccessHolder holoder(this); //no-op in non DEBUG code @@ -825,7 +841,7 @@ UPTR HashMap::PutEntry (Bucket* rgBuckets, UPTR key, UPTR value) // compute the new size based on the number of free slots // inline -UPTR HashMap::NewSize() +UPTR HashMap::NewSize() const { STATIC_CONTRACT_NOTHROW; STATIC_CONTRACT_GC_NOTRIGGER; @@ -867,10 +883,9 @@ void HashMap::Rehash() STATIC_CONTRACT_GC_NOTRIGGER; STATIC_CONTRACT_FAULT; - // BROKEN: This is called for the RCWCache on the GC thread - GCX_MAYBE_COOP_NO_THREAD_BROKEN(m_fAsyncMode); + EbrCriticalRegionHolder ebrHolder(&g_HashMapEbr, m_fAsyncMode); - _ASSERTE (!g_fEEStarted || !m_fAsyncMode || GetThreadNULLOk() == NULL || GetThread()->PreemptiveGCDisabled()); + _ASSERTE (!m_fAsyncMode || g_HashMapEbr.InCriticalRegion()); _ASSERTE (OwnLock()); UPTR newPrimeIndex = NewSize(); @@ -882,46 +897,31 @@ void HashMap::Rehash() return; } - m_iPrimeIndex = newPrimeIndex; - - DWORD cbNewSize = g_rgPrimes[m_iPrimeIndex]; - - Bucket* rgBuckets = Buckets(); - UPTR cbCurrSize = GetSize(rgBuckets); - - S_SIZE_T cbNewBuckets = (S_SIZE_T(cbNewSize) + S_SIZE_T(1)) * S_SIZE_T(sizeof(Bucket)); - - if (cbNewBuckets.IsOverflow()) - ThrowHR(COR_E_OVERFLOW); - - Bucket* rgNewBuckets = (Bucket *) new BYTE[cbNewBuckets.Value()]; - memset (rgNewBuckets, 0, cbNewBuckets.Value()); - SetSize(rgNewBuckets, cbNewSize); + // Collect the current bucket state. + Bucket* rgCurrentBuckets = Buckets(); + DWORD currentBucketsSize = GetSize(rgCurrentBuckets); - // current valid slots - UPTR cbValidSlots = m_cbInserts-m_cbDeletes; - m_cbInserts = cbValidSlots; // reset insert count to the new valid count - m_cbPrevSlotsInUse = cbValidSlots; // track the previous delete count - m_cbDeletes = 0; // reset delete count - // rehash table into it + // Allocate a new array of buckets. + const DWORD cbNewSize = g_rgPrimes[newPrimeIndex]; + Bucket* rgNewBuckets = AllocateBuckets(cbNewSize); + // Rehash table into new buckets. 
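+    // In async mode, readers may still be probing rgCurrentBuckets while this
+    // runs; the old array is therefore retired through EBR below rather than
+    // freed here.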
+ UPTR cbValidSlots = m_cbInserts - m_cbDeletes; + const UPTR cbValidSlotsInit = cbValidSlots; if (cbValidSlots) // if there are valid slots to be rehashed { - for (unsigned int nb = 0; nb < cbCurrSize; nb++) + for (DWORD nb = 0; nb < currentBucketsSize; nb++) { - for (unsigned int i = 0; i < SLOTS_PER_BUCKET; i++) + for (DWORD i = 0; i < SLOTS_PER_BUCKET; i++) { - UPTR key =rgBuckets[nb].m_rgKeys[i]; + UPTR key = rgCurrentBuckets[nb].m_rgKeys[i]; if (key > DELETED) { + UPTR ntry = PutEntry(GetBucketPointer(rgNewBuckets), key, rgCurrentBuckets[nb].GetValue (i)); #ifdef HASHTABLE_PROFILE - UPTR ntry = -#endif - PutEntry (rgNewBuckets+1, key, rgBuckets[nb].GetValue (i)); - #ifdef HASHTABLE_PROFILE - if(ntry >=8) - m_cbInsertProbesGt8++; - #endif // HASHTABLE_PROFILE + if(ntry >=8) + m_cbInsertProbesGt8++; +#endif // HASHTABLE_PROFILE // check if we can bail out if (--cbValidSlots == 0) @@ -931,41 +931,67 @@ void HashMap::Rehash() } //for all buckets } - LDone: - Bucket* pObsoleteTables = m_rgBuckets; + // Capture the current buckets pointer for later deletion if needed. + // See the Buckets() APIs for why the field is used directly. + void* pObsoleteBucketsAlloc = m_rgBuckets; + if (m_fAsyncMode) + { + // In async mode, readers may still be traversing the old bucket array. + // Queue for deferred deletion via EBR. The buckets will be freed once + // all threads have exited their critical regions or later. + // If we fail to queue for deletion, throw an OOM. + size_t obsoleteSize = currentBucketsSize; + if (!g_HashMapEbr.QueueForDeletion( + pObsoleteBucketsAlloc, + DeleteObsoleteBuckets, + (obsoleteSize + 1) * sizeof(Bucket))) // See AllocateBuckets for +1 + { + // If we fail to queue for deletion, free the new allocation before throwing OOM. + FreeBuckets(rgNewBuckets); + ThrowOutOfMemory(); + } + } + + // Rename the variable names so it is clear their state. + Bucket* obsoleteBuckets = rgCurrentBuckets; + DWORD obsoleteBucketsSize = currentBucketsSize; + rgCurrentBuckets = NULL; + currentBucketsSize = 0; // memory barrier, to replace the pointer to array of bucket MemoryBarrier(); - // replace the old array with the new one. + // Update the HashMap state m_rgBuckets = rgNewBuckets; + m_iPrimeIndex = newPrimeIndex; + m_cbInserts = cbValidSlotsInit; // reset insert count to the new valid count + m_cbPrevSlotsInUse = cbValidSlotsInit; // track the previous delete count + m_cbDeletes = 0; // reset delete count - #ifdef HASHTABLE_PROFILE - m_cbRehash++; - m_cbRehashSlots+=m_cbInserts; - m_cbObsoleteTables++; // track statistics - m_cbTotalBuckets += (cbNewSize+1); - #endif // HASHTABLE_PROFILE +#ifdef HASHTABLE_PROFILE + m_cbRehash++; + m_cbRehashSlots += m_cbInserts; + m_cbObsoleteTables++; // track statistics + m_cbTotalBuckets += (cbNewSize + 1); // +1 for the size field. See AllocateBuckets for details. 
+#endif // HASHTABLE_PROFILE #ifdef _DEBUG - - unsigned nb; + DWORD nb; if (m_fAsyncMode) { // for all non deleted keys in the old table, make sure the corresponding values // are in the new lookup table - - for (nb = 1; nb <= ((size_t*)pObsoleteTables)[0]; nb++) + for (nb = 0; nb < obsoleteBucketsSize; nb++) { - for (unsigned int i =0; i < SLOTS_PER_BUCKET; i++) + for (DWORD i = 0; i < SLOTS_PER_BUCKET; i++) { - if (pObsoleteTables[nb].m_rgKeys[i] > DELETED) + if (obsoleteBuckets[nb].m_rgKeys[i] > DELETED) { - UPTR value = pObsoleteTables[nb].GetValue (i); + UPTR value = obsoleteBuckets[nb].GetValue (i); // make sure the value is present in the new table - ASSERT (m_pCompare != NULL || value == LookupValue (pObsoleteTables[nb].m_rgKeys[i], value)); + ASSERT (m_pCompare != NULL || value == LookupValue (obsoleteBuckets[nb].m_rgKeys[i], value)); } } } @@ -975,7 +1001,7 @@ void HashMap::Rehash() // if the compare function provided is null, then keys must be unique for (nb = 0; nb < cbNewSize; nb++) { - for (unsigned int i = 0; i < SLOTS_PER_BUCKET; i++) + for (DWORD i = 0; i < SLOTS_PER_BUCKET; i++) { UPTR keyv = Buckets()[nb].m_rgKeys[i]; ASSERT (keyv != DELETED); @@ -987,21 +1013,11 @@ void HashMap::Rehash() } #endif // _DEBUG - if (m_fAsyncMode) + // If non async mode, we can delete the old buckets immediately since no readers can be traversing it. + if (!m_fAsyncMode) { - // If we are allowing asynchronous reads, we must delay bucket cleanup until GC time. - SyncClean::AddHashMap (pObsoleteTables); + DeleteObsoleteBuckets(pObsoleteBucketsAlloc); } - else - { - Bucket* pBucket = pObsoleteTables; - while (pBucket) { - Bucket* pNextBucket = NextObsolete(pBucket); - delete [] pBucket; - pBucket = pNextBucket; - } - } - } //--------------------------------------------------------------------- @@ -1020,7 +1036,7 @@ void HashMap::Compact() _ASSERTE (OwnLock()); // - GCX_MAYBE_COOP_NO_THREAD_BROKEN(m_fAsyncMode); + EbrCriticalRegionHolder ebrHolder(&g_HashMapEbr, m_fAsyncMode); ASSERT(m_rgBuckets != NULL); // Try to resize if that makes sense (reduce the size of the table), but @@ -1228,10 +1244,10 @@ void HashMap::LookupPerfTest(HashMap * table, const unsigned int MinThreshold) table->LookupValue(i, i); //cout << "Lookup perf test (1000 * " << MinThreshold << ": " << (t1-t0) << " ms." 
<< endl; #ifdef HASHTABLE_PROFILE - minipal_log_print_info("Lookup perf test time: %d ms table size: %d max failure probe: %d longest collision chain: %d\n", (int) (t1-t0), (int) table->GetSize(table->Buckets()), (int) table->maxFailureProbe, (int) table->m_cbMaxCollisionLength); + minipal_log_print_info("Lookup perf test time: %d ms table size: %d max failure probe: %d longest collision chain: %d\n", (int) (t1-t0), (int) GetSize(table->Buckets()), (int) table->maxFailureProbe, (int) table->m_cbMaxCollisionLength); table->DumpStatistics(); #else // !HASHTABLE_PROFILE - minipal_log_print_info("Lookup perf test time: %d ms table size: %d\n", (int) (t1-t0), table->GetSize(table->Buckets())); + minipal_log_print_info("Lookup perf test time: %d ms table size: %d\n", (int) (t1-t0), GetSize(table->Buckets())); #endif // !HASHTABLE_PROFILE } #endif // !DACCESS_COMPILE diff --git a/src/coreclr/vm/hash.h b/src/coreclr/vm/hash.h index a3d1fcd3dcf955..99a6a640c87a6c 100644 --- a/src/coreclr/vm/hash.h +++ b/src/coreclr/vm/hash.h @@ -347,11 +347,9 @@ class HashMap // compute the new size, based on the number of free slots // available, compact or expand - UPTR NewSize(); + UPTR NewSize() const; // create a new bucket array and rehash the non-deleted entries void Rehash(); - static DWORD GetSize(PTR_Bucket rgBuckets); - static void SetSize(Bucket* rgBuckets, size_t size); PTR_Bucket Buckets(); UPTR CompareValues(const UPTR value1, const UPTR value2); @@ -793,15 +791,4 @@ class PtrHashMap #endif // DACCESS_COMPILE }; -//--------------------------------------------------------------------- -// inline Bucket*& NextObsolete (Bucket* rgBuckets) -// get the next obsolete bucket in the chain -inline -Bucket*& NextObsolete (Bucket* rgBuckets) -{ - LIMITED_METHOD_CONTRACT; - - return *(Bucket**)&((size_t*)rgBuckets)[1]; -} - #endif // !_HASH_H_ diff --git a/src/coreclr/vm/nativeimage.cpp b/src/coreclr/vm/nativeimage.cpp index 1d6a0593d09897..2be7199c172029 100644 --- a/src/coreclr/vm/nativeimage.cpp +++ b/src/coreclr/vm/nativeimage.cpp @@ -63,6 +63,8 @@ NativeImage::NativeImage(AssemblyBinder *pAssemblyBinder, ReadyToRunLoadedImage void NativeImage::Initialize(READYTORUN_HEADER *pHeader, LoaderAllocator *pLoaderAllocator, AllocMemTracker *pamTracker) { + STANDARD_VM_CONTRACT; + LoaderHeap *pHeap = pLoaderAllocator->GetHighFrequencyHeap(); m_pReadyToRunInfo = new ReadyToRunInfo(/*pModule*/ NULL, pLoaderAllocator, pHeader, this, m_pImageLayout, pamTracker); diff --git a/src/coreclr/vm/readytoruninfo.cpp b/src/coreclr/vm/readytoruninfo.cpp index 3cd42e18b8ff5a..cb5c93fd792b06 100644 --- a/src/coreclr/vm/readytoruninfo.cpp +++ b/src/coreclr/vm/readytoruninfo.cpp @@ -386,18 +386,12 @@ void ReadyToRunInfo::SetMethodDescForEntryPointInNativeImage(PCODE entryPoint, M { CONTRACTL { + STANDARD_VM_CHECK; PRECONDITION(!m_isComponentAssembly); } CONTRACTL_END; - // We are entering coop mode here so that we don't do it later inside LookupMap while we are already holding the Crst. - // Doing it in the other order can block the debugger from running func-evals. For example thread A would acquire the Crst, - // then block at the coop transition inside LookupMap waiting for the debugger to resume from a break state. The debugger then - // requests thread B to run a funceval, the funceval tries to load some R2R method calling in here, then it blocks because - // thread A is holding the Crst. 
- GCX_COOP(); CrstHolder ch(&m_Crst); - if ((TADDR)m_entryPointToMethodDescMap.LookupValue(PCODEToPINSTR(entryPoint), (LPVOID)PCODEToPINSTR(entryPoint)) == (TADDR)INVALIDENTRY) { m_entryPointToMethodDescMap.InsertValue(PCODEToPINSTR(entryPoint), methodDesc); @@ -774,7 +768,7 @@ ReadyToRunInfo::ReadyToRunInfo(Module * pModule, LoaderAllocator* pLoaderAllocat m_pHeader(pHeader), m_pNativeImage(pModule != NULL ? pNativeImage: NULL), // m_pNativeImage is only set for composite image components, not the composite R2R info itself m_readyToRunCodeDisabled(FALSE), - m_Crst(CrstReadyToRunEntryPointToMethodDescMap, CRST_UNSAFE_COOPGC), + m_Crst(CrstReadyToRunEntryPointToMethodDescMap), m_pPersistentInlineTrackingMap(NULL), m_pNextR2RForUnrelatedCode(NULL) { diff --git a/src/coreclr/vm/syncclean.cpp b/src/coreclr/vm/syncclean.cpp index 2723047f0901dc..f0d3bdbd59c165 100644 --- a/src/coreclr/vm/syncclean.cpp +++ b/src/coreclr/vm/syncclean.cpp @@ -12,7 +12,6 @@ #include "interpexec.h" #endif -VolatilePtr SyncClean::m_HashMap = NULL; VolatilePtr SyncClean::m_EEHashTable; void SyncClean::Terminate() @@ -25,26 +24,6 @@ void SyncClean::Terminate() CleanUp(); } -void SyncClean::AddHashMap (Bucket *bucket) -{ - WRAPPER_NO_CONTRACT; - - if (!g_fEEStarted) { - delete [] bucket; - return; - } - - _ASSERTE (GetThreadNULLOk() == NULL || GetThread()->PreemptiveGCDisabled()); - - Bucket * pTempBucket = NULL; - do - { - pTempBucket = (Bucket *)m_HashMap; - NextObsolete (bucket) = pTempBucket; - } - while (InterlockedCompareExchangeT(m_HashMap.GetPointer(), bucket, pTempBucket) != pTempBucket); -} - void SyncClean::AddEEHashTable (EEHashEntry** entry) { WRAPPER_NO_CONTRACT; @@ -73,17 +52,6 @@ void SyncClean::CleanUp () _ASSERTE (IsAtProcessExit() || IsGCSpecialThread() || (GCHeapUtilities::IsGCInProgress() && GetThreadNULLOk() == ThreadSuspend::GetSuspensionThread())); - if (m_HashMap) - { - Bucket * pTempBucket = InterlockedExchangeT(m_HashMap.GetPointer(), NULL); - - while (pTempBucket) - { - Bucket* pNextBucket = NextObsolete (pTempBucket); - delete [] pTempBucket; - pTempBucket = pNextBucket; - } - } if (m_EEHashTable) { diff --git a/src/coreclr/vm/syncclean.hpp b/src/coreclr/vm/syncclean.hpp index c203b7245d103c..7b8761c76c765b 100644 --- a/src/coreclr/vm/syncclean.hpp +++ b/src/coreclr/vm/syncclean.hpp @@ -9,21 +9,17 @@ // To make this work, we need to make sure that these data are accessed in cooperative GC // mode. -class Bucket; struct EEHashEntry; -class Crst; -class CrstStatic; -class SyncClean { +class SyncClean final +{ public: static void Terminate (); - static void AddHashMap (Bucket *bucket); static void AddEEHashTable (EEHashEntry** entry); static void CleanUp (); private: - static VolatilePtr m_HashMap; // Cleanup list for HashMap static VolatilePtr m_EEHashTable; // Cleanup list for EEHashTable }; #endif