From 2b2ca520b5feecc949cf001a66b2d02b025777e4 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Wed, 8 Jan 2025 15:47:32 -0800 Subject: [PATCH 01/41] Move the cached interface dispatch code into a shared region --- src/coreclr/nativeaot/Runtime/CMakeLists.txt | 6 ++++-- .../Runtime => shared_runtime}/CachedInterfaceDispatch.cpp | 0 .../Runtime => shared_runtime}/CachedInterfaceDispatch.h | 0 .../Runtime => shared_runtime}/amd64/StubDispatch.S | 0 .../Runtime => shared_runtime}/amd64/StubDispatch.asm | 0 .../Runtime => shared_runtime}/arm/StubDispatch.S | 0 .../Runtime => shared_runtime}/arm64/StubDispatch.S | 0 .../Runtime => shared_runtime}/arm64/StubDispatch.asm | 0 .../Runtime => shared_runtime}/i386/StubDispatch.S | 0 .../Runtime => shared_runtime}/i386/StubDispatch.asm | 0 .../Runtime => shared_runtime}/loongarch64/StubDispatch.S | 0 11 files changed, 4 insertions(+), 2 deletions(-) rename src/coreclr/{nativeaot/Runtime => shared_runtime}/CachedInterfaceDispatch.cpp (100%) rename src/coreclr/{nativeaot/Runtime => shared_runtime}/CachedInterfaceDispatch.h (100%) rename src/coreclr/{nativeaot/Runtime => shared_runtime}/amd64/StubDispatch.S (100%) rename src/coreclr/{nativeaot/Runtime => shared_runtime}/amd64/StubDispatch.asm (100%) rename src/coreclr/{nativeaot/Runtime => shared_runtime}/arm/StubDispatch.S (100%) rename src/coreclr/{nativeaot/Runtime => shared_runtime}/arm64/StubDispatch.S (100%) rename src/coreclr/{nativeaot/Runtime => shared_runtime}/arm64/StubDispatch.asm (100%) rename src/coreclr/{nativeaot/Runtime => shared_runtime}/i386/StubDispatch.S (100%) rename src/coreclr/{nativeaot/Runtime => shared_runtime}/i386/StubDispatch.asm (100%) rename src/coreclr/{nativeaot/Runtime => shared_runtime}/loongarch64/StubDispatch.S (100%) diff --git a/src/coreclr/nativeaot/Runtime/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/CMakeLists.txt index 752844e740d8f4..86ae207554e8cb 100644 --- a/src/coreclr/nativeaot/Runtime/CMakeLists.txt +++ 
b/src/coreclr/nativeaot/Runtime/CMakeLists.txt @@ -1,9 +1,10 @@ set(GC_DIR ../../gc) +set(SHARED_RUNTIME_DIR ../../shared_runtime) set(COMMON_RUNTIME_SOURCES allocheap.cpp rhassert.cpp - CachedInterfaceDispatch.cpp + ${SHARED_RUNTIME_DIR}/CachedInterfaceDispatch.cpp Crst.cpp DebugHeader.cpp MethodTable.cpp @@ -76,6 +77,7 @@ include_directories(.) include_directories(${GC_DIR}) include_directories(${GC_DIR}/env) include_directories(${CMAKE_CURRENT_BINARY_DIR}/eventpipe/inc) +include_directories(${SHARED_RUNTIME_DIR}) if (WIN32) set(GC_HEADERS @@ -208,7 +210,7 @@ list(APPEND RUNTIME_SOURCES_ARCH_ASM ${ARCH_SOURCES_DIR}/MiscStubs.${ASM_SUFFIX} ${ARCH_SOURCES_DIR}/PInvoke.${ASM_SUFFIX} ${ARCH_SOURCES_DIR}/InteropThunksHelpers.${ASM_SUFFIX} - ${ARCH_SOURCES_DIR}/StubDispatch.${ASM_SUFFIX} + ${SHARED_RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.${ASM_SUFFIX} ${ARCH_SOURCES_DIR}/UniversalTransition.${ASM_SUFFIX} ${ARCH_SOURCES_DIR}/WriteBarriers.${ASM_SUFFIX} ) diff --git a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch.cpp b/src/coreclr/shared_runtime/CachedInterfaceDispatch.cpp similarity index 100% rename from src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch.cpp rename to src/coreclr/shared_runtime/CachedInterfaceDispatch.cpp diff --git a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch.h b/src/coreclr/shared_runtime/CachedInterfaceDispatch.h similarity index 100% rename from src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch.h rename to src/coreclr/shared_runtime/CachedInterfaceDispatch.h diff --git a/src/coreclr/nativeaot/Runtime/amd64/StubDispatch.S b/src/coreclr/shared_runtime/amd64/StubDispatch.S similarity index 100% rename from src/coreclr/nativeaot/Runtime/amd64/StubDispatch.S rename to src/coreclr/shared_runtime/amd64/StubDispatch.S diff --git a/src/coreclr/nativeaot/Runtime/amd64/StubDispatch.asm b/src/coreclr/shared_runtime/amd64/StubDispatch.asm similarity index 100% rename from src/coreclr/nativeaot/Runtime/amd64/StubDispatch.asm 
rename to src/coreclr/shared_runtime/amd64/StubDispatch.asm diff --git a/src/coreclr/nativeaot/Runtime/arm/StubDispatch.S b/src/coreclr/shared_runtime/arm/StubDispatch.S similarity index 100% rename from src/coreclr/nativeaot/Runtime/arm/StubDispatch.S rename to src/coreclr/shared_runtime/arm/StubDispatch.S diff --git a/src/coreclr/nativeaot/Runtime/arm64/StubDispatch.S b/src/coreclr/shared_runtime/arm64/StubDispatch.S similarity index 100% rename from src/coreclr/nativeaot/Runtime/arm64/StubDispatch.S rename to src/coreclr/shared_runtime/arm64/StubDispatch.S diff --git a/src/coreclr/nativeaot/Runtime/arm64/StubDispatch.asm b/src/coreclr/shared_runtime/arm64/StubDispatch.asm similarity index 100% rename from src/coreclr/nativeaot/Runtime/arm64/StubDispatch.asm rename to src/coreclr/shared_runtime/arm64/StubDispatch.asm diff --git a/src/coreclr/nativeaot/Runtime/i386/StubDispatch.S b/src/coreclr/shared_runtime/i386/StubDispatch.S similarity index 100% rename from src/coreclr/nativeaot/Runtime/i386/StubDispatch.S rename to src/coreclr/shared_runtime/i386/StubDispatch.S diff --git a/src/coreclr/nativeaot/Runtime/i386/StubDispatch.asm b/src/coreclr/shared_runtime/i386/StubDispatch.asm similarity index 100% rename from src/coreclr/nativeaot/Runtime/i386/StubDispatch.asm rename to src/coreclr/shared_runtime/i386/StubDispatch.asm diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/StubDispatch.S b/src/coreclr/shared_runtime/loongarch64/StubDispatch.S similarity index 100% rename from src/coreclr/nativeaot/Runtime/loongarch64/StubDispatch.S rename to src/coreclr/shared_runtime/loongarch64/StubDispatch.S From 489267437187fd2a0036441f5419be11d290399e Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Tue, 14 Jan 2025 14:15:18 -0800 Subject: [PATCH 02/41] Split cached interface dispatch up into a component which is focussed on shared things, and parts that are not shared --- src/coreclr/nativeaot/Runtime/CMakeLists.txt | 1 + .../Runtime/CachedInterfaceDispatchAot.cpp | 59 
+++++++++++++++ .../Runtime/CachedInterfaceDispatchPal.h | 26 +++++++ src/coreclr/nativeaot/Runtime/SyncClean.cpp | 2 +- src/coreclr/nativeaot/Runtime/startup.cpp | 2 +- .../CachedInterfaceDispatch.cpp | 73 ++----------------- .../shared_runtime/CachedInterfaceDispatch.h | 26 ++++++- 7 files changed, 119 insertions(+), 70 deletions(-) create mode 100644 src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchAot.cpp create mode 100644 src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchPal.h diff --git a/src/coreclr/nativeaot/Runtime/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/CMakeLists.txt index 86ae207554e8cb..278790c3a5cd56 100644 --- a/src/coreclr/nativeaot/Runtime/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/CMakeLists.txt @@ -5,6 +5,7 @@ set(COMMON_RUNTIME_SOURCES allocheap.cpp rhassert.cpp ${SHARED_RUNTIME_DIR}/CachedInterfaceDispatch.cpp + CachedInterfaceDispatchAot.cpp Crst.cpp DebugHeader.cpp MethodTable.cpp diff --git a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchAot.cpp b/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchAot.cpp new file mode 100644 index 00000000000000..1898dd058d5bf9 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchAot.cpp @@ -0,0 +1,59 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" +#include "CachedInterfaceDispatchPal.h" +#include "CachedInterfaceDispatch.h" +#include "RedhawkWarnings.h" +#include "TargetPtrs.h" +#include "MethodTable.h" +#include "Range.h" +#include "allocheap.h" + +// The base memory allocator. 
+static AllocHeap * g_pAllocHeap = NULL; + +bool InterfaceDispatch_InitializePal() +{ + g_pAllocHeap = new (nothrow) AllocHeap(); + if (g_pAllocHeap == NULL) + return false; + + if (!g_pAllocHeap->Init()) + return false; + + return true; +} + +// Allocate memory aligned at sizeof(void*)*2 boundaries +void *InterfaceDispatch_AllocDoublePointerAligned(size_t size) +{ + return g_pAllocHeap->AllocAligned(size, sizeof(void*) * 2); +} + +// Allocate memory aligned at at least sizeof(void*) +void *InterfaceDispatch_AllocPointerAligned(size_t size) +{ + return g_pAllocHeap->Alloc(size); +} + +FCIMPL4(PCODE, RhpUpdateDispatchCellCache, InterfaceDispatchCell * pCell, PCODE pTargetCode, MethodTable* pInstanceType, DispatchCellInfo *pNewCellInfo) +{ + return InterfaceDispatch_UpdateDispatchCellCache(pCell, pTargetCode, pInstanceType, pNewCellInfo); +} +FCIMPLEND + +FCIMPL2(PCODE, RhpSearchDispatchCellCache, InterfaceDispatchCell * pCell, MethodTable* pInstanceType) +{ + return InterfaceDispatch_SearchDispatchCellCache(pCell, pInstanceType); +} +FCIMPLEND + +// Given a dispatch cell, get the type and slot associated with it. This function MUST be implemented +// in cooperative native code, as the m_pCache field on the cell is unsafe to access from managed +// code due to its use of the GC state as a lock, and as lifetime control +FCIMPL2(void, RhpGetDispatchCellInfo, InterfaceDispatchCell * pCell, DispatchCellInfo* pDispatchCellInfo) +{ + *pDispatchCellInfo = pCell->GetDispatchCellInfo(); +} +FCIMPLEND diff --git a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchPal.h b/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchPal.h new file mode 100644 index 00000000000000..938a82a97cb293 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchPal.h @@ -0,0 +1,26 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#ifndef __CACHEDINTERFACEDISPATCHPAL_H__ +#define __CACHEDINTERFACEDISPATCHPAL_H__ + +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "DebugMacrosExt.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "holder.h" +#include "Crst.h" +#include "rhbinder.h" + +bool InterfaceDispatch_InitializePal(); + +// Allocate memory aligned at sizeof(void*)*2 boundaries +void *InterfaceDispatch_AllocDoublePointerAligned(size_t size); +// Allocate memory aligned at at least sizeof(void*) +void *InterfaceDispatch_AllocPointerAligned(size_t size); + +#endif // __CACHEDINTERFACEDISPATCHPAL_H__ \ No newline at end of file diff --git a/src/coreclr/nativeaot/Runtime/SyncClean.cpp b/src/coreclr/nativeaot/Runtime/SyncClean.cpp index 8204193f50f140..a856a584744acd 100644 --- a/src/coreclr/nativeaot/Runtime/SyncClean.cpp +++ b/src/coreclr/nativeaot/Runtime/SyncClean.cpp @@ -23,6 +23,6 @@ void SyncClean::CleanUp () { #ifdef FEATURE_CACHED_INTERFACE_DISPATCH // Update any interface dispatch caches that were unsafe to modify outside of this GC. - ReclaimUnusedInterfaceDispatchCaches(); + InterfaceDispatch_ReclaimUnusedInterfaceDispatchCaches(); #endif } diff --git a/src/coreclr/nativeaot/Runtime/startup.cpp b/src/coreclr/nativeaot/Runtime/startup.cpp index af835018e1823a..1875fdc671a991 100644 --- a/src/coreclr/nativeaot/Runtime/startup.cpp +++ b/src/coreclr/nativeaot/Runtime/startup.cpp @@ -95,7 +95,7 @@ static bool InitDLL(HANDLE hPalInstance) // // Initialize interface dispatch. 
// - if (!InitializeInterfaceDispatch()) + if (!InterfaceDispatch_Initialize()) return false; #endif diff --git a/src/coreclr/shared_runtime/CachedInterfaceDispatch.cpp b/src/coreclr/shared_runtime/CachedInterfaceDispatch.cpp index 2938ee70974073..c1fc02caaa05cb 100644 --- a/src/coreclr/shared_runtime/CachedInterfaceDispatch.cpp +++ b/src/coreclr/shared_runtime/CachedInterfaceDispatch.cpp @@ -8,30 +8,7 @@ // ============================================================================ #include "common.h" #ifdef FEATURE_CACHED_INTERFACE_DISPATCH - -#include "CommonTypes.h" -#include "CommonMacros.h" -#include "daccess.h" -#include "DebugMacrosExt.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" -#include "rhassert.h" -#include "slist.h" -#include "holder.h" -#include "Crst.h" -#include "RedhawkWarnings.h" -#include "TargetPtrs.h" -#include "MethodTable.h" -#include "Range.h" -#include "allocheap.h" -#include "rhbinder.h" -#include "ObjectLayout.h" -#include "shash.h" -#include "TypeManager.h" -#include "RuntimeInstance.h" -#include "MethodTable.inl" -#include "CommonMacros.inl" - +#include "CachedInterfaceDispatchPal.h" #include "CachedInterfaceDispatch.h" // We always allocate cache sizes with a power of 2 number of entries. We have a maximum size we support, @@ -212,9 +189,6 @@ static InterfaceDispatchCache * g_rgFreeLists[CID_MAX_CACHE_SIZE_LOG2 + 1]; // it imposes too much space overhead on list entries on 64-bit (each is actually 16 bytes). static CrstStatic g_sListLock; -// The base memory allocator. -static AllocHeap * g_pAllocHeap = NULL; - // Each cache size has an associated stub used to perform lookup over that cache. extern "C" void RhpInterfaceDispatch1(); extern "C" void RhpInterfaceDispatch2(); @@ -299,9 +273,8 @@ static uintptr_t AllocateCache(uint32_t cCacheEntries, InterfaceDispatchCache * if (pCache == NULL) { // No luck with the free list, allocate the cache from via the AllocHeap. 
- pCache = (InterfaceDispatchCache*)g_pAllocHeap->AllocAligned(sizeof(InterfaceDispatchCache) + - (sizeof(InterfaceDispatchCacheEntry) * cCacheEntries), - sizeof(void*) * 2); + pCache = (InterfaceDispatchCache*)InterfaceDispatch_AllocDoublePointerAligned(sizeof(InterfaceDispatchCache) + + (sizeof(InterfaceDispatchCacheEntry) * cCacheEntries)); if (pCache == NULL) return (uintptr_t)NULL; @@ -365,7 +338,7 @@ static void DiscardCache(InterfaceDispatchCache * pCache) if (pDiscardedCacheBlock != NULL) g_pDiscardedCacheFree = pDiscardedCacheBlock->m_pNext; else - pDiscardedCacheBlock = (DiscardedCacheBlock *)g_pAllocHeap->Alloc(sizeof(DiscardedCacheBlock)); + pDiscardedCacheBlock = (DiscardedCacheBlock *)InterfaceDispatch_AllocPointerAligned(sizeof(DiscardedCacheBlock)); if (pDiscardedCacheBlock != NULL) // if we did NOT get the memory, we leak the discarded block { @@ -379,7 +352,7 @@ static void DiscardCache(InterfaceDispatchCache * pCache) // Called during a GC to empty the list of discarded caches (which we can now guarantee aren't being accessed) // and sort the results into the free lists we maintain for each cache size. -void ReclaimUnusedInterfaceDispatchCaches() +void InterfaceDispatch_ReclaimUnusedInterfaceDispatchCaches() { // No need for any locks, we're not racing with any other threads any more. @@ -431,13 +404,9 @@ void ReclaimUnusedInterfaceDispatchCaches() } // One time initialization of interface dispatch. 
-bool InitializeInterfaceDispatch() +bool InterfaceDispatch_Initialize() { - g_pAllocHeap = new (nothrow) AllocHeap(); - if (g_pAllocHeap == NULL) - return false; - - if (!g_pAllocHeap->Init()) + if (!InterfaceDispatch_InitializePal()) return false; g_sListLock.Init(CrstInterfaceDispatchGlobalLists, CRST_DEFAULT); @@ -445,7 +414,7 @@ bool InitializeInterfaceDispatch() return true; } -FCIMPL4(PCODE, RhpUpdateDispatchCellCache, InterfaceDispatchCell * pCell, PCODE pTargetCode, MethodTable* pInstanceType, DispatchCellInfo *pNewCellInfo) +PCODE InterfaceDispatch_UpdateDispatchCellCache(InterfaceDispatchCell * pCell, PCODE pTargetCode, MethodTable* pInstanceType, DispatchCellInfo *pNewCellInfo) { // Attempt to update the cache with this new mapping (if we have any cache at all, the initial state // is none). @@ -515,31 +484,5 @@ FCIMPL4(PCODE, RhpUpdateDispatchCellCache, InterfaceDispatchCell * pCell, PCODE return (PCODE)pTargetCode; } -FCIMPLEND - -FCIMPL2(PCODE, RhpSearchDispatchCellCache, InterfaceDispatchCell * pCell, MethodTable* pInstanceType) -{ - // This function must be implemented in native code so that we do not take a GC while walking the cache - InterfaceDispatchCache * pCache = (InterfaceDispatchCache*)pCell->GetCache(); - if (pCache != NULL) - { - InterfaceDispatchCacheEntry * pCacheEntry = pCache->m_rgEntries; - for (uint32_t i = 0; i < pCache->m_cEntries; i++, pCacheEntry++) - if (pCacheEntry->m_pInstanceType == pInstanceType) - return pCacheEntry->m_pTargetCode; - } - - return (PCODE)nullptr; -} -FCIMPLEND - -// Given a dispatch cell, get the type and slot associated with it. 
This function MUST be implemented -// in cooperative native code, as the m_pCache field on the cell is unsafe to access from managed -// code due to its use of the GC state as a lock, and as lifetime control -FCIMPL2(void, RhpGetDispatchCellInfo, InterfaceDispatchCell * pCell, DispatchCellInfo* pDispatchCellInfo) -{ - *pDispatchCellInfo = pCell->GetDispatchCellInfo(); -} -FCIMPLEND #endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/shared_runtime/CachedInterfaceDispatch.h b/src/coreclr/shared_runtime/CachedInterfaceDispatch.h index ea0f7841164be1..bc85a4e1cf8bb0 100644 --- a/src/coreclr/shared_runtime/CachedInterfaceDispatch.h +++ b/src/coreclr/shared_runtime/CachedInterfaceDispatch.h @@ -7,10 +7,10 @@ // // ============================================================================ -#ifdef FEATURE_CACHED_INTERFACE_DISPATCH +#ifndef __CACHEDINTERFACEDISPATCH_H__ +#define __CACHEDINTERFACEDISPATCH_H__ -bool InitializeInterfaceDispatch(); -void ReclaimUnusedInterfaceDispatchCaches(); +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH // Interface dispatch caches contain an array of these entries. An instance of a cache is paired with a stub // that implicitly knows how many entries are contained. 
These entries must be aligned to twice the alignment @@ -46,4 +46,24 @@ struct InterfaceDispatchCache }; #pragma warning(pop) +bool InterfaceDispatch_Initialize(); +PCODE InterfaceDispatch_UpdateDispatchCellCache(InterfaceDispatchCell * pCell, PCODE pTargetCode, MethodTable* pInstanceType, DispatchCellInfo *pNewCellInfo); +void InterfaceDispatch_ReclaimUnusedInterfaceDispatchCaches(); +PCODE InterfaceDispatch_SearchDispatchCellCache(InterfaceDispatchCell * pCell, MethodTable* pInstanceType) +{ + // This function must be implemented in native code so that we do not take a GC while walking the cache + InterfaceDispatchCache * pCache = (InterfaceDispatchCache*)pCell->GetCache(); + if (pCache != NULL) + { + InterfaceDispatchCacheEntry * pCacheEntry = pCache->m_rgEntries; + for (uint32_t i = 0; i < pCache->m_cEntries; i++, pCacheEntry++) + if (pCacheEntry->m_pInstanceType == pInstanceType) + return pCacheEntry->m_pTargetCode; + } + + return (PCODE)nullptr; +} + #endif // FEATURE_CACHED_INTERFACE_DISPATCH + +#endif // __CACHEDINTERFACEDISPATCH_H__ \ No newline at end of file From d69528f930355df9d824d6a87d54721893f2645d Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Thu, 16 Jan 2025 11:12:55 -0800 Subject: [PATCH 03/41] It builds for X64, VTable stuff probably isn't correct, but it's basically in place --- src/coreclr/inc/CrstTypes.def | 3 + src/coreclr/inc/crsttypes_generated.h | 123 ++++++------ src/coreclr/jit/compiler.h | 12 +- .../nativeaot/Runtime/amd64/MiscStubs.asm | 28 +++ src/coreclr/nativeaot/Runtime/inc/rhbinder.h | 5 + .../CachedInterfaceDispatch.cpp | 5 +- .../shared_runtime/CachedInterfaceDispatch.h | 3 +- .../shared_runtime/amd64/StubDispatch.asm | 45 +---- src/coreclr/vm/CMakeLists.txt | 13 ++ .../vm/CachedInterfaceDispatchCoreclr.cpp | 21 ++ src/coreclr/vm/CachedInterfaceDispatchPal.h | 189 ++++++++++++++++++ src/coreclr/vm/amd64/AsmMacros_Shared.inc | 7 + src/coreclr/vm/amd64/VirtualCallStubAMD64.asm | 46 +++++ src/coreclr/vm/amd64/asmconstants.h | 
8 + src/coreclr/vm/ceeload.cpp | 3 + src/coreclr/vm/virtualcallstub.cpp | 122 +++++++++++ src/coreclr/vm/virtualcallstub.h | 10 +- 17 files changed, 527 insertions(+), 116 deletions(-) create mode 100644 src/coreclr/vm/CachedInterfaceDispatchCoreclr.cpp create mode 100644 src/coreclr/vm/CachedInterfaceDispatchPal.h create mode 100644 src/coreclr/vm/amd64/AsmMacros_Shared.inc diff --git a/src/coreclr/inc/CrstTypes.def b/src/coreclr/inc/CrstTypes.def index f51399812a0826..aee2bb64a48ff3 100644 --- a/src/coreclr/inc/CrstTypes.def +++ b/src/coreclr/inc/CrstTypes.def @@ -532,3 +532,6 @@ End Crst PerfMap AcquiredAfter CodeVersioning AssemblyList End + +Crst InterfaceDispatchGlobalLists +End \ No newline at end of file diff --git a/src/coreclr/inc/crsttypes_generated.h b/src/coreclr/inc/crsttypes_generated.h index 8fde9aa204abf9..620eb3552d2144 100644 --- a/src/coreclr/inc/crsttypes_generated.h +++ b/src/coreclr/inc/crsttypes_generated.h @@ -60,66 +60,67 @@ enum CrstType CrstILStubGen = 42, CrstInlineTrackingMap = 43, CrstInstMethodHashTable = 44, - CrstInterop = 45, - CrstInteropData = 46, - CrstIsJMCMethod = 47, - CrstISymUnmanagedReader = 48, - CrstJit = 49, - CrstJitInlineTrackingMap = 50, - CrstJitPatchpoint = 51, - CrstJumpStubCache = 52, - CrstLeafLock = 53, - CrstListLock = 54, - CrstLoaderAllocator = 55, - CrstLoaderAllocatorReferences = 56, - CrstLoaderHeap = 57, - CrstManagedObjectWrapperMap = 58, - CrstMethodDescBackpatchInfoTracker = 59, - CrstMethodTableExposedObject = 60, - CrstModule = 61, - CrstModuleLookupTable = 62, - CrstMulticoreJitHash = 63, - CrstMulticoreJitManager = 64, - CrstNativeImageEagerFixups = 65, - CrstNativeImageLoad = 66, - CrstNotifyGdb = 67, - CrstPEImage = 68, - CrstPendingTypeLoadEntry = 69, - CrstPerfMap = 70, - CrstPgoData = 71, - CrstPinnedByrefValidation = 72, - CrstPinnedHeapHandleTable = 73, - CrstProfilerGCRefDataFreeList = 74, - CrstProfilingAPIStatus = 75, - CrstRCWCache = 76, - CrstRCWCleanupList = 77, - 
CrstReadyToRunEntryPointToMethodDescMap = 78, - CrstReflection = 79, - CrstReJITGlobalRequest = 80, - CrstRetThunkCache = 81, - CrstSigConvert = 82, - CrstSingleUseLock = 83, - CrstStressLog = 84, - CrstStubCache = 85, - CrstStubDispatchCache = 86, - CrstSyncBlockCache = 87, - CrstSyncHashLock = 88, - CrstSystemDomain = 89, - CrstSystemDomainDelayedUnloadList = 90, - CrstThreadIdDispenser = 91, - CrstThreadLocalStorageLock = 92, - CrstThreadStore = 93, - CrstTieredCompilation = 94, - CrstTypeEquivalenceMap = 95, - CrstTypeIDMap = 96, - CrstUMEntryThunkCache = 97, - CrstUMEntryThunkFreeListLock = 98, - CrstUniqueStack = 99, - CrstUnresolvedClassLock = 100, - CrstUnwindInfoTableLock = 101, - CrstVSDIndirectionCellLock = 102, - CrstWrapperTemplate = 103, - kNumberOfCrstTypes = 104 + CrstInterfaceDispatchGlobalLists = 45, + CrstInterop = 46, + CrstInteropData = 47, + CrstIsJMCMethod = 48, + CrstISymUnmanagedReader = 49, + CrstJit = 50, + CrstJitInlineTrackingMap = 51, + CrstJitPatchpoint = 52, + CrstJumpStubCache = 53, + CrstLeafLock = 54, + CrstListLock = 55, + CrstLoaderAllocator = 56, + CrstLoaderAllocatorReferences = 57, + CrstLoaderHeap = 58, + CrstManagedObjectWrapperMap = 59, + CrstMethodDescBackpatchInfoTracker = 60, + CrstMethodTableExposedObject = 61, + CrstModule = 62, + CrstModuleLookupTable = 63, + CrstMulticoreJitHash = 64, + CrstMulticoreJitManager = 65, + CrstNativeImageEagerFixups = 66, + CrstNativeImageLoad = 67, + CrstNotifyGdb = 68, + CrstPEImage = 69, + CrstPendingTypeLoadEntry = 70, + CrstPerfMap = 71, + CrstPgoData = 72, + CrstPinnedByrefValidation = 73, + CrstPinnedHeapHandleTable = 74, + CrstProfilerGCRefDataFreeList = 75, + CrstProfilingAPIStatus = 76, + CrstRCWCache = 77, + CrstRCWCleanupList = 78, + CrstReadyToRunEntryPointToMethodDescMap = 79, + CrstReflection = 80, + CrstReJITGlobalRequest = 81, + CrstRetThunkCache = 82, + CrstSigConvert = 83, + CrstSingleUseLock = 84, + CrstStressLog = 85, + CrstStubCache = 86, + CrstStubDispatchCache = 
87, + CrstSyncBlockCache = 88, + CrstSyncHashLock = 89, + CrstSystemDomain = 90, + CrstSystemDomainDelayedUnloadList = 91, + CrstThreadIdDispenser = 92, + CrstThreadLocalStorageLock = 93, + CrstThreadStore = 94, + CrstTieredCompilation = 95, + CrstTypeEquivalenceMap = 96, + CrstTypeIDMap = 97, + CrstUMEntryThunkCache = 98, + CrstUMEntryThunkFreeListLock = 99, + CrstUniqueStack = 100, + CrstUnresolvedClassLock = 101, + CrstUnwindInfoTableLock = 102, + CrstVSDIndirectionCellLock = 103, + CrstWrapperTemplate = 104, + kNumberOfCrstTypes = 105 }; #endif // __CRST_TYPES_INCLUDED @@ -175,6 +176,7 @@ int g_rgCrstLevelMap[] = 6, // CrstILStubGen 2, // CrstInlineTrackingMap 18, // CrstInstMethodHashTable + 0, // CrstInterfaceDispatchGlobalLists 21, // CrstInterop 9, // CrstInteropData 0, // CrstIsJMCMethod @@ -284,6 +286,7 @@ LPCSTR g_rgCrstNameMap[] = "CrstILStubGen", "CrstInlineTrackingMap", "CrstInstMethodHashTable", + "CrstInterfaceDispatchGlobalLists", "CrstInterop", "CrstInteropData", "CrstIsJMCMethod", diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 626e6edfa38c9a..37e3fd608c6bde 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8471,16 +8471,8 @@ class Compiler reg = REG_EAX; regMask = RBM_EAX; #elif defined(TARGET_AMD64) - if (isNativeAOT) - { - reg = REG_R10; - regMask = RBM_R10; - } - else - { - reg = REG_R11; - regMask = RBM_R11; - } + reg = REG_R11; + regMask = RBM_R11; #elif defined(TARGET_ARM) if (isNativeAOT) { diff --git a/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.asm b/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.asm index 098c402b2106ee..2f18ce28227bc1 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.asm @@ -3,6 +3,9 @@ include AsmMacros.inc +EXTERN RhpCidResolve : PROC +EXTERN RhpUniversalTransition_DebugStepTailCall : PROC + 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; The following helper will access ("probe") a word on each page of the stack ; starting with the page right beneath rsp down to the one pointed to by r11. @@ -37,4 +40,29 @@ ProbeLoop: LEAF_END RhpStackProbe, _TEXT +;; Stub dispatch routine for dispatch to a vtable slot +LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + ;; r11 currently contains the indirection cell address. + ;; load rax to point to the vtable offset (which is stored in the m_pCache field). + mov rax, [r11 + OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; Load the MethodTable from the object instance in rcx, and add it to the vtable offset + ;; to get the address in the vtable of what we want to dereference + add rax, [rcx] + + ;; Load the target address of the vtable into rax + mov rax, [rax] + + TAILJMP_RAX +LEAF_END RhpVTableOffsetDispatch, _TEXT + +;; Cache miss case, call the runtime to resolve the target and update the cache. 
+;; Use universal transition helper to allow an exception to flow out of resolution +LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT + ;; r11 contains indirection cell address + lea r10, RhpCidResolve + jmp RhpUniversalTransition_DebugStepTailCall + +LEAF_END RhpInterfaceDispatchSlow, _TEXT + end diff --git a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h index 84f10d2257ea57..5f131132d8ce76 100644 --- a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h +++ b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h @@ -28,6 +28,11 @@ struct DispatchCellInfo uint8_t HasCache = 0; uint32_t MetadataToken = 0; uint32_t VTableOffset = 0; + + uint32_t GetVTableOffset() + { + return VTableOffset; + } }; struct InterfaceDispatchCacheHeader diff --git a/src/coreclr/shared_runtime/CachedInterfaceDispatch.cpp b/src/coreclr/shared_runtime/CachedInterfaceDispatch.cpp index c1fc02caaa05cb..374af5dc33bc22 100644 --- a/src/coreclr/shared_runtime/CachedInterfaceDispatch.cpp +++ b/src/coreclr/shared_runtime/CachedInterfaceDispatch.cpp @@ -243,10 +243,9 @@ static uintptr_t AllocateCache(uint32_t cCacheEntries, InterfaceDispatchCache * { if (pNewCellInfo->CellType == DispatchCellType::VTableOffset) { - ASSERT(pNewCellInfo->VTableOffset < InterfaceDispatchCell::IDC_MaxVTableOffsetPlusOne); *ppStub = (void *)&RhpVTableOffsetDispatch; - ASSERT(!InterfaceDispatchCell::IsCache(pNewCellInfo->VTableOffset)); - return pNewCellInfo->VTableOffset; + ASSERT(!InterfaceDispatchCell::IsCache(pNewCellInfo->GetVTableOffset())); + return pNewCellInfo->GetVTableOffset(); } ASSERT((cCacheEntries >= 1) && (cCacheEntries <= CID_MAX_CACHE_SIZE)); diff --git a/src/coreclr/shared_runtime/CachedInterfaceDispatch.h b/src/coreclr/shared_runtime/CachedInterfaceDispatch.h index bc85a4e1cf8bb0..9e58d3e53e72b6 100644 --- a/src/coreclr/shared_runtime/CachedInterfaceDispatch.h +++ b/src/coreclr/shared_runtime/CachedInterfaceDispatch.h @@ -49,7 +49,8 @@ struct InterfaceDispatchCache bool 
InterfaceDispatch_Initialize(); PCODE InterfaceDispatch_UpdateDispatchCellCache(InterfaceDispatchCell * pCell, PCODE pTargetCode, MethodTable* pInstanceType, DispatchCellInfo *pNewCellInfo); void InterfaceDispatch_ReclaimUnusedInterfaceDispatchCaches(); -PCODE InterfaceDispatch_SearchDispatchCellCache(InterfaceDispatchCell * pCell, MethodTable* pInstanceType) + +inline PCODE InterfaceDispatch_SearchDispatchCellCache(InterfaceDispatchCell * pCell, MethodTable* pInstanceType) { // This function must be implemented in native code so that we do not take a GC while walking the cache InterfaceDispatchCache * pCache = (InterfaceDispatchCache*)pCell->GetCache(); diff --git a/src/coreclr/shared_runtime/amd64/StubDispatch.asm b/src/coreclr/shared_runtime/amd64/StubDispatch.asm index b93d948ad5d1e1..647044e5c8459f 100644 --- a/src/coreclr/shared_runtime/amd64/StubDispatch.asm +++ b/src/coreclr/shared_runtime/amd64/StubDispatch.asm @@ -1,21 +1,19 @@ ;; Licensed to the .NET Foundation under one or more agreements. ;; The .NET Foundation licenses this file to you under the MIT license. -include AsmMacros.inc +include AsmMacros_Shared.inc ifdef FEATURE_CACHED_INTERFACE_DISPATCH - -EXTERN RhpCidResolve : PROC -EXTERN RhpUniversalTransition_DebugStepTailCall : PROC +EXTERN RhpInterfaceDispatchSlow : PROC ;; Macro that generates code to check a single cache entry. CHECK_CACHE_ENTRY macro entry NextLabel textequ @CatStr( Attempt, %entry+1 ) - cmp rax, [r11 + OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 16)] + cmp rax, [r10 + OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 16)] jne NextLabel - jmp qword ptr [r11 + OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 16) + 8] + jmp qword ptr [r10 + OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 16) + 8] NextLabel: endm @@ -32,8 +30,8 @@ LEAF_ENTRY StubName, _TEXT ;inc [CID_g_cInterfaceDispatches] ;; r10 currently contains the indirection cell address. - ;; load r11 to point to the cache block. 
- mov r11, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] + ;; load r10 to point to the cache block. + mov r10, [r11 + OFFSETOF__InterfaceDispatchCell__m_pCache] ;; Load the MethodTable from the object instance in rcx. ALTERNATE_ENTRY StubAVLocation @@ -45,7 +43,7 @@ CurrentEntry = 0 CurrentEntry = CurrentEntry + 1 endm - ;; r10 still contains the indirection cell address. + ;; r11 still contains the indirection cell address. jmp RhpInterfaceDispatchSlow @@ -71,23 +69,6 @@ DEFINE_INTERFACE_DISPATCH_STUB 16 DEFINE_INTERFACE_DISPATCH_STUB 32 DEFINE_INTERFACE_DISPATCH_STUB 64 -;; Stub dispatch routine for dispatch to a vtable slot -LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT - ;; r10 currently contains the indirection cell address. - ;; load rax to point to the vtable offset (which is stored in the m_pCache field). - mov rax, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] - - ;; Load the MethodTable from the object instance in rcx, and add it to the vtable offset - ;; to get the address in the vtable of what we want to dereference - add rax, [rcx] - - ;; Load the target address of the vtable into rax - mov rax, [rax] - - TAILJMP_RAX -LEAF_END RhpVTableOffsetDispatch, _TEXT - - ;; Initial dispatch on an interface when we don't have a cache yet. LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch @@ -102,18 +83,6 @@ ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch LEAF_END RhpInitialInterfaceDispatch, _TEXT -;; Cache miss case, call the runtime to resolve the target and update the cache. 
-;; Use universal transition helper to allow an exception to flow out of resolution -LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT - ;; r10 contains indirection cell address, move to r11 where it will be passed by - ;; the universal transition thunk as an argument to RhpCidResolve - mov r11, r10 - lea r10, RhpCidResolve - jmp RhpUniversalTransition_DebugStepTailCall - -LEAF_END RhpInterfaceDispatchSlow, _TEXT - - endif ;; FEATURE_CACHED_INTERFACE_DISPATCH end diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index 9d73e3783e1aba..cde2dc584e39f5 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -1,10 +1,13 @@ set(CMAKE_INCLUDE_CURRENT_DIR ON) +set(SHARED_RUNTIME_DIR ../shared_runtime) + # Needed due to the cmunged files being in the binary folders, the set(CMAKE_INCLUDE_CURRENT_DIR ON) is not enough include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}) include_directories(${ARCH_SOURCES_DIR}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../interop/inc) include_directories(${CLR_SRC_NATIVE_DIR}) +include_directories(${SHARED_RUNTIME_DIR}) # needed when zLib compression is used include_directories(${CLR_SRC_NATIVE_DIR}/libs/System.IO.Compression.Native) @@ -41,6 +44,8 @@ if(FEATURE_PERFTRACING) include_directories(${CORECLR_USEREVENTS_SHIM_DIR}) endif(FEATURE_PERFTRACING) +add_definitions(-DFEATURE_CACHED_INTERFACE_DISPATCH) + set(VM_SOURCES_DAC_AND_WKS_COMMON appdomain.cpp array.cpp @@ -285,6 +290,8 @@ set(GC_HEADERS_DAC set(VM_SOURCES_WKS ${VM_SOURCES_DAC_AND_WKS_COMMON} + ${SHARED_RUNTIME_DIR}/CachedInterfaceDispatch.cpp + CachedInterfaceDispatchCoreclr.cpp appdomainnative.cpp assemblynative.cpp assemblyspec.cpp @@ -659,6 +666,7 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) ) endif() + set(ASM_SUFFIX asm) else(CLR_CMAKE_TARGET_WIN32) if(CLR_CMAKE_TARGET_ARCH_AMD64) @@ -723,8 +731,13 @@ else(CLR_CMAKE_TARGET_WIN32) ) endif() + set(ASM_SUFFIX S) endif(CLR_CMAKE_TARGET_WIN32) +set(VM_SOURCES_WKS_ARCH_ASM + 
${VM_SOURCES_WKS_ARCH_ASM} + ${SHARED_RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.${ASM_SUFFIX} +) if(CLR_CMAKE_TARGET_ARCH_AMD64) set(VM_SOURCES_DAC_AND_WKS_ARCH diff --git a/src/coreclr/vm/CachedInterfaceDispatchCoreclr.cpp b/src/coreclr/vm/CachedInterfaceDispatchCoreclr.cpp new file mode 100644 index 00000000000000..304e4be5f4abf4 --- /dev/null +++ b/src/coreclr/vm/CachedInterfaceDispatchCoreclr.cpp @@ -0,0 +1,21 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" + +bool InterfaceDispatch_InitializePal() +{ + return true; +} + +// Allocate memory aligned at sizeof(void*)*2 boundaries +void *InterfaceDispatch_AllocDoublePointerAligned(size_t size) +{ + return malloc(size); +} + +// Allocate memory aligned at at least sizeof(void*) +void *InterfaceDispatch_AllocPointerAligned(size_t size) +{ + return malloc(size); +} diff --git a/src/coreclr/vm/CachedInterfaceDispatchPal.h b/src/coreclr/vm/CachedInterfaceDispatchPal.h new file mode 100644 index 00000000000000..df8d106e38053b --- /dev/null +++ b/src/coreclr/vm/CachedInterfaceDispatchPal.h @@ -0,0 +1,189 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __CACHEDINTERFACEDISPATCHPAL_H__ +#define __CACHEDINTERFACEDISPATCHPAL_H__ + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + +#ifndef HOST_WINDOWS +#if defined(HOST_AMD64) || defined(HOST_ARM64) || defined(HOST_LOONGARCH64) +FORCEINLINE uint8_t PalInterlockedCompareExchange128(_Inout_ int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult) +{ + __int128_t iComparand = ((__int128_t)pComparandAndResult[1] << 64) + (uint64_t)pComparandAndResult[0]; + // TODO-LOONGARCH64: for LoongArch64, it supports 128bits atomic from 3A6000-CPU which is ISA1.1's version. 
+ // The LA64's compiler will translate the `__sync_val_compare_and_swap` into calling the libatomic's library interface to emulate + // the 128-bit CAS by mutex_lock if the target processor doesn't support the ISA1.1. + // But this emulation by libatomic doesn't satisfy requirements here which it must update two adjacent pointers atomically. + // this is being discussed in https://github.com/dotnet/runtime/issues/109276. + __int128_t iResult = __sync_val_compare_and_swap((__int128_t volatile*)pDst, iComparand, ((__int128_t)iValueHigh << 64) + (uint64_t)iValueLow); + PalInterlockedOperationBarrier(); + pComparandAndResult[0] = (int64_t)iResult; pComparandAndResult[1] = (int64_t)(iResult >> 64); + return iComparand == iResult; +} +#endif // HOST_AMD64 || HOST_ARM64 || HOST_LOONGARCH64 +#else // HOST_WINDOWS +#if defined(HOST_AMD64) || defined(HOST_ARM64) +EXTERN_C uint8_t _InterlockedCompareExchange128(int64_t volatile *, int64_t, int64_t, int64_t *); +#pragma intrinsic(_InterlockedCompareExchange128) +FORCEINLINE uint8_t PalInterlockedCompareExchange128(_Inout_ int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult) +{ + return _InterlockedCompareExchange128(pDst, iValueHigh, iValueLow, pComparandAndResult); +} +#endif // HOST_AMD64 || HOST_ARM64 +#endif // HOST_WINDOWS + +bool InterfaceDispatch_InitializePal(); + +// Allocate memory aligned at sizeof(void*)*2 boundaries +void *InterfaceDispatch_AllocDoublePointerAligned(size_t size); +// Allocate memory aligned at at least sizeof(void*) +void *InterfaceDispatch_AllocPointerAligned(size_t size); + +enum class DispatchCellType +{ + InterfaceAndSlot = 0x0, + VTableOffset = 0x2, +}; + +struct DispatchCellInfo +{ + DispatchCellType CellType; + MethodDesc *TargetMethod; + DispatchToken Token; + + uintptr_t GetVTableOffset() const + { + return 0; + } + + uint8_t HasCache = 0; +}; + +struct InterfaceDispatchCacheHeader +{ +private: + enum Flags + { + CH_TypeAndSlotIndex = 0x0, + 
CH_MetadataToken = 0x1, + CH_Mask = 0x3, + CH_Shift = 0x2, + }; + +public: + void Initialize(MethodDesc *pInterfaceMethod, DispatchToken token) + { + m_pMD = pInterfaceMethod; + m_token = token; +/* m_vtableOffset = 0; + m_vtableSecondLevelOffset = 0;*/ + } + + void Initialize(const DispatchCellInfo *pNewCellInfo) + { + m_pMD = pNewCellInfo->TargetMethod; + m_token = pNewCellInfo->Token; + } +/* void Initialize(uint32_t vtableOffset, uint32_t vtableSecondLevelOffset) + { + m_pMD = nullptr; + m_vtableOffset = (uint16_t)vtableOffset; + m_vtableSecondLevelOffset = (uint16_t)vtableSecondLevelOffset; + }*/ + + DispatchCellInfo GetDispatchCellInfo() + { + DispatchCellInfo cellInfo; + if (m_pMD->IsVtableMethod()) + { + cellInfo.CellType = DispatchCellType::VTableOffset; + } + else + { + cellInfo.CellType = DispatchCellType::InterfaceAndSlot; + } + cellInfo.TargetMethod = m_pMD; + cellInfo.Token = m_token; + cellInfo.HasCache = 1; + return cellInfo; + } + +private: + MethodDesc * m_pMD; // MethodDesc to dispatch to + DispatchToken m_token; +}; + +// One of these is allocated per interface call site. It holds the stub to call, data to pass to that stub +// (cache information) and the interface contract, i.e. the interface type and slot being called. +struct InterfaceDispatchCell +{ + // The first two fields must remain together and at the beginning of the structure. This is due to the + // synchronization requirements of the code that updates these at runtime and the instructions generated + // by the binder for interface call sites. + TADDR m_pStub; // Call this code to execute the interface dispatch + volatile TADDR m_pCache; // Context used by the stub above (one or both of the low two bits are set + // for initial dispatch, and if not set, using this as a cache pointer or + // as a vtable offset.) 
+ MethodDesc* m_pMD; // This defines the interface contract for this call site + DispatchToken m_token; + + enum Flags + { + // The low 2 bits of the m_pCache pointer are treated specially so that we can avoid the need for + // extra fields on this type. + // OR if the m_pCache value is less than 0x1000 then this it is a vtable offset and should be used as such + IDC_CachePointerPointerUninitialized = 0x2, + IDC_CachePointerPointsIsVTableOffset = 0x1, + IDC_CachePointerPointsAtCache = 0x0, + IDC_CachePointerMask = 0x3, + IDC_CachePointerMaskShift = 0x2, + }; + + DispatchCellInfo GetDispatchCellInfo() + { + // Capture m_pCache into a local for safe access (this is a volatile read of a value that may be + // modified on another thread while this function is executing.) + TADDR cachePointerValue = m_pCache; + DispatchCellInfo cellInfo; + cellInfo.TargetMethod = m_pMD; + cellInfo.Token = m_token; + + if (IsCache(cachePointerValue)) + { + cellInfo.HasCache = 1; + } + return cellInfo; + } + + static bool IsCache(TADDR value) + { + if ((value & IDC_CachePointerMask) != 0) + { + return false; + } + else + { + return true; + } + } + + InterfaceDispatchCacheHeader* GetCache() const + { + // Capture m_pCache into a local for safe access (this is a volatile read of a value that may be + // modified on another thread while this function is executing.) + TADDR cachePointerValue = m_pCache; + if (IsCache(cachePointerValue)) + { + return (InterfaceDispatchCacheHeader*)cachePointerValue; + } + else + { + return 0; + } + } +}; + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + +#endif // __CACHEDINTERFACEDISPATCHPAL_H__ \ No newline at end of file diff --git a/src/coreclr/vm/amd64/AsmMacros_Shared.inc b/src/coreclr/vm/amd64/AsmMacros_Shared.inc new file mode 100644 index 00000000000000..c7e7ce2f562fbb --- /dev/null +++ b/src/coreclr/vm/amd64/AsmMacros_Shared.inc @@ -0,0 +1,7 @@ +; Licensed to the .NET Foundation under one or more agreements. 
+; The .NET Foundation licenses this file to you under the MIT license. + +; This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible + +include AsmConstants.inc +include AsmMacros.inc \ No newline at end of file diff --git a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm index b533789980c510..6dc603aa6bda56 100644 --- a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm +++ b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm @@ -7,6 +7,7 @@ include AsmConstants.inc CHAIN_SUCCESS_COUNTER equ ?g_dispatch_cache_chain_success_counter@@3_KA extern VSD_ResolveWorker:proc + extern CID_ResolveWorker:proc extern CHAIN_SUCCESS_COUNTER:dword BACKPATCH_FLAG equ 1 ;; Also known as SDF_ResolveBackPatch in the EE @@ -38,6 +39,51 @@ NESTED_ENTRY ResolveWorkerAsmStub, _TEXT NESTED_END ResolveWorkerAsmStub, _TEXT +;; Stub dispatch routine for dispatch to a vtable slot +LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + ;; r11 currently contains the indirection cell address. + ;; load r11 to point to the vtable offset (which is stored in the m_pCache field). 
+ mov r11, [r11 + OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; r11 now contains the VTableOffset where the upper 32 bits are the offset to adjust + ;; to get to the VTable chunk + mov rax, r11 + shr rax, 32 + + ;; Load the MethodTable from the object instance in rcx, and add it to the vtable offset + ;; to get the address in the vtable chunk list of what we want to dereference + add rax, [rcx] + + ;; Load the target address of the vtable chunk into rax + mov rax, [rax] + + ;; Compute the chunk offset + shr r11d, 16 + + ;; Load the target address of the virtual function into rax + mov rax, [rax + r11] + + TAILJMP_RAX +LEAF_END RhpVTableOffsetDispatch, _TEXT + +;; On Input: +;; r11 contains the address of the indirection cell +;; [rsp+0] m_ReturnAddress: contains the return address of caller to stub + +NESTED_ENTRY RhpInterfaceDispatchSlow, _TEXT + + PROLOG_WITH_TRANSITION_BLOCK + + lea rcx, [rsp + __PWTB_TransitionBlock] ; pTransitionBlock + mov rdx, r11 ; indirection cell + + call CID_ResolveWorker + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + TAILJMP_RAX + +NESTED_END RhpInterfaceDispatchSlow, _TEXT + ;; extern void ResolveWorkerChainLookupAsmStub() LEAF_ENTRY ResolveWorkerChainLookupAsmStub, _TEXT ;; This will perform a quick chained lookup of the entry if the initial cache lookup fails diff --git a/src/coreclr/vm/amd64/asmconstants.h b/src/coreclr/vm/amd64/asmconstants.h index 4b38aeeaca52d7..ccaf7bdb6edd73 100644 --- a/src/coreclr/vm/amd64/asmconstants.h +++ b/src/coreclr/vm/amd64/asmconstants.h @@ -538,6 +538,14 @@ ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForMethod == offsetof(CallCoun #define CallCountingStubData__TargetForThresholdReached 0x10 ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForThresholdReached == offsetof(CallCountingStubData, TargetForThresholdReached)) +#define OFFSETOF__InterfaceDispatchCache__m_rgEntries 0x20 +ASMCONSTANTS_C_ASSERT(OFFSETOF__InterfaceDispatchCache__m_rgEntries == offsetof(InterfaceDispatchCache, 
m_rgEntries)) + +#define OFFSETOF__InterfaceDispatchCell__m_pCache 0x08 +ASMCONSTANTS_C_ASSERT(OFFSETOF__InterfaceDispatchCell__m_pCache == offsetof(InterfaceDispatchCell, m_pCache)) + +//ASM_SIZEOF( 8, 10, InterfaceDispatchCacheEntry) + #ifdef PROFILING_SUPPORTED #define PROFILE_ENTER 0x1 #define PROFILE_LEAVE 0x2 diff --git a/src/coreclr/vm/ceeload.cpp b/src/coreclr/vm/ceeload.cpp index 219d3eaa5f0246..15696f28f31d92 100644 --- a/src/coreclr/vm/ceeload.cpp +++ b/src/coreclr/vm/ceeload.cpp @@ -42,6 +42,9 @@ #include "threads.h" #include "nativeimage.h" +#include "CachedInterfaceDispatchPal.h" +#include "CachedInterfaceDispatch.h" + #ifdef FEATURE_COMINTEROP #include "runtimecallablewrapper.h" #include "comcallablewrapper.h" diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index 4ead0cadb8b5ba..731eed44756290 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -15,6 +15,8 @@ #include "common.h" #include "array.h" +#include "CachedInterfaceDispatchPal.h" +#include "CachedInterfaceDispatch.h" #ifdef FEATURE_PERFMAP #include "perfmap.h" @@ -1262,6 +1264,126 @@ ResolveCacheElem* __fastcall VirtualCallStubManager::PromoteChainEntry(ResolveCa } #endif // CHAIN_LOOKUP +PCODE CachedInterfaceDispatchResolveWorker(StubCallSite* pCallSite, OBJECTREF *protectedObj, DispatchToken token) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + MODE_COOPERATIVE; + INJECT_FAULT(COMPlusThrowOM();); + PRECONDITION(protectedObj != NULL); + PRECONDITION(*protectedObj != NULL); + PRECONDITION(IsProtectedByGCFrame(protectedObj)); + } CONTRACTL_END; + + MethodTable* objectType = (*protectedObj)->GetMethodTable(); + CONSISTENCY_CHECK(CheckPointer(objectType)); + + PCODE target = (PCODE)NULL; + BOOL patch = VirtualCallStubManager::Resolver(objectType, token, protectedObj, &target, TRUE /* throwOnConflict */); + +#if defined(_DEBUG) + if (!objectType->IsComObjectType() + && !objectType->IsIDynamicInterfaceCastable()) + { + 
CONSISTENCY_CHECK(!MethodTable::GetMethodDescForSlotAddress(target)->IsGenericMethodDefinition()); + } +#endif // _DEBUG + + if (patch) + { + DispatchCellInfo cellInfo = ((InterfaceDispatchCell*)pCallSite->GetIndirectCell())->GetDispatchCellInfo(); + InterfaceDispatch_UpdateDispatchCellCache((InterfaceDispatchCell*)pCallSite->GetIndirectCell(), target, objectType, &cellInfo); + } + + return target; +} + +/* Resolve to a method and return its address or NULL if there is none + Our return value is the target address that control should continue to. Our caller will + enter the target address as if a direct call with the original stack frame had been made from + the actual call site. Hence our strategy is to either return a target address + of the actual method implementation, or the prestub if we cannot find the actual implementation. + If we are returning a real method address, we may patch the original cell site to point to different + stub. Note, if we encounter a method that hasn't been jitted + yet, we will return the prestub, which should cause it to be jitted and we will + be able to build the dispatching stub on a later call thru the call site. If we encounter + any other kind of problem, rather than throwing an exception, we will also return the + prestub, unless we are unable to find the method at all, in which case we return NULL. 
+ */ +extern "C" PCODE CID_ResolveWorker(TransitionBlock * pTransitionBlock, + InterfaceDispatchCell* indirectionCell) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + INJECT_FAULT(COMPlusThrowOM();); + PRECONDITION(CheckPointer(pTransitionBlock)); + MODE_COOPERATIVE; + } CONTRACTL_END; + + MAKE_CURRENT_THREAD_AVAILABLE(); + +#ifdef _DEBUG + Thread::ObjectRefFlush(CURRENT_THREAD); +#endif + + FrameWithCookie<StubDispatchFrame> frame(pTransitionBlock); + StubDispatchFrame * pSDFrame = &frame; + + PCODE returnAddress = pSDFrame->GetUnadjustedReturnAddress(); + + StubCallSite callSite((TADDR)indirectionCell, returnAddress); + + OBJECTREF *protectedObj = pSDFrame->GetThisPtr(); + _ASSERTE(protectedObj != NULL); + OBJECTREF pObj = *protectedObj; + + PCODE target = (PCODE)NULL; + + if (pObj == NULL) { + pSDFrame->SetForNullReferenceException(); + pSDFrame->Push(CURRENT_THREAD); + INSTALL_MANAGED_EXCEPTION_DISPATCHER; + INSTALL_UNWIND_AND_CONTINUE_HANDLER; + COMPlusThrow(kNullReferenceException); + UNINSTALL_UNWIND_AND_CONTINUE_HANDLER; + UNINSTALL_MANAGED_EXCEPTION_DISPATCHER; + _ASSERTE(!"Throw returned"); + } + + pSDFrame->SetCallSite(NULL, (TADDR)callSite.GetIndirectCell()); + pSDFrame->SetFunction(indirectionCell->m_pMD); + + pSDFrame->Push(CURRENT_THREAD); + INSTALL_MANAGED_EXCEPTION_DISPATCHER; + INSTALL_UNWIND_AND_CONTINUE_HANDLER; + + // For Virtual Delegates the m_siteAddr is a field of a managed object + // Thus we have to report it as an interior pointer, + // so that it is updated during a gc + GCPROTECT_BEGININTERIOR( *(callSite.GetIndirectCellAddress()) ); + + GCStress::MaybeTriggerAndProtect(pObj); + + target = CachedInterfaceDispatchResolveWorker(&callSite, protectedObj, indirectionCell->m_token); + +#if _DEBUG + if (pSDFrame->GetGCRefMap() != NULL) + { + GCX_PREEMP(); + _ASSERTE(CheckGCRefMapEqual(pSDFrame->GetGCRefMap(), pSDFrame->GetFunction(), true)); + } +#endif // _DEBUG + + GCPROTECT_END(); + + UNINSTALL_UNWIND_AND_CONTINUE_HANDLER; + 
UNINSTALL_MANAGED_EXCEPTION_DISPATCHER; + pSDFrame->Pop(CURRENT_THREAD); + + return target; +} /* Resolve to a method and return its address or NULL if there is none. Our return value is the target address that control should continue to. Our caller will enter the target address as if a direct call with the original stack frame had been made from diff --git a/src/coreclr/vm/virtualcallstub.h b/src/coreclr/vm/virtualcallstub.h index 7638f2aec1eb29..e770271670ffa5 100644 --- a/src/coreclr/vm/virtualcallstub.h +++ b/src/coreclr/vm/virtualcallstub.h @@ -387,6 +387,12 @@ class VirtualCallStubManager : public StubManager size_t token, void *target); + + // This can be used to find a target without needing the ability to throw + static BOOL TraceResolver(Object *pObj, DispatchToken token, TraceDestination *trace); + +public: + //Given a dispatch token, an object and a method table, determine the //target address to go to. The return value (BOOL) states whether this address //is cacheable or not. @@ -396,10 +402,6 @@ class VirtualCallStubManager : public StubManager PCODE * ppTarget, BOOL throwOnConflict); - // This can be used to find a target without needing the ability to throw - static BOOL TraceResolver(Object *pObj, DispatchToken token, TraceDestination *trace); - -public: // Return the MethodDesc corresponding to this token. 
static MethodDesc *GetRepresentativeMethodDescFromToken(DispatchToken token, MethodTable *pMT); static MethodDesc *GetInterfaceMethodDescFromToken(DispatchToken token); From 5f1f2b5e4fb34b99c6a14dd7d1bfe78397b94b97 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Tue, 21 Jan 2025 15:35:52 -0800 Subject: [PATCH 04/41] Add indirection cell helper so that VSD and CachedInterfaceDispatch can be switched between --- src/coreclr/vm/CachedInterfaceDispatchPal.h | 5 +++ src/coreclr/vm/genericdict.cpp | 22 +----------- src/coreclr/vm/jitinterface.cpp | 22 +++--------- src/coreclr/vm/virtualcallstub.cpp | 37 +++++++++++++++++++++ src/coreclr/vm/virtualcallstub.h | 4 +++ 5 files changed, 51 insertions(+), 39 deletions(-) diff --git a/src/coreclr/vm/CachedInterfaceDispatchPal.h b/src/coreclr/vm/CachedInterfaceDispatchPal.h index df8d106e38053b..b704915c88feac 100644 --- a/src/coreclr/vm/CachedInterfaceDispatchPal.h +++ b/src/coreclr/vm/CachedInterfaceDispatchPal.h @@ -140,6 +140,11 @@ struct InterfaceDispatchCell IDC_CachePointerMaskShift = 0x2, }; + static TADDR InitialDispatchCacheCellValue() + { + return IDC_CachePointerPointerUninitialized; + } + DispatchCellInfo GetDispatchCellInfo() { // Capture m_pCache into a local for safe access (this is a volatile read of a value that may be diff --git a/src/coreclr/vm/genericdict.cpp b/src/coreclr/vm/genericdict.cpp index 9d335522ca1eeb..29414983ab306b 100644 --- a/src/coreclr/vm/genericdict.cpp +++ b/src/coreclr/vm/genericdict.cpp @@ -1041,28 +1041,8 @@ Dictionary::PopulateEntry( if (fRequiresDispatchStub) { - // Generate a dispatch stub and store it in the dictionary. - // - // We generate an indirection so we don't have to write to the dictionary - // when we do updates, and to simplify stub indirect callsites. Stubs stored in - // dictionaries use "RegisterIndirect" stub calling, e.g. "call [eax]", - // i.e. 
here the register "eax" would contain the value fetched from the dictionary, - // which in turn points to the stub indirection which holds the value the current stub - // address itself. If we just used "call eax" then we wouldn't know which stub indirection - // to update. If we really wanted to avoid the extra indirection we could return the _address_ of the - // dictionary entry to the caller, still using "call [eax]", and then the - // stub dispatch mechanism can update the dictitonary itself and we don't - // need an indirection. LoaderAllocator * pDictLoaderAllocator = (pMT != NULL) ? pMT->GetLoaderAllocator() : pMD->GetLoaderAllocator(); - - VirtualCallStubManager * pMgr = pDictLoaderAllocator->GetVirtualCallStubManager(); - - // We indirect through a cell so that updates can take place atomically. - // The call stub and the indirection cell have the same lifetime as the dictionary itself, i.e. - // are allocated in the domain of the dicitonary. - PCODE addr = pMgr->GetCallStub(ownerType, methodSlot); - - result = (CORINFO_GENERIC_HANDLE)pMgr->GenerateStubIndirection(addr); + result = (CORINFO_GENERIC_HANDLE)GenerateDispatchStubCellEntrySlot(pDictLoaderAllocator, ownerType, methodSlot, NULL); break; } diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 0a5e03371994dc..6ec770ff66698d 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -5283,31 +5283,17 @@ void CEEInfo::getCallInfo( // We shouldn't be using GetLoaderAllocator here because for LCG, we need to get the // VirtualCallStubManager from where the stub will be used. // For normal methods there is no difference. - LoaderAllocator *pLoaderAllocator = m_pMethodBeingCompiled->GetLoaderAllocator(); - VirtualCallStubManager *pMgr = pLoaderAllocator->GetVirtualCallStubManager(); - - PCODE addr = pMgr->GetCallStub(exactType, pTargetMD); - // Now we want to indirect through a cell so that updates can take place atomically. 
+ LoaderAllocator *pLoaderAllocator = m_pMethodBeingCompiled->GetLoaderAllocator(); + LCGMethodResolver *pResolver = NULL; if (m_pMethodBeingCompiled->IsLCGMethod()) { - // LCG methods should use recycled indcells to prevent leaks. - indcell = pMgr->GenerateStubIndirection(addr, TRUE); - - // Add it to the per DM list so that we can recycle them when the resolver is finalized - LCGMethodResolver *pResolver = m_pMethodBeingCompiled->AsDynamicMethodDesc()->GetLCGMethodResolver(); - pResolver->AddToUsedIndCellList(indcell); - } - else - { - // Normal methods should avoid recycled cells to preserve the locality of all indcells - // used by one method. - indcell = pMgr->GenerateStubIndirection(addr, FALSE); + pResolver = m_pMethodBeingCompiled->AsDynamicMethodDesc()->GetLCGMethodResolver(); } // We use an indirect call pResult->stubLookup.constLookup.accessType = IAT_PVALUE; - pResult->stubLookup.constLookup.addr = indcell; + pResult->stubLookup.constLookup.addr = GenerateDispatchStubCellEntryMethodDesc(m_pMethodBeingCompiled->GetLoaderAllocator(), exactType, pTargetMD, pResolver); } #endif // STUB_DISPATCH_PORTABLE } diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index 731eed44756290..6626d505059df7 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -99,6 +99,43 @@ SPTR_IMPL_INIT(VirtualCallStubManagerManager, VirtualCallStubManagerManager, g_p #ifndef DACCESS_COMPILE +BYTE* GenerateDispatchStubCellEntryMethodDesc(LoaderAllocator *pLoaderAllocator, TypeHandle ownerType, MethodDesc *pMD, LCGMethodResolver *pResolver) +{ + return GenerateDispatchStubCellEntrySlot(pLoaderAllocator, ownerType, pMD->GetSlot(), pResolver); +} + +BYTE* GenerateDispatchStubCellEntrySlot(LoaderAllocator *pLoaderAllocator, TypeHandle ownerType, int methodSlot, LCGMethodResolver *pResolver) +{ + // Generate a dispatch stub and store it in the dictionary. 
+ // + // We generate an indirection so we don't have to write to the dictionary + // when we do updates, and to simplify stub indirect callsites. Stubs stored in + // dictionaries use "RegisterIndirect" stub calling, e.g. "call [eax]", + // i.e. here the register "eax" would contain the value fetched from the dictionary, + // which in turn points to the stub indirection which holds the value the current stub + // address itself. If we just used "call eax" then we wouldn't know which stub indirection + // to update. If we really wanted to avoid the extra indirection we could return the _address_ of the + // dictionary entry to the caller, still using "call [eax]", and then the + // stub dispatch mechanism can update the dictitonary itself and we don't + // need an indirection. + + VirtualCallStubManager * pMgr = pLoaderAllocator->GetVirtualCallStubManager(); + + // We indirect through a cell so that updates can take place atomically. + // The call stub and the indirection cell have the same lifetime as the dictionary itself, i.e. + // are allocated in the domain of the dicitonary. 
+ PCODE addr = pMgr->GetCallStub(ownerType, methodSlot); + + BYTE* indcell = pMgr->GenerateStubIndirection(addr, pResolver != NULL); + + if (pResolver != NULL) + { + pResolver->AddToUsedIndCellList(indcell); + } + + return indcell; +} + #ifdef STUB_LOGGING UINT32 STUB_MISS_COUNT_VALUE = 100; UINT32 STUB_COLLIDE_WRITE_PCT = 100; diff --git a/src/coreclr/vm/virtualcallstub.h b/src/coreclr/vm/virtualcallstub.h index e770271670ffa5..1141893af03341 100644 --- a/src/coreclr/vm/virtualcallstub.h +++ b/src/coreclr/vm/virtualcallstub.h @@ -1537,5 +1537,9 @@ class BucketTable static FastTable* dead; //linked list head of to be deleted (abandoned) buckets }; +BYTE* GenerateDispatchStubCellEntryMethodDesc(LoaderAllocator *pLoaderAllocator, TypeHandle ownerType, MethodDesc *pMD, LCGMethodResolver *pResolver); +BYTE* GenerateDispatchStubCellEntrySlot(LoaderAllocator *pLoaderAllocator, TypeHandle ownerType, int methodSlot, LCGMethodResolver *pResolver); + +inline bool DispatchStubRequiresExtraSlot() { return true; } #endif // !_VIRTUAL_CALL_STUB_H From 976bf83931a440e24c5da45619934f65b84af2e5 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Tue, 21 Jan 2025 17:40:52 -0800 Subject: [PATCH 05/41] Ready to try running things. R2R not yet supported. 
Virtual delegates not yet supported --- .../Runtime/amd64/AsmMacros_Shared.inc | 6 ++ src/coreclr/nativeaot/Runtime/inc/rhbinder.h | 2 +- src/coreclr/vm/CachedInterfaceDispatchPal.h | 13 +-- src/coreclr/vm/comdelegate.cpp | 1 + src/coreclr/vm/jitinterface.cpp | 3 +- src/coreclr/vm/virtualcallstub.cpp | 84 ++++++++++++++----- src/coreclr/vm/virtualcallstub.h | 8 +- 7 files changed, 83 insertions(+), 34 deletions(-) create mode 100644 src/coreclr/nativeaot/Runtime/amd64/AsmMacros_Shared.inc diff --git a/src/coreclr/nativeaot/Runtime/amd64/AsmMacros_Shared.inc b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros_Shared.inc new file mode 100644 index 00000000000000..956d4d22e38313 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros_Shared.inc @@ -0,0 +1,6 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. + +; This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible + +include AsmMacros.inc \ No newline at end of file diff --git a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h index 5f131132d8ce76..5fa7ac852dd86f 100644 --- a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h +++ b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h @@ -29,7 +29,7 @@ struct DispatchCellInfo uint32_t MetadataToken = 0; uint32_t VTableOffset = 0; - uint32_t GetVTableOffset() + uint32_t GetVTableOffset() const { return VTableOffset; } diff --git a/src/coreclr/vm/CachedInterfaceDispatchPal.h b/src/coreclr/vm/CachedInterfaceDispatchPal.h index b704915c88feac..7c992f57508e1d 100644 --- a/src/coreclr/vm/CachedInterfaceDispatchPal.h +++ b/src/coreclr/vm/CachedInterfaceDispatchPal.h @@ -49,7 +49,6 @@ enum class DispatchCellType struct DispatchCellInfo { DispatchCellType CellType; - MethodDesc *TargetMethod; DispatchToken Token; uintptr_t 
GetVTableOffset() const @@ -72,9 +71,8 @@ struct InterfaceDispatchCacheHeader }; public: - void Initialize(MethodDesc *pInterfaceMethod, DispatchToken token) + void Initialize(DispatchToken token) { - m_pMD = pInterfaceMethod; m_token = token; /* m_vtableOffset = 0; m_vtableSecondLevelOffset = 0;*/ @@ -82,7 +80,6 @@ struct InterfaceDispatchCacheHeader void Initialize(const DispatchCellInfo *pNewCellInfo) { - m_pMD = pNewCellInfo->TargetMethod; m_token = pNewCellInfo->Token; } /* void Initialize(uint32_t vtableOffset, uint32_t vtableSecondLevelOffset) @@ -95,7 +92,7 @@ struct InterfaceDispatchCacheHeader DispatchCellInfo GetDispatchCellInfo() { DispatchCellInfo cellInfo; - if (m_pMD->IsVtableMethod()) + if (m_token.IsThisToken()) { cellInfo.CellType = DispatchCellType::VTableOffset; } @@ -103,15 +100,14 @@ struct InterfaceDispatchCacheHeader { cellInfo.CellType = DispatchCellType::InterfaceAndSlot; } - cellInfo.TargetMethod = m_pMD; cellInfo.Token = m_token; cellInfo.HasCache = 1; return cellInfo; } private: - MethodDesc * m_pMD; // MethodDesc to dispatch to DispatchToken m_token; + TADDR padding; // Ensure that the size of this structure is a multiple of 2 pointers }; // One of these is allocated per interface call site. It holds the stub to call, data to pass to that stub @@ -125,7 +121,7 @@ struct InterfaceDispatchCell volatile TADDR m_pCache; // Context used by the stub above (one or both of the low two bits are set // for initial dispatch, and if not set, using this as a cache pointer or // as a vtable offset.) - MethodDesc* m_pMD; // This defines the interface contract for this call site + TADDR dummy; // Padding to make the size of the structure a multiple of 2 pointers DispatchToken m_token; enum Flags @@ -151,7 +147,6 @@ struct InterfaceDispatchCell // modified on another thread while this function is executing.) 
TADDR cachePointerValue = m_pCache; DispatchCellInfo cellInfo; - cellInfo.TargetMethod = m_pMD; cellInfo.Token = m_token; if (IsCache(cachePointerValue)) diff --git a/src/coreclr/vm/comdelegate.cpp b/src/coreclr/vm/comdelegate.cpp index 18fb1f1e103284..2c0b149dc0026c 100644 --- a/src/coreclr/vm/comdelegate.cpp +++ b/src/coreclr/vm/comdelegate.cpp @@ -980,6 +980,7 @@ static PCODE GetVirtualCallStub(MethodDesc *method, TypeHandle scopeType) // need to grab a virtual dispatch stub // method can be on a canonical MethodTable, we need to allocate the stub on the loader allocator associated with the exact type instantiation. VirtualCallStubManager *pVirtualStubManager = scopeType.GetMethodTable()->GetLoaderAllocator()->GetVirtualCallStubManager(); + _ASSERTE(!UseCachedInterfaceDispatch()); // This code path is not yet ready PCODE pTargetCall = pVirtualStubManager->GetCallStub(scopeType, method); _ASSERTE(pTargetCall); return pTargetCall; diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 6ec770ff66698d..f21e572484abc6 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -13668,7 +13668,8 @@ BOOL LoadDynamicInfoEntry(Module *currentModule, // aren't quite set up to accept that. Furthermore the call sequences would be different - at // the moment an indirection cell uses "call [cell-addr]" on x86, and instead we would want the // euqivalent of "call [[call-addr]]". 
This could perhaps be implemented as "call [eax]" - result = pMgr->GetCallStub(ownerType, slot); + DispatchToken token = VirtualCallStubManager::GetTokenFromFromOwnerAndSlot(ownerType, slot); + result = pMgr->GetCallStub(token); } break; #ifdef FEATURE_READYTORUN diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index 6626d505059df7..d16c0b18a9f5ee 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -104,9 +104,11 @@ BYTE* GenerateDispatchStubCellEntryMethodDesc(LoaderAllocator *pLoaderAllocator, return GenerateDispatchStubCellEntrySlot(pLoaderAllocator, ownerType, pMD->GetSlot(), pResolver); } +extern "C" void RhpInitialInterfaceDispatch(); + BYTE* GenerateDispatchStubCellEntrySlot(LoaderAllocator *pLoaderAllocator, TypeHandle ownerType, int methodSlot, LCGMethodResolver *pResolver) { - // Generate a dispatch stub and store it in the dictionary. + // Generate a dispatch stub and gather a slot. // // We generate an indirection so we don't have to write to the dictionary // when we do updates, and to simplify stub indirect callsites. Stubs stored in @@ -124,9 +126,10 @@ BYTE* GenerateDispatchStubCellEntrySlot(LoaderAllocator *pLoaderAllocator, TypeH // We indirect through a cell so that updates can take place atomically. // The call stub and the indirection cell have the same lifetime as the dictionary itself, i.e. // are allocated in the domain of the dicitonary. - PCODE addr = pMgr->GetCallStub(ownerType, methodSlot); + DispatchToken token = VirtualCallStubManager::GetTokenFromFromOwnerAndSlot(ownerType, methodSlot); + PCODE addr = UseCachedInterfaceDispatch() ? 
(PCODE)RhpInitialInterfaceDispatch : pMgr->GetCallStub(token); - BYTE* indcell = pMgr->GenerateStubIndirection(addr, pResolver != NULL); + BYTE* indcell = pMgr->GenerateStubIndirection(addr, token, pResolver != NULL); if (pResolver != NULL) { @@ -993,6 +996,29 @@ BOOL VirtualCallStubManager::TraceManager(Thread *thread, #ifndef DACCESS_COMPILE +DispatchToken VirtualCallStubManager::GetTokenFromFromOwnerAndSlot(TypeHandle ownerType, uint32_t slot) +{ + CONTRACTL + { + THROWS; + GC_TRIGGERS; + MODE_ANY; + INJECT_FAULT(COMPlusThrowOM();); + } + CONTRACTL_END + + MethodTable * pMT = ownerType.GetMethodTable(); + pMT->GetRestoredSlot(slot); + + DispatchToken token; + if (pMT->IsInterface()) + token = pMT->GetLoaderAllocator()->GetDispatchToken(pMT->GetTypeID(), slot); + else + token = DispatchToken::CreateDispatchToken(slot); + + return token; +} + PCODE VirtualCallStubManager::GetCallStub(TypeHandle ownerType, MethodDesc *pMD) { CONTRACTL { @@ -1004,11 +1030,13 @@ PCODE VirtualCallStubManager::GetCallStub(TypeHandle ownerType, MethodDesc *pMD) INJECT_FAULT(COMPlusThrowOM();); } CONTRACTL_END; - return GetCallStub(ownerType, pMD->GetSlot()); + DispatchToken token = GetTokenFromFromOwnerAndSlot(ownerType, pMD->GetSlot()); + + return GetCallStub(token); } //find or create a stub -PCODE VirtualCallStubManager::GetCallStub(TypeHandle ownerType, DWORD slot) +PCODE VirtualCallStubManager::GetCallStub(DispatchToken token) { CONTRACT (PCODE) { THROWS; @@ -1020,15 +1048,6 @@ PCODE VirtualCallStubManager::GetCallStub(TypeHandle ownerType, DWORD slot) GCX_COOP(); // This is necessary for BucketTable synchronization - MethodTable * pMT = ownerType.GetMethodTable(); - pMT->GetRestoredSlot(slot); - - DispatchToken token; - if (pMT->IsInterface()) - token = pMT->GetLoaderAllocator()->GetDispatchToken(pMT->GetTypeID(), slot); - else - token = DispatchToken::CreateDispatchToken(slot); - //get a stub from lookups, make if necessary PCODE stub = CALL_STUB_EMPTY_ENTRY; PCODE addrOfResolver 
= GetEEFuncEntryPoint(ResolveWorkerAsmStub); @@ -1124,7 +1143,7 @@ VTableCallHolder* VirtualCallStubManager::GenerateVTableCallStub(DWORD slot) // m_RecycledIndCellList when it is finalized. // //+---------------------------------------------------------------------------- -BYTE *VirtualCallStubManager::GenerateStubIndirection(PCODE target, BOOL fUseRecycledCell /* = FALSE*/ ) +BYTE *VirtualCallStubManager::GenerateStubIndirection(PCODE target, DispatchToken token, BOOL fUseRecycledCell /* = FALSE*/ ) { CONTRACT (BYTE*) { THROWS; @@ -1134,7 +1153,7 @@ BYTE *VirtualCallStubManager::GenerateStubIndirection(PCODE target, BOOL fUseRec POSTCONDITION(CheckPointer(RETVAL)); } CONTRACT_END; - _ASSERTE(isStubStatic(target)); + _ASSERTE(UseCachedInterfaceDispatch() || isStubStatic(target)); CrstHolder lh(&m_indCellLock); @@ -1142,6 +1161,8 @@ BYTE *VirtualCallStubManager::GenerateStubIndirection(PCODE target, BOOL fUseRec BYTE * ret = NULL; UINT32 cellsPerBlock = INDCELLS_PER_BLOCK; + UINT32 sizeOfIndCell = UseCachedInterfaceDispatch() ? sizeof(InterfaceDispatchCell) : sizeof(BYTE *); + // First try the recycled indirection cell list for Dynamic methods if (fUseRecycledCell) ret = GetOneRecycledIndCell(); @@ -1153,24 +1174,38 @@ BYTE *VirtualCallStubManager::GenerateStubIndirection(PCODE target, BOOL fUseRec // Allocate from loader heap if (!ret) { + size_t alignment = UseCachedInterfaceDispatch() ? sizeof(TADDR) * 2 : sizeof(TADDR); // Free list is empty, allocate a block of indcells from indcell_heap and insert it into the free list. - BYTE ** pBlock = (BYTE **) (void *) indcell_heap->AllocMem(S_SIZE_T(cellsPerBlock) * S_SIZE_T(sizeof(BYTE *))); + BYTE ** pBlock = (BYTE **) (void *) indcell_heap->AllocAlignedMem(cellsPerBlock * sizeOfIndCell, alignment); // return the first cell in the block and add the rest to the free list ret = (BYTE *)pBlock; // link all the cells together // we don't need to null terminate the linked list, InsertIntoFreeIndCellList will do it. 
+ BYTE** pBlockCur = pBlock; for (UINT32 i = 1; i < cellsPerBlock - 1; ++i) { - pBlock[i] = (BYTE *)&(pBlock[i+1]); + *pBlockCur = (BYTE *)&(pBlock[i+1]); + pBlockCur = (BYTE**)(((BYTE*)pBlockCur) + sizeOfIndCell); } // insert the list into the free indcell list. InsertIntoFreeIndCellList((BYTE *)&pBlock[1], (BYTE*)&pBlock[cellsPerBlock - 1]); } - *((PCODE *)ret) = target; + if (UseCachedInterfaceDispatch()) + { + InterfaceDispatchCell * pCell = (InterfaceDispatchCell *)ret; + pCell->m_pStub = target; + pCell->m_pCache = InterfaceDispatchCell::InitialDispatchCacheCellValue(); + pCell->m_token = token; + ret = (BYTE *)pCell; + } + else + { + *((PCODE *)ret) = target; + } RETURN ret; } @@ -1390,7 +1425,16 @@ extern "C" PCODE CID_ResolveWorker(TransitionBlock * pTransitionBlock, } pSDFrame->SetCallSite(NULL, (TADDR)callSite.GetIndirectCell()); - pSDFrame->SetFunction(indirectionCell->m_pMD); + + DispatchToken representativeToken(indirectionCell->m_token); + MethodTable * pRepresentativeMT = pObj->GetMethodTable(); + if (representativeToken.IsTypedToken()) + { + pRepresentativeMT = AppDomain::GetCurrentDomain()->LookupType(representativeToken.GetTypeID()); + CONSISTENCY_CHECK(CheckPointer(pRepresentativeMT)); + } + + pSDFrame->SetRepresentativeSlot(pRepresentativeMT, representativeToken.GetSlotNumber()); pSDFrame->Push(CURRENT_THREAD); INSTALL_MANAGED_EXCEPTION_DISPATCHER; diff --git a/src/coreclr/vm/virtualcallstub.h b/src/coreclr/vm/virtualcallstub.h index 1141893af03341..b3e9f78290d732 100644 --- a/src/coreclr/vm/virtualcallstub.h +++ b/src/coreclr/vm/virtualcallstub.h @@ -226,13 +226,15 @@ class VirtualCallStubManager : public StubManager // The function is idempotent, i.e. // you'll get the same callstub twice if you call it with identical inputs. 
PCODE GetCallStub(TypeHandle ownerType, MethodDesc *pMD); - PCODE GetCallStub(TypeHandle ownerType, DWORD slot); + PCODE GetCallStub(DispatchToken token); + + static DispatchToken GetTokenFromFromOwnerAndSlot(TypeHandle ownerType, uint32_t slot); // Stubs for vtable-based virtual calls with no lookups PCODE GetVTableCallStub(DWORD slot); // Generate an fresh indirection cell. - BYTE* GenerateStubIndirection(PCODE stub, BOOL fUseRecycledCell = FALSE); + BYTE* GenerateStubIndirection(PCODE stub, DispatchToken token, BOOL fUseRecycledCell = FALSE); // Set up static data structures - called during EEStartup static void InitStatic(); @@ -1540,6 +1542,6 @@ class BucketTable BYTE* GenerateDispatchStubCellEntryMethodDesc(LoaderAllocator *pLoaderAllocator, TypeHandle ownerType, MethodDesc *pMD, LCGMethodResolver *pResolver); BYTE* GenerateDispatchStubCellEntrySlot(LoaderAllocator *pLoaderAllocator, TypeHandle ownerType, int methodSlot, LCGMethodResolver *pResolver); -inline bool DispatchStubRequiresExtraSlot() { return true; } +inline bool UseCachedInterfaceDispatch() { return true; } #endif // !_VIRTUAL_CALL_STUB_H From 652930c64c0e1c5762ed0df9ef4bffbffacc584b Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Wed, 22 Jan 2025 11:16:36 -0800 Subject: [PATCH 06/41] Initialize CachedInterfaceDispatch at startup --- src/coreclr/vm/virtualcallstub.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index d16c0b18a9f5ee..f54f659113e7fd 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -783,6 +783,8 @@ void VirtualCallStubManager::InitStatic() { STANDARD_VM_CONTRACT; + InterfaceDispatch_Initialize(); + #ifdef STUB_LOGGING // Note if you change these values using environment variables then you must use hex values :-( STUB_MISS_COUNT_VALUE = (INT32) CLRConfig::GetConfigValue(CLRConfig::INTERNAL_VirtualCallStubMissCount); @@ -1186,12 +1188,13 
@@ BYTE *VirtualCallStubManager::GenerateStubIndirection(PCODE target, DispatchToke BYTE** pBlockCur = pBlock; for (UINT32 i = 1; i < cellsPerBlock - 1; ++i) { - *pBlockCur = (BYTE *)&(pBlock[i+1]); - pBlockCur = (BYTE**)(((BYTE*)pBlockCur) + sizeOfIndCell); + BYTE** pBlockNext = (BYTE**)(((BYTE*)pBlockCur) + sizeOfIndCell); + *pBlockCur = (BYTE *)pBlockNext; + pBlockCur = (BYTE**)pBlockNext; } // insert the list into the free indcell list. - InsertIntoFreeIndCellList((BYTE *)&pBlock[1], (BYTE*)&pBlock[cellsPerBlock - 1]); + InsertIntoFreeIndCellList((((BYTE*)pBlock) + sizeOfIndCell), (((BYTE*)pBlock) + ((cellsPerBlock - 1) * sizeOfIndCell))); } if (UseCachedInterfaceDispatch()) From 39a257453b32c9401266b9b4675d5e3204045aca Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Wed, 22 Jan 2025 12:56:48 -0800 Subject: [PATCH 07/41] AMD64 seems to work --- src/coreclr/vm/amd64/VirtualCallStubAMD64.asm | 1 - src/coreclr/vm/comdelegate.cpp | 23 ++++-- src/coreclr/vm/jitinterface.cpp | 6 +- src/coreclr/vm/virtualcallstub.cpp | 74 +++++++++++++++++-- 4 files changed, 87 insertions(+), 17 deletions(-) diff --git a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm index 6dc603aa6bda56..14feddcc142376 100644 --- a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm +++ b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm @@ -69,7 +69,6 @@ LEAF_END RhpVTableOffsetDispatch, _TEXT ;; On Input: ;; r11 contains the address of the indirection cell ;; [rsp+0] m_ReturnAddress: contains the return address of caller to stub - NESTED_ENTRY RhpInterfaceDispatchSlow, _TEXT PROLOG_WITH_TRANSITION_BLOCK diff --git a/src/coreclr/vm/comdelegate.cpp b/src/coreclr/vm/comdelegate.cpp index 2c0b149dc0026c..c2dfd44150b01a 100644 --- a/src/coreclr/vm/comdelegate.cpp +++ b/src/coreclr/vm/comdelegate.cpp @@ -960,6 +960,8 @@ static PCODE SetupShuffleThunk(MethodTable * pDelMT, MethodDesc *pTargetMeth) return pShuffleThunk->GetEntryPoint(); } +extern "C" PCODE 
CID_VirtualOpenDelegateDispatch(TransitionBlock * pTransitionBlock); + static PCODE GetVirtualCallStub(MethodDesc *method, TypeHandle scopeType) { CONTRACTL @@ -977,13 +979,20 @@ static PCODE GetVirtualCallStub(MethodDesc *method, TypeHandle scopeType) COMPlusThrow(kNotSupportedException); } - // need to grab a virtual dispatch stub - // method can be on a canonical MethodTable, we need to allocate the stub on the loader allocator associated with the exact type instantiation. - VirtualCallStubManager *pVirtualStubManager = scopeType.GetMethodTable()->GetLoaderAllocator()->GetVirtualCallStubManager(); - _ASSERTE(!UseCachedInterfaceDispatch()); // This code path is not yet ready - PCODE pTargetCall = pVirtualStubManager->GetCallStub(scopeType, method); - _ASSERTE(pTargetCall); - return pTargetCall; + if (UseCachedInterfaceDispatch()) + { + return (PCODE)CID_VirtualOpenDelegateDispatch; + } + else + { + // need to grab a virtual dispatch stub + // method can be on a canonical MethodTable, we need to allocate the stub on the loader allocator associated with the exact type instantiation. + VirtualCallStubManager *pVirtualStubManager = scopeType.GetMethodTable()->GetLoaderAllocator()->GetVirtualCallStubManager(); + _ASSERTE(!UseCachedInterfaceDispatch()); // This code path is not yet ready + PCODE pTargetCall = pVirtualStubManager->GetCallStub(scopeType, method); + _ASSERTE(pTargetCall); + return pTargetCall; + } } extern "C" BOOL QCALLTYPE Delegate_BindToMethodName(QCall::ObjectHandleOnStack d, QCall::ObjectHandleOnStack target, diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index f21e572484abc6..f4b83ac315ceee 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -13669,7 +13669,11 @@ BOOL LoadDynamicInfoEntry(Module *currentModule, // the moment an indirection cell uses "call [cell-addr]" on x86, and instead we would want the // euqivalent of "call [[call-addr]]". 
This could perhaps be implemented as "call [eax]" DispatchToken token = VirtualCallStubManager::GetTokenFromFromOwnerAndSlot(ownerType, slot); - result = pMgr->GetCallStub(token); + + if (UseCachedInterfaceDispatch()) + return NULL; // R2R interface dispatch currently only supports fixups with a single pointer, return FALSE to skip using the method + else + result = pMgr->GetCallStub(token); } break; #ifdef FEATURE_READYTORUN diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index f54f659113e7fd..3475280b6dad18 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -17,6 +17,7 @@ #include "array.h" #include "CachedInterfaceDispatchPal.h" #include "CachedInterfaceDispatch.h" +#include "comdelegate.h" #ifdef FEATURE_PERFMAP #include "perfmap.h" @@ -1365,7 +1366,7 @@ PCODE CachedInterfaceDispatchResolveWorker(StubCallSite* pCallSite, OBJECTREF *p } #endif // _DEBUG - if (patch) + if (patch && pCallSite != NULL) { DispatchCellInfo cellInfo = ((InterfaceDispatchCell*)pCallSite->GetIndirectCell())->GetDispatchCellInfo(); InterfaceDispatch_UpdateDispatchCellCache((InterfaceDispatchCell*)pCallSite->GetIndirectCell(), target, objectType, &cellInfo); @@ -1374,6 +1375,70 @@ PCODE CachedInterfaceDispatchResolveWorker(StubCallSite* pCallSite, OBJECTREF *p return target; } +// Resolve a dispatch on a virtual open delegate without updating any pointers +extern "C" PCODE CID_VirtualOpenDelegateDispatch(TransitionBlock * pTransitionBlock) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + INJECT_FAULT(COMPlusThrowOM();); + PRECONDITION(CheckPointer(pTransitionBlock)); + MODE_COOPERATIVE; + } CONTRACTL_END; + + MAKE_CURRENT_THREAD_AVAILABLE(); + +#ifdef _DEBUG + Thread::ObjectRefFlush(CURRENT_THREAD); +#endif + + FrameWithCookie frame(pTransitionBlock); + StubDispatchFrame * pSDFrame = &frame; + + OBJECTREF *protectedObj = pSDFrame->GetThisPtr(); + _ASSERTE(protectedObj != NULL); + OBJECTREF pObj = *protectedObj; + + 
PCODE target = (PCODE)NULL; + + if (pObj == NULL) { + pSDFrame->SetForNullReferenceException(); + pSDFrame->Push(CURRENT_THREAD); + INSTALL_MANAGED_EXCEPTION_DISPATCHER; + INSTALL_UNWIND_AND_CONTINUE_HANDLER; + COMPlusThrow(kNullReferenceException); + UNINSTALL_UNWIND_AND_CONTINUE_HANDLER; + UNINSTALL_MANAGED_EXCEPTION_DISPATCHER; + _ASSERTE(!"Throw returned"); + } + + MethodDesc *pTargetMD = COMDelegate::GetMethodDesc(pObj); + pSDFrame->SetFunction(pTargetMD); + + pSDFrame->Push(CURRENT_THREAD); + INSTALL_MANAGED_EXCEPTION_DISPATCHER; + INSTALL_UNWIND_AND_CONTINUE_HANDLER; + + GCStress::MaybeTriggerAndProtect(pObj); + + DispatchToken token = VirtualCallStubManager::GetTokenFromFromOwnerAndSlot(TypeHandle(pTargetMD->GetMethodTable()), pTargetMD->GetSlot()); + target = CachedInterfaceDispatchResolveWorker(NULL, protectedObj, token); + +#if _DEBUG + if (pSDFrame->GetGCRefMap() != NULL) + { + GCX_PREEMP(); + _ASSERTE(CheckGCRefMapEqual(pSDFrame->GetGCRefMap(), pSDFrame->GetFunction(), true)); + } +#endif // _DEBUG + + UNINSTALL_UNWIND_AND_CONTINUE_HANDLER; + UNINSTALL_MANAGED_EXCEPTION_DISPATCHER; + pSDFrame->Pop(CURRENT_THREAD); + + return target; +} + /* Resolve to a method and return its address or NULL if there is none Our return value is the target address that control should continue to. 
Our caller will enter the target address as if a direct call with the original stack frame had been made from @@ -1443,11 +1508,6 @@ extern "C" PCODE CID_ResolveWorker(TransitionBlock * pTransitionBlock, INSTALL_MANAGED_EXCEPTION_DISPATCHER; INSTALL_UNWIND_AND_CONTINUE_HANDLER; - // For Virtual Delegates the m_siteAddr is a field of a managed object - // Thus we have to report it as an interior pointer, - // so that it is updated during a gc - GCPROTECT_BEGININTERIOR( *(callSite.GetIndirectCellAddress()) ); - GCStress::MaybeTriggerAndProtect(pObj); target = CachedInterfaceDispatchResolveWorker(&callSite, protectedObj, indirectionCell->m_token); @@ -1460,8 +1520,6 @@ extern "C" PCODE CID_ResolveWorker(TransitionBlock * pTransitionBlock, } #endif // _DEBUG - GCPROTECT_END(); - UNINSTALL_UNWIND_AND_CONTINUE_HANDLER; UNINSTALL_MANAGED_EXCEPTION_DISPATCHER; pSDFrame->Pop(CURRENT_THREAD); From 4c0865cc0bf37853fed7e3a63f3d498a25eaae61 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Wed, 22 Jan 2025 13:56:32 -0800 Subject: [PATCH 08/41] Arm64 Windows assembly written and factored amd64 to be similar --- src/coreclr/nativeaot/Runtime/CMakeLists.txt | 6 ++ .../amd64/CachedInterfaceDispatchAot.asm | 38 ++++++++++++ .../nativeaot/Runtime/amd64/MiscStubs.asm | 25 -------- .../Runtime/arm64/AsmMacros_Shared.h | 6 ++ .../arm64/CachedInterfaceDispatchAot.asm | 48 +++++++++++++++ .../shared_runtime/arm64/StubDispatch.asm | 38 +----------- src/coreclr/vm/CMakeLists.txt | 2 + .../amd64/CachedInterfaceDispatchCoreCLR.asm | 53 ++++++++++++++++ src/coreclr/vm/amd64/VirtualCallStubAMD64.asm | 45 -------------- src/coreclr/vm/arm64/AsmMacros_Shared.h | 8 +++ .../arm64/CachedInterfaceDispatchCoreCLR.asm | 61 +++++++++++++++++++ src/coreclr/vm/arm64/asmconstants.h | 6 ++ 12 files changed, 230 insertions(+), 106 deletions(-) create mode 100644 src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.asm create mode 100644 src/coreclr/nativeaot/Runtime/arm64/AsmMacros_Shared.h 
create mode 100644 src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.asm create mode 100644 src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.asm create mode 100644 src/coreclr/vm/arm64/AsmMacros_Shared.h create mode 100644 src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm diff --git a/src/coreclr/nativeaot/Runtime/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/CMakeLists.txt index 278790c3a5cd56..7bb016948e14a5 100644 --- a/src/coreclr/nativeaot/Runtime/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/CMakeLists.txt @@ -216,6 +216,12 @@ list(APPEND RUNTIME_SOURCES_ARCH_ASM ${ARCH_SOURCES_DIR}/WriteBarriers.${ASM_SUFFIX} ) +if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64) + list(APPEND RUNTIME_SOURCES_ARCH_ASM + ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchAot.${ASM_SUFFIX} + ) +endif () + # Add architecture specific folder for looking up headers. convert_to_absolute_path(ARCH_SOURCES_DIR ${ARCH_SOURCES_DIR}) include_directories(${ARCH_SOURCES_DIR}) diff --git a/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.asm b/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.asm new file mode 100644 index 00000000000000..a85ecfb05b6f8c --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.asm @@ -0,0 +1,38 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include AsmMacros.inc + +EXTERN RhpCidResolve : PROC +EXTERN RhpUniversalTransition_DebugStepTailCall : PROC + +EXTERN RhpCidResolve : PROC +EXTERN RhpUniversalTransition_DebugStepTailCall : PROC + + +;; Stub dispatch routine for dispatch to a vtable slot +LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + ;; r11 currently contains the indirection cell address. + ;; load rax to point to the vtable offset (which is stored in the m_pCache field). 
+ mov rax, [r11 + OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; Load the MethodTable from the object instance in rcx, and add it to the vtable offset + ;; to get the address in the vtable of what we want to dereference + add rax, [rcx] + + ;; Load the target address of the vtable into rax + mov rax, [rax] + + TAILJMP_RAX +LEAF_END RhpVTableOffsetDispatch, _TEXT + +;; Cache miss case, call the runtime to resolve the target and update the cache. +;; Use universal transition helper to allow an exception to flow out of resolution +LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT + ;; r11 contains indirection cell address + lea r10, RhpCidResolve + jmp RhpUniversalTransition_DebugStepTailCall + +LEAF_END RhpInterfaceDispatchSlow, _TEXT + +end diff --git a/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.asm b/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.asm index 2f18ce28227bc1..3b2f3147316450 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.asm @@ -40,29 +40,4 @@ ProbeLoop: LEAF_END RhpStackProbe, _TEXT -;; Stub dispatch routine for dispatch to a vtable slot -LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT - ;; r11 currently contains the indirection cell address. - ;; load rax to point to the vtable offset (which is stored in the m_pCache field). - mov rax, [r11 + OFFSETOF__InterfaceDispatchCell__m_pCache] - - ;; Load the MethodTable from the object instance in rcx, and add it to the vtable offset - ;; to get the address in the vtable of what we want to dereference - add rax, [rcx] - - ;; Load the target address of the vtable into rax - mov rax, [rax] - - TAILJMP_RAX -LEAF_END RhpVTableOffsetDispatch, _TEXT - -;; Cache miss case, call the runtime to resolve the target and update the cache. 
-;; Use universal transition helper to allow an exception to flow out of resolution -LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT - ;; r11 contains indirection cell address - lea r10, RhpCidResolve - jmp RhpUniversalTransition_DebugStepTailCall - -LEAF_END RhpInterfaceDispatchSlow, _TEXT - end diff --git a/src/coreclr/nativeaot/Runtime/arm64/AsmMacros_Shared.h b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros_Shared.h new file mode 100644 index 00000000000000..9b15544d43e036 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros_Shared.h @@ -0,0 +1,6 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible + +#include "AsmMacros.h" \ No newline at end of file diff --git a/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.asm b/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.asm new file mode 100644 index 00000000000000..03e9cffed260f0 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.asm @@ -0,0 +1,48 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + + EXTERN RhpCidResolve + EXTERN RhpUniversalTransition_DebugStepTailCall + +;; +;; Stub dispatch routine for dispatch to a vtable slot +;; + LEAF_ENTRY RhpVTableOffsetDispatch + ;; x11 contains the interface dispatch cell address. + ;; load x12 to point to the vtable offset (which is stored in the m_pCache field). 
+ ldr x12, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; Load the MethodTable from the object instance in x0, and add it to the vtable offset + ;; to get the address in the vtable of what we want to dereference + ldr x13, [x0] + add x12, x12, x13 + + ;; Load the target address of the vtable into x12 + ldr x12, [x12] + + br x12 + LEAF_END RhpVTableOffsetDispatch + +;; +;; Cache miss case, call the runtime to resolve the target and update the cache. +;; Use universal transition helper to allow an exception to flow out of resolution. +;; + LEAF_ENTRY RhpInterfaceDispatchSlow + ;; x11 contains the interface dispatch cell address. + ;; Calling convention of the universal thunk is: + ;; xip0: target address for the thunk to call + ;; xip1: parameter of the thunk's target + ldr xip0, =RhpCidResolve + mov xip1, x11 + b RhpUniversalTransition_DebugStepTailCall + LEAF_END RhpInterfaceDispatchSlow + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + + END diff --git a/src/coreclr/shared_runtime/arm64/StubDispatch.asm b/src/coreclr/shared_runtime/arm64/StubDispatch.asm index 93e6038f1047a0..697d3a10f52e01 100644 --- a/src/coreclr/shared_runtime/arm64/StubDispatch.asm +++ b/src/coreclr/shared_runtime/arm64/StubDispatch.asm @@ -1,14 +1,13 @@ ;; Licensed to the .NET Foundation under one or more agreements. ;; The .NET Foundation licenses this file to you under the MIT license. -#include "AsmMacros.h" +#include "AsmMacros_Shared.h" TEXTAREA #ifdef FEATURE_CACHED_INTERFACE_DISPATCH - EXTERN RhpCidResolve - EXTERN RhpUniversalTransition_DebugStepTailCall + EXTERN RhpInterfaceDispatchSlow ;; Macro that generates code to check a single cache entry. MACRO @@ -88,39 +87,6 @@ CurrentEntry SETA CurrentEntry + 1 b RhpInterfaceDispatchSlow LEAF_END RhpInitialInterfaceDispatch -;; -;; Stub dispatch routine for dispatch to a vtable slot -;; - LEAF_ENTRY RhpVTableOffsetDispatch - ;; x11 contains the interface dispatch cell address. 
- ;; load x12 to point to the vtable offset (which is stored in the m_pCache field). - ldr x12, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] - - ;; Load the MethodTable from the object instance in x0, and add it to the vtable offset - ;; to get the address in the vtable of what we want to dereference - ldr x13, [x0] - add x12, x12, x13 - - ;; Load the target address of the vtable into x12 - ldr x12, [x12] - - br x12 - LEAF_END RhpVTableOffsetDispatch - -;; -;; Cache miss case, call the runtime to resolve the target and update the cache. -;; Use universal transition helper to allow an exception to flow out of resolution. -;; - LEAF_ENTRY RhpInterfaceDispatchSlow - ;; x11 contains the interface dispatch cell address. - ;; Calling convention of the universal thunk is: - ;; xip0: target address for the thunk to call - ;; xip1: parameter of the thunk's target - ldr xip0, =RhpCidResolve - mov xip1, x11 - b RhpUniversalTransition_DebugStepTailCall - LEAF_END RhpInterfaceDispatchSlow - #endif // FEATURE_CACHED_INTERFACE_DISPATCH END diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index cde2dc584e39f5..52d5a5f6e5156c 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -634,6 +634,7 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64) ${ARCH_SOURCES_DIR}/ExternalMethodFixupThunk.asm ${ARCH_SOURCES_DIR}/UMThunkStub.asm ${ARCH_SOURCES_DIR}/VirtualCallStubAMD64.asm + ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.asm ) set(VM_HEADERS_WKS_ARCH_ASM @@ -659,6 +660,7 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) ${ARCH_SOURCES_DIR}/patchedcode.asm ${ARCH_SOURCES_DIR}/PInvokeStubs.asm ${ARCH_SOURCES_DIR}/thunktemplates.asm + ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.asm ) set(VM_HEADERS_WKS_ARCH_ASM diff --git a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.asm b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.asm new file mode 100644 index 00000000000000..9ed5b458204d82 --- /dev/null +++ 
b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.asm @@ -0,0 +1,53 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. + +include +include AsmConstants.inc + + extern CID_ResolveWorker:proc + +;; Stub dispatch routine for dispatch to a vtable slot +LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + ;; r11 currently contains the indirection cell address. + ;; load r11 to point to the vtable offset (which is stored in the m_pCache field). + mov r11, [r11 + OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; r11 now contains the VTableOffset where the upper 32 bits are the offset to adjust + ;; to get to the VTable chunk + mov rax, r11 + shr rax, 32 + + ;; Load the MethodTable from the object instance in rcx, and add it to the vtable offset + ;; to get the address in the vtable chunk list of what we want to dereference + add rax, [rcx] + + ;; Load the target address of the vtable chunk into rax + mov rax, [rax] + + ;; Compute the chunk offset + shr r11d, 16 + + ;; Load the target address of the virtual function into rax + mov rax, [rax + r11] + + TAILJMP_RAX +LEAF_END RhpVTableOffsetDispatch, _TEXT + +;; On Input: +;; r11 contains the address of the indirection cell +;; [rsp+0] m_ReturnAddress: contains the return address of caller to stub +NESTED_ENTRY RhpInterfaceDispatchSlow, _TEXT + + PROLOG_WITH_TRANSITION_BLOCK + + lea rcx, [rsp + __PWTB_TransitionBlock] ; pTransitionBlock + mov rdx, r11 ; indirection cell + + call CID_ResolveWorker + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + TAILJMP_RAX + +NESTED_END RhpInterfaceDispatchSlow, _TEXT + + end \ No newline at end of file diff --git a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm index 14feddcc142376..b533789980c510 100644 --- a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm +++ b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm @@ -7,7 +7,6 @@ include AsmConstants.inc CHAIN_SUCCESS_COUNTER 
equ ?g_dispatch_cache_chain_success_counter@@3_KA extern VSD_ResolveWorker:proc - extern CID_ResolveWorker:proc extern CHAIN_SUCCESS_COUNTER:dword BACKPATCH_FLAG equ 1 ;; Also known as SDF_ResolveBackPatch in the EE @@ -39,50 +38,6 @@ NESTED_ENTRY ResolveWorkerAsmStub, _TEXT NESTED_END ResolveWorkerAsmStub, _TEXT -;; Stub dispatch routine for dispatch to a vtable slot -LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT - ;; r11 currently contains the indirection cell address. - ;; load r11 to point to the vtable offset (which is stored in the m_pCache field). - mov r11, [r11 + OFFSETOF__InterfaceDispatchCell__m_pCache] - - ;; r11 now contains the VTableOffset where the upper 32 bits are the offset to adjust - ;; to get to the VTable chunk - mov rax, r11 - shr rax, 32 - - ;; Load the MethodTable from the object instance in rcx, and add it to the vtable offset - ;; to get the address in the vtable chunk list of what we want to dereference - add rax, [rcx] - - ;; Load the target address of the vtable chunk into rax - mov rax, [rax] - - ;; Compute the chunk offset - shr r11d, 16 - - ;; Load the target address of the virtual function into rax - mov rax, [rax + r11] - - TAILJMP_RAX -LEAF_END RhpVTableOffsetDispatch, _TEXT - -;; On Input: -;; r11 contains the address of the indirection cell -;; [rsp+0] m_ReturnAddress: contains the return address of caller to stub -NESTED_ENTRY RhpInterfaceDispatchSlow, _TEXT - - PROLOG_WITH_TRANSITION_BLOCK - - lea rcx, [rsp + __PWTB_TransitionBlock] ; pTransitionBlock - mov rdx, r11 ; indirection cell - - call CID_ResolveWorker - - EPILOG_WITH_TRANSITION_BLOCK_TAILCALL - TAILJMP_RAX - -NESTED_END RhpInterfaceDispatchSlow, _TEXT - ;; extern void ResolveWorkerChainLookupAsmStub() LEAF_ENTRY ResolveWorkerChainLookupAsmStub, _TEXT ;; This will perform a quick chained lookup of the entry if the initial cache lookup fails diff --git a/src/coreclr/vm/arm64/AsmMacros_Shared.h b/src/coreclr/vm/arm64/AsmMacros_Shared.h new file mode 100644 index 
00000000000000..33b9ddaa26e0f5 --- /dev/null +++ b/src/coreclr/vm/arm64/AsmMacros_Shared.h @@ -0,0 +1,8 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible + +#include "ksarm64.h" +#include "asmconstants.h" +#include "asmmacros.h" diff --git a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm new file mode 100644 index 00000000000000..2f82d9664c0538 --- /dev/null +++ b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm @@ -0,0 +1,61 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. + +#include "ksarm64.h" +#include "asmconstants.h" +#include "asmmacros.h" + + TEXTAREA + + EXTERN CID_ResolveWorker + +;; +;; Stub dispatch routine for dispatch to a vtable slot +;; + LEAF_ENTRY RhpVTableOffsetDispatch + + ;; r11 currently contains the indirection cell address. + ;; load r11 to point to the vtable offset (which is stored in the m_pCache field). 
+ ldr x11, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; r11 now contains the VTableOffset where the upper 32 bits are the offset to adjust + ;; to get to the VTable chunk + lsr x10, x11, #32 + + ;; Load the MethodTable from the object instance in rcx, and add it to the vtable offset + ;; to get the address in the vtable chunk list of what we want to dereference + ldr x9, [x0] + add x9, x10, x9 + + ;; Load the target address of the vtable chunk into rax + ldr x9, [x9] + + ;; Compute the chunk offset + ubfx x10, x11, #16, #16 + + ;; Load the target address of the virtual function into rax + ldr x9, [x9, x10] + + EPILOG_BRANCH_REG x9 + LEAF_END RhpVTableOffsetDispatch + +;; +;; Cache miss case, call the runtime to resolve the target and update the cache. +;; x11 contains the interface dispatch cell address. +;; + NESTED_ENTRY RhpInterfaceDispatchSlow + + PROLOG_WITH_TRANSITION_BLOCK + + add x0, sp, #__PWTB_TransitionBlock ; pTransitionBlock + mov x1, x11 ; indirection cell + + bl CID_ResolveWorker + + mov x9, x0 + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + EPILOG_BRANCH_REG x9 + NESTED_END RhpInterfaceDispatchSlow + + END diff --git a/src/coreclr/vm/arm64/asmconstants.h b/src/coreclr/vm/arm64/asmconstants.h index 9c263b945a84f9..ea5b5b12ffbd87 100644 --- a/src/coreclr/vm/arm64/asmconstants.h +++ b/src/coreclr/vm/arm64/asmconstants.h @@ -235,6 +235,12 @@ ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForMethod == offsetof(CallCoun #define CallCountingStubData__TargetForThresholdReached 0x10 ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForThresholdReached == offsetof(CallCountingStubData, TargetForThresholdReached)) +#define OFFSETOF__InterfaceDispatchCache__m_rgEntries 0x20 +ASMCONSTANTS_C_ASSERT(OFFSETOF__InterfaceDispatchCache__m_rgEntries == offsetof(InterfaceDispatchCache, m_rgEntries)) + +#define OFFSETOF__InterfaceDispatchCell__m_pCache 0x08 +ASMCONSTANTS_C_ASSERT(OFFSETOF__InterfaceDispatchCell__m_pCache == offsetof(InterfaceDispatchCell, 
m_pCache)) + #ifdef PROFILING_SUPPORTED #define PROFILE_ENTER 0x1 #define PROFILE_LEAVE 0x2 From 645c487e11b37c7172b916aa81cf669cc27b71f8 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Wed, 22 Jan 2025 14:42:45 -0800 Subject: [PATCH 09/41] Allow there to be flavors of the build which do not build cached interface dispatch --- src/coreclr/clrfeatures.cmake | 8 ++++++ src/coreclr/debug/CMakeLists.txt | 8 ++++++ src/coreclr/vm/CMakeLists.txt | 41 +++++++++++++++++++++++------- src/coreclr/vm/comdelegate.cpp | 9 +++---- src/coreclr/vm/jitinterface.cpp | 5 ++-- src/coreclr/vm/virtualcallstub.cpp | 26 ++++++++++++------- src/coreclr/vm/virtualcallstub.h | 10 ++++++++ 7 files changed, 81 insertions(+), 26 deletions(-) diff --git a/src/coreclr/clrfeatures.cmake b/src/coreclr/clrfeatures.cmake index 2bf7216af1b4db..26c2d25e7d417a 100644 --- a/src/coreclr/clrfeatures.cmake +++ b/src/coreclr/clrfeatures.cmake @@ -39,3 +39,11 @@ endif() if (CLR_CMAKE_TARGET_WIN32) set(FEATURE_TYPEEQUIVALENCE 1) endif(CLR_CMAKE_TARGET_WIN32) + +if (CLR_CMAKE_TARGET_WIN32) + if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64) + set(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH 1) + endif() +endif() + +set(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH 1) diff --git a/src/coreclr/debug/CMakeLists.txt b/src/coreclr/debug/CMakeLists.txt index d0a999f65c1485..26d3369d49d13e 100644 --- a/src/coreclr/debug/CMakeLists.txt +++ b/src/coreclr/debug/CMakeLists.txt @@ -1,3 +1,11 @@ +if(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) + add_definitions(-DFEATURE_CACHED_INTERFACE_DISPATCH) +endif(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) + +if(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH) + add_definitions(-DFEATURE_VIRTUAL_STUB_DISPATCH) +endif(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH) + add_subdirectory(daccess) add_subdirectory(ee) add_subdirectory(di) diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index 52d5a5f6e5156c..2cb11ca5cd8540 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ 
b/src/coreclr/vm/CMakeLists.txt @@ -44,7 +44,13 @@ if(FEATURE_PERFTRACING) include_directories(${CORECLR_USEREVENTS_SHIM_DIR}) endif(FEATURE_PERFTRACING) -add_definitions(-DFEATURE_CACHED_INTERFACE_DISPATCH) +if(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) + add_definitions(-DFEATURE_CACHED_INTERFACE_DISPATCH) +endif(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) + +if(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH) + add_definitions(-DFEATURE_VIRTUAL_STUB_DISPATCH) +endif(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH) set(VM_SOURCES_DAC_AND_WKS_COMMON appdomain.cpp @@ -290,8 +296,6 @@ set(GC_HEADERS_DAC set(VM_SOURCES_WKS ${VM_SOURCES_DAC_AND_WKS_COMMON} - ${SHARED_RUNTIME_DIR}/CachedInterfaceDispatch.cpp - CachedInterfaceDispatchCoreclr.cpp appdomainnative.cpp assemblynative.cpp assemblyspec.cpp @@ -380,6 +384,13 @@ set(VM_SOURCES_WKS ${VM_SOURCES_GDBJIT} ) +if(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) + list(APPEND VM_SOURCES_WKS + ${SHARED_RUNTIME_DIR}/CachedInterfaceDispatch.cpp + CachedInterfaceDispatchCoreclr.cpp + ) +endif(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) + # coreclr needs to compile codeman.cpp differently depending on flavor (i.e. dll vs. 
static lib)) list(REMOVE_ITEM VM_SOURCES_WKS codeman.cpp) @@ -634,9 +645,14 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64) ${ARCH_SOURCES_DIR}/ExternalMethodFixupThunk.asm ${ARCH_SOURCES_DIR}/UMThunkStub.asm ${ARCH_SOURCES_DIR}/VirtualCallStubAMD64.asm - ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.asm ) + if(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) + list(APPEND VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.asm + ) + endif(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) + set(VM_HEADERS_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/asmconstants.h ) @@ -660,9 +676,14 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) ${ARCH_SOURCES_DIR}/patchedcode.asm ${ARCH_SOURCES_DIR}/PInvokeStubs.asm ${ARCH_SOURCES_DIR}/thunktemplates.asm - ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.asm ) + if(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) + list(APPEND VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.asm + ) + endif(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) + set(VM_HEADERS_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/asmconstants.h ) @@ -736,10 +757,12 @@ else(CLR_CMAKE_TARGET_WIN32) set(ASM_SUFFIX S) endif(CLR_CMAKE_TARGET_WIN32) -set(VM_SOURCES_WKS_ARCH_ASM - ${VM_SOURCES_WKS_ARCH_ASM} - ${SHARED_RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.${ASM_SUFFIX} -) +if(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) + set(VM_SOURCES_WKS_ARCH_ASM + ${VM_SOURCES_WKS_ARCH_ASM} + ${SHARED_RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.${ASM_SUFFIX} + ) +endif(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) if(CLR_CMAKE_TARGET_ARCH_AMD64) set(VM_SOURCES_DAC_AND_WKS_ARCH diff --git a/src/coreclr/vm/comdelegate.cpp b/src/coreclr/vm/comdelegate.cpp index c2dfd44150b01a..efcc9f0197aec5 100644 --- a/src/coreclr/vm/comdelegate.cpp +++ b/src/coreclr/vm/comdelegate.cpp @@ -979,12 +979,9 @@ static PCODE GetVirtualCallStub(MethodDesc *method, TypeHandle scopeType) COMPlusThrow(kNotSupportedException); } - if (UseCachedInterfaceDispatch()) - { + INTERFACE_DISPATCH_CACHED_OR_VSD( return 
(PCODE)CID_VirtualOpenDelegateDispatch; - } - else - { + , // need to grab a virtual dispatch stub // method can be on a canonical MethodTable, we need to allocate the stub on the loader allocator associated with the exact type instantiation. VirtualCallStubManager *pVirtualStubManager = scopeType.GetMethodTable()->GetLoaderAllocator()->GetVirtualCallStubManager(); @@ -992,7 +989,7 @@ static PCODE GetVirtualCallStub(MethodDesc *method, TypeHandle scopeType) PCODE pTargetCall = pVirtualStubManager->GetCallStub(scopeType, method); _ASSERTE(pTargetCall); return pTargetCall; - } + ); } extern "C" BOOL QCALLTYPE Delegate_BindToMethodName(QCall::ObjectHandleOnStack d, QCall::ObjectHandleOnStack target, diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index f4b83ac315ceee..e96dc1dd14d92d 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -13670,10 +13670,11 @@ BOOL LoadDynamicInfoEntry(Module *currentModule, // euqivalent of "call [[call-addr]]". This could perhaps be implemented as "call [eax]" DispatchToken token = VirtualCallStubManager::GetTokenFromFromOwnerAndSlot(ownerType, slot); - if (UseCachedInterfaceDispatch()) + INTERFACE_DISPATCH_CACHED_OR_VSD( return NULL; // R2R interface dispatch currently only supports fixups with a single pointer, return FALSE to skip using the method - else + , result = pMgr->GetCallStub(token); + ); } break; #ifdef FEATURE_READYTORUN diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index 3475280b6dad18..0edf695a7642b3 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -128,7 +128,9 @@ BYTE* GenerateDispatchStubCellEntrySlot(LoaderAllocator *pLoaderAllocator, TypeH // The call stub and the indirection cell have the same lifetime as the dictionary itself, i.e. // are allocated in the domain of the dicitonary. 
DispatchToken token = VirtualCallStubManager::GetTokenFromFromOwnerAndSlot(ownerType, methodSlot); - PCODE addr = UseCachedInterfaceDispatch() ? (PCODE)RhpInitialInterfaceDispatch : pMgr->GetCallStub(token); + + PCODE addr; + INTERFACE_DISPATCH_CACHED_OR_VSD(addr = (PCODE)RhpInitialInterfaceDispatch, addr = pMgr->GetCallStub(token)) BYTE* indcell = pMgr->GenerateStubIndirection(addr, token, pResolver != NULL); @@ -784,7 +786,9 @@ void VirtualCallStubManager::InitStatic() { STANDARD_VM_CONTRACT; +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH InterfaceDispatch_Initialize(); +#endif #ifdef STUB_LOGGING // Note if you change these values using environment variables then you must use hex values :-( @@ -1164,7 +1168,8 @@ BYTE *VirtualCallStubManager::GenerateStubIndirection(PCODE target, DispatchToke BYTE * ret = NULL; UINT32 cellsPerBlock = INDCELLS_PER_BLOCK; - UINT32 sizeOfIndCell = UseCachedInterfaceDispatch() ? sizeof(InterfaceDispatchCell) : sizeof(BYTE *); + UINT32 sizeOfIndCell; + INTERFACE_DISPATCH_CACHED_OR_VSD(sizeOfIndCell = sizeof(InterfaceDispatchCell), sizeOfIndCell = sizeof(BYTE *)); // First try the recycled indirection cell list for Dynamic methods if (fUseRecycledCell) @@ -1177,7 +1182,9 @@ BYTE *VirtualCallStubManager::GenerateStubIndirection(PCODE target, DispatchToke // Allocate from loader heap if (!ret) { - size_t alignment = UseCachedInterfaceDispatch() ? sizeof(TADDR) * 2 : sizeof(TADDR); + size_t alignment; + INTERFACE_DISPATCH_CACHED_OR_VSD(alignment = sizeof(TADDR) * 2, alignment = sizeof(TADDR)); + // Free list is empty, allocate a block of indcells from indcell_heap and insert it into the free list. 
BYTE ** pBlock = (BYTE **) (void *) indcell_heap->AllocAlignedMem(cellsPerBlock * sizeOfIndCell, alignment); @@ -1198,18 +1205,16 @@ BYTE *VirtualCallStubManager::GenerateStubIndirection(PCODE target, DispatchToke InsertIntoFreeIndCellList((((BYTE*)pBlock) + sizeOfIndCell), (((BYTE*)pBlock) + ((cellsPerBlock - 1) * sizeOfIndCell))); } - if (UseCachedInterfaceDispatch()) - { + INTERFACE_DISPATCH_CACHED_OR_VSD( InterfaceDispatchCell * pCell = (InterfaceDispatchCell *)ret; pCell->m_pStub = target; pCell->m_pCache = InterfaceDispatchCell::InitialDispatchCacheCellValue(); pCell->m_token = token; ret = (BYTE *)pCell; - } - else - { + , *((PCODE *)ret) = target; - } + ) + RETURN ret; } @@ -1340,6 +1345,7 @@ ResolveCacheElem* __fastcall VirtualCallStubManager::PromoteChainEntry(ResolveCa } #endif // CHAIN_LOOKUP +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH PCODE CachedInterfaceDispatchResolveWorker(StubCallSite* pCallSite, OBJECTREF *protectedObj, DispatchToken token) { CONTRACTL { @@ -1526,6 +1532,8 @@ extern "C" PCODE CID_ResolveWorker(TransitionBlock * pTransitionBlock, return target; } +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + /* Resolve to a method and return its address or NULL if there is none. Our return value is the target address that control should continue to. 
Our caller will enter the target address as if a direct call with the original stack frame had been made from diff --git a/src/coreclr/vm/virtualcallstub.h b/src/coreclr/vm/virtualcallstub.h index b3e9f78290d732..3ee2d2982e284d 100644 --- a/src/coreclr/vm/virtualcallstub.h +++ b/src/coreclr/vm/virtualcallstub.h @@ -1544,4 +1544,14 @@ BYTE* GenerateDispatchStubCellEntrySlot(LoaderAllocator *pLoaderAllocator, TypeH inline bool UseCachedInterfaceDispatch() { return true; } +#if defined(FEATURE_CACHED_INTERFACE_DISPATCH) && defined(FEATURE_VIRTUAL_STUB_DISPATCH) +#define INTERFACE_DISPATCH_CACHED_OR_VSD(cachedDispatch, vsdDispath) if (UseCachedInterfaceDispatch()) { cachedDispatch; } else { vsdDispath; } +#elif defined(FEATURE_CACHED_INTERFACE_DISPATCH) +#define INTERFACE_DISPATCH_CACHED_OR_VSD(cachedDispatch, vsdDispath) { cachedDispatch; } +#elif defined(FEATURE_VIRTUAL_STUB_DISPATCH) +#define INTERFACE_DISPATCH_CACHED_OR_VSD(cachedDispatch, vsdDispath) { vsdDispath; } +#else +#error "No dispatch mechanism defined" +#endif + #endif // !_VIRTUAL_CALL_STUB_H From 921631a9c00b6d9d5acff501bb1b350bb261bdf9 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Thu, 23 Jan 2025 16:09:12 -0800 Subject: [PATCH 10/41] Make it possible for some OS/Architecture sets to have cached interface dispatch or virtual stub dispatch --- src/coreclr/clrfeatures.cmake | 4 +- src/coreclr/debug/daccess/dacdbiimpl.cpp | 2 + src/coreclr/debug/daccess/request.cpp | 10 +- src/coreclr/vm/CMakeLists.txt | 9 +- src/coreclr/vm/amd64/asmconstants.h | 2 + src/coreclr/vm/amd64/excepamd64.cpp | 46 ++++-- src/coreclr/vm/arm64/asmconstants.h | 4 + src/coreclr/vm/arm64/asmhelpers.S | 3 +- src/coreclr/vm/arm64/asmhelpers.asm | 6 + src/coreclr/vm/arm64/stubs.cpp | 47 ++++-- src/coreclr/vm/callhelpers.cpp | 2 +- src/coreclr/vm/codeman.h | 22 +-- src/coreclr/vm/comdelegate.cpp | 12 +- src/coreclr/vm/excep.cpp | 9 ++ src/coreclr/vm/method.cpp | 26 --- src/coreclr/vm/method.hpp | 3 - src/coreclr/vm/prestub.cpp | 
43 ++++- src/coreclr/vm/stubmgr.cpp | 10 +- src/coreclr/vm/stubmgr.h | 13 ++ src/coreclr/vm/virtualcallstub.cpp | 192 +++++++++++++++++++++-- src/coreclr/vm/virtualcallstub.h | 106 +++++++++++-- 21 files changed, 453 insertions(+), 118 deletions(-) diff --git a/src/coreclr/clrfeatures.cmake b/src/coreclr/clrfeatures.cmake index 26c2d25e7d417a..9d7e8017489b22 100644 --- a/src/coreclr/clrfeatures.cmake +++ b/src/coreclr/clrfeatures.cmake @@ -46,4 +46,6 @@ if (CLR_CMAKE_TARGET_WIN32) endif() endif() -set(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH 1) +if (NOT (CLR_CMAKE_TARGET_WIN32 AND (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64))) + set(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH 1) +endif() diff --git a/src/coreclr/debug/daccess/dacdbiimpl.cpp b/src/coreclr/debug/daccess/dacdbiimpl.cpp index eef3dc127b9206..922925c19bbe4b 100644 --- a/src/coreclr/debug/daccess/dacdbiimpl.cpp +++ b/src/coreclr/debug/daccess/dacdbiimpl.cpp @@ -3544,7 +3544,9 @@ void DacDbiInterfaceImpl::EnumerateMemRangesForLoaderAllocator(PTR_LoaderAllocat if (pVcsMgr) { if (pVcsMgr->indcell_heap != NULL) heapsToEnumerate.Push(pVcsMgr->indcell_heap); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH if (pVcsMgr->cache_entry_heap != NULL) heapsToEnumerate.Push(pVcsMgr->cache_entry_heap); +#endif } TADDR rangeAccumAsTaddr = TO_TADDR(rangeAcummulator); diff --git a/src/coreclr/debug/daccess/request.cpp b/src/coreclr/debug/daccess/request.cpp index 7a07a8c0f6c3c1..f2ea1c50a2c248 100644 --- a/src/coreclr/debug/daccess/request.cpp +++ b/src/coreclr/debug/daccess/request.cpp @@ -3595,14 +3595,18 @@ ClrDataAccess::TraverseVirtCallStubHeap(CLRDATA_ADDRESS pAppDomain, VCSHeapType break; case CacheEntryHeap: +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH pLoaderHeap = pVcsMgr->cache_entry_heap; +#else + hr = S_OK; +#endif break; default: hr = E_INVALIDARG; } - if (SUCCEEDED(hr)) + if (SUCCEEDED(hr) && (pLoaderHeap != NULL)) { hr = TraverseLoaderHeapBlock(pLoaderHeap->m_pFirstBlock, pFunc); } @@ -3645,7 +3649,9 @@ static const 
char *LoaderAllocatorLoaderHeapNames[] = "FixupPrecodeHeap", "NewStubPrecodeHeap", "IndcellHeap", +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH "CacheEntryHeap", +#endif }; @@ -3689,7 +3695,9 @@ HRESULT ClrDataAccess::GetLoaderAllocatorHeaps(CLRDATA_ADDRESS loaderAllocatorAd else { pLoaderHeaps[i++] = HOST_CDADDR(pVcsMgr->indcell_heap); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH pLoaderHeaps[i++] = HOST_CDADDR(pVcsMgr->cache_entry_heap); +#endif } // All of the above are "LoaderHeap" and not the ExplicitControl version. diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index 2cb11ca5cd8540..bfbb79090f33e7 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -644,13 +644,18 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64) ${ARCH_SOURCES_DIR}/Context.asm ${ARCH_SOURCES_DIR}/ExternalMethodFixupThunk.asm ${ARCH_SOURCES_DIR}/UMThunkStub.asm - ${ARCH_SOURCES_DIR}/VirtualCallStubAMD64.asm ) + if(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH) + list(APPEND VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/VirtualCallStubAMD64.asm + ) + endif(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH) + if(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) list(APPEND VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.asm - ) + ) endif(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) set(VM_HEADERS_WKS_ARCH_ASM diff --git a/src/coreclr/vm/amd64/asmconstants.h b/src/coreclr/vm/amd64/asmconstants.h index ccaf7bdb6edd73..cc10d134101531 100644 --- a/src/coreclr/vm/amd64/asmconstants.h +++ b/src/coreclr/vm/amd64/asmconstants.h @@ -538,11 +538,13 @@ ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForMethod == offsetof(CallCoun #define CallCountingStubData__TargetForThresholdReached 0x10 ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForThresholdReached == offsetof(CallCountingStubData, TargetForThresholdReached)) +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH #define OFFSETOF__InterfaceDispatchCache__m_rgEntries 0x20 
ASMCONSTANTS_C_ASSERT(OFFSETOF__InterfaceDispatchCache__m_rgEntries == offsetof(InterfaceDispatchCache, m_rgEntries)) #define OFFSETOF__InterfaceDispatchCell__m_pCache 0x08 ASMCONSTANTS_C_ASSERT(OFFSETOF__InterfaceDispatchCell__m_pCache == offsetof(InterfaceDispatchCell, m_pCache)) +#endif // FEATURE_CACHED_INTERFACE_DISPATCH //ASM_SIZEOF( 8, 10, InterfaceDispatchCacheEntry) diff --git a/src/coreclr/vm/amd64/excepamd64.cpp b/src/coreclr/vm/amd64/excepamd64.cpp index 282a84c7d788db..c3777344101083 100644 --- a/src/coreclr/vm/amd64/excepamd64.cpp +++ b/src/coreclr/vm/amd64/excepamd64.cpp @@ -600,26 +600,44 @@ AdjustContextForVirtualStub( PCODE f_IP = GetIP(pContext); - StubCodeBlockKind sk = RangeSectionStubManager::GetStubKind(f_IP); - - if (sk == STUB_CODE_BLOCK_VSD_DISPATCH_STUB) + bool isVirtualStubNullCheck = false; +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + if (VirtualCallStubManager::isCachedInterfaceDispatchStub(f_IP)) { - if ((*PTR_DWORD(f_IP) & 0xffffff) != X64_INSTR_CMP_IND_THIS_REG_RAX) // cmp [THIS_REG], rax - { - _ASSERTE(!"AV in DispatchStub at unknown instruction"); - return FALSE; - } + isVirtualStubNullCheck = true; } - else - if (sk == STUB_CODE_BLOCK_VSD_RESOLVE_STUB) +#endif // FEATURE_CACHED_INTERFACE_DISPATCH +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH + if (!isVirtualStubNullCheck) { - if ((*PTR_DWORD(f_IP) & 0xffffff) != X64_INSTR_MOV_RAX_IND_THIS_REG) // mov rax, [THIS_REG] + StubCodeBlockKind sk = RangeSectionStubManager::GetStubKind(f_IP); + + if (sk == STUB_CODE_BLOCK_VSD_DISPATCH_STUB) { - _ASSERTE(!"AV in ResolveStub at unknown instruction"); - return FALSE; + if ((*PTR_DWORD(f_IP) & 0xffffff) != X64_INSTR_CMP_IND_THIS_REG_RAX) // cmp [THIS_REG], rax + { + _ASSERTE(!"AV in DispatchStub at unknown instruction"); + } + else + { + isVirtualStubNullCheck = true; + } + } + else + if (sk == STUB_CODE_BLOCK_VSD_RESOLVE_STUB) + { + if ((*PTR_DWORD(f_IP) & 0xffffff) != X64_INSTR_MOV_RAX_IND_THIS_REG) // mov rax, [THIS_REG] + { + _ASSERTE(!"AV in 
ResolveStub at unknown instruction"); + } + else + { + isVirtualStubNullCheck = true; + } } } - else +#endif // FEATURE_VIRTUAL_STUB_DISPATCH + if (!isVirtualStubNullCheck) { return FALSE; } diff --git a/src/coreclr/vm/arm64/asmconstants.h b/src/coreclr/vm/arm64/asmconstants.h index ea5b5b12ffbd87..07f2b34768a081 100644 --- a/src/coreclr/vm/arm64/asmconstants.h +++ b/src/coreclr/vm/arm64/asmconstants.h @@ -173,12 +173,14 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__FixupPrecode == sizeof(FixupPrecode)); ASMCONSTANTS_C_ASSERT(MethodDesc_ALIGNMENT_SHIFT == MethodDesc::ALIGNMENT_SHIFT); //ASMCONSTANTS_C_ASSERT((1<GetModule(); diff --git a/src/coreclr/vm/codeman.h b/src/coreclr/vm/codeman.h index 87b8bfd27f03a6..225fd1d0720ee5 100644 --- a/src/coreclr/vm/codeman.h +++ b/src/coreclr/vm/codeman.h @@ -90,16 +90,18 @@ class EECodeInfo; enum StubCodeBlockKind : int { - STUB_CODE_BLOCK_UNKNOWN, - STUB_CODE_BLOCK_JUMPSTUB, - STUB_CODE_BLOCK_PRECODE, - STUB_CODE_BLOCK_DYNAMICHELPER, - STUB_CODE_BLOCK_STUBPRECODE, - STUB_CODE_BLOCK_FIXUPPRECODE, - STUB_CODE_BLOCK_VSD_DISPATCH_STUB, - STUB_CODE_BLOCK_VSD_RESOLVE_STUB, - STUB_CODE_BLOCK_VSD_LOOKUP_STUB, - STUB_CODE_BLOCK_VSD_VTABLE_STUB, + STUB_CODE_BLOCK_UNKNOWN = 0, + STUB_CODE_BLOCK_JUMPSTUB = 1, + STUB_CODE_BLOCK_PRECODE = 2, + STUB_CODE_BLOCK_DYNAMICHELPER = 3, + STUB_CODE_BLOCK_STUBPRECODE = 4, + STUB_CODE_BLOCK_FIXUPPRECODE = 5, +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH + STUB_CODE_BLOCK_VSD_DISPATCH_STUB = 6, + STUB_CODE_BLOCK_VSD_RESOLVE_STUB = 7, + STUB_CODE_BLOCK_VSD_LOOKUP_STUB = 8, + STUB_CODE_BLOCK_VSD_VTABLE_STUB = 9, +#endif // FEATURE_VIRTUAL_STUB_DISPATCH // Last valid value. 
Note that the definition is duplicated in debug\daccess\fntableaccess.cpp STUB_CODE_BLOCK_LAST = 0xF, // Placeholders returned by code:GetStubCodeBlockKind diff --git a/src/coreclr/vm/comdelegate.cpp b/src/coreclr/vm/comdelegate.cpp index efcc9f0197aec5..dc03f6bbec35ad 100644 --- a/src/coreclr/vm/comdelegate.cpp +++ b/src/coreclr/vm/comdelegate.cpp @@ -1617,7 +1617,7 @@ extern "C" PCODE QCALLTYPE Delegate_AdjustTarget(QCall::ObjectHandleOnStack targ MethodTable* pMTTarg = target.Get()->GetMethodTable(); - MethodDesc *pMeth = Entry2MethodDesc(method, pMTTarg); + MethodDesc *pMeth = NonVirtualEntry2MethodDesc(method); _ASSERTE(pMeth); _ASSERTE(!pMeth->IsStatic()); @@ -1701,7 +1701,7 @@ extern "C" void QCALLTYPE Delegate_Construct(QCall::ObjectHandleOnStack _this, Q pMTTarg = target.Get()->GetMethodTable(); MethodTable* pDelMT = refThis->GetMethodTable(); - MethodDesc* pMethOrig = Entry2MethodDesc(method, pMTTarg); + MethodDesc* pMethOrig = NonVirtualEntry2MethodDesc(method); MethodDesc* pMeth = pMethOrig; _ASSERTE(pMeth != NULL); @@ -1885,13 +1885,7 @@ MethodDesc *COMDelegate::GetMethodDesc(OBJECTREF orDelegate) // Must be a normal delegate code = thisDel->GetMethodPtr(); - OBJECTREF orThis = thisDel->GetTarget(); - if (orThis!=NULL) - { - pMT = orThis->GetMethodTable(); - } - - pMethodHandle = Entry2MethodDesc(code, pMT); + pMethodHandle = NonVirtualEntry2MethodDesc(code); } } diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index d4441504c0811e..26f4fb42e0a973 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6022,6 +6022,12 @@ BOOL IsIPinVirtualStub(PCODE f_IP) return FALSE; } +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + if (VirtualCallStubManager::isCachedInterfaceDispatchStub(f_IP)) + return TRUE; +#endif + +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH StubCodeBlockKind sk = RangeSectionStubManager::GetStubKind(f_IP); if (sk == STUB_CODE_BLOCK_VSD_DISPATCH_STUB) @@ -6036,6 +6042,9 @@ BOOL IsIPinVirtualStub(PCODE f_IP) else { return 
FALSE; } +#else // FEATURE_VIRTUAL_STUB_DISPATCH + return FALSE; +#endif // FEATURE_VIRTUAL_STUB_DISPATCH } // Check if the passed in instruction pointer is in one of the diff --git a/src/coreclr/vm/method.cpp b/src/coreclr/vm/method.cpp index cd684755299a48..bb8d2430c21f49 100644 --- a/src/coreclr/vm/method.cpp +++ b/src/coreclr/vm/method.cpp @@ -2291,32 +2291,6 @@ MethodDesc* NonVirtualEntry2MethodDesc(PCODE entryPoint) } } -//******************************************************************************* -// convert an entry point into a method desc -MethodDesc* Entry2MethodDesc(PCODE entryPoint, MethodTable *pMT) -{ - CONTRACT(MethodDesc*) - { - THROWS; - GC_TRIGGERS; - MODE_ANY; - POSTCONDITION(RETVAL->SanityCheck()); - } - CONTRACT_END - - MethodDesc* pMD = NonVirtualEntry2MethodDesc(entryPoint); - if (pMD != NULL) - RETURN(pMD); - - pMD = VirtualCallStubManagerManager::Entry2MethodDesc(entryPoint, pMT); - if (pMD != NULL) - RETURN(pMD); - - // We should never get here - _ASSERTE(!"Entry2MethodDesc failed"); - RETURN (NULL); -} - //******************************************************************************* BOOL MethodDesc::IsPointingToPrestub() { diff --git a/src/coreclr/vm/method.hpp b/src/coreclr/vm/method.hpp index 5d541b84fbb493..af94d350028228 100644 --- a/src/coreclr/vm/method.hpp +++ b/src/coreclr/vm/method.hpp @@ -2371,9 +2371,6 @@ inline MethodDescChunk *MethodDesc::GetMethodDescChunk() const } MethodDesc* NonVirtualEntry2MethodDesc(PCODE entryPoint); -// convert an entry point into a MethodDesc -MethodDesc* Entry2MethodDesc(PCODE entryPoint, MethodTable *pMT); - typedef DPTR(class StoredSigMethodDesc) PTR_StoredSigMethodDesc; class StoredSigMethodDesc : public MethodDesc diff --git a/src/coreclr/vm/prestub.cpp b/src/coreclr/vm/prestub.cpp index b78092f7156336..504d231985765d 100644 --- a/src/coreclr/vm/prestub.cpp +++ b/src/coreclr/vm/prestub.cpp @@ -3294,22 +3294,49 @@ EXTERN_C PCODE STDCALL ExternalMethodFixupWorker(TransitionBlock * 
pTransitionBl COMPlusThrow(kNullReferenceException); } - DispatchToken token; - if (pMT->IsInterface()) +#if defined(FEATURE_VIRTUAL_STUB_DISPATCH) && defined(FEATURE_CACHED_INTERFACE_DISPATCH) + if (UseCachedInterfaceDispatch()) +#endif +#if defined(FEATURE_CACHED_INTERFACE_DISPATCH) { + // We don't yet have a proper implementation for cached interface stubs in R2R code, so instead of finding stubs, simply do the resolution in pure C++ + // and skip updating the indirection cell + DispatchToken token; if (pMT->IsInterface()) + { token = pMT->GetLoaderAllocator()->GetDispatchToken(pMT->GetTypeID(), slot); + MethodTable* objectType = (*protectedObj)->GetMethodTable(); + VirtualCallStubManager::Resolver(objectType, token, protectedObj, &pCode, TRUE /* throwOnConflict */); + } else - token = DispatchToken::CreateDispatchToken(slot); - - StubCallSite callSite(pIndirection, pEMFrame->GetReturnAddress()); - pCode = pMgr->ResolveWorker(&callSite, protectedObj, token, STUB_CODE_BLOCK_VSD_LOOKUP_STUB); + { + pCode = (*protectedObj)->GetMethodTable()->GetRestoredSlot(slot); // Ensure that the target slot has an entrypoint + } } +#endif // FEATURE_CACHED_INTERFACE_DISPATCH +#if defined(FEATURE_VIRTUAL_STUB_DISPATCH) && defined(FEATURE_CACHED_INTERFACE_DISPATCH) else +#endif +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH { - pCode = pMgr->GetVTableCallStub(slot); - *(TADDR *)pIndirection = pCode; + DispatchToken token; + if (pMT->IsInterface()) + { + if (pMT->IsInterface()) + token = pMT->GetLoaderAllocator()->GetDispatchToken(pMT->GetTypeID(), slot); + else + token = DispatchToken::CreateDispatchToken(slot); + + StubCallSite callSite(pIndirection, pEMFrame->GetReturnAddress()); + pCode = pMgr->ResolveWorker(&callSite, protectedObj, token, STUB_CODE_BLOCK_VSD_LOOKUP_STUB); + } + else + { + pCode = pMgr->GetVTableCallStub(slot); + *(TADDR *)pIndirection = pCode; + } } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH _ASSERTE(pCode != (PCODE)NULL); } else diff --git a/src/coreclr/vm/stubmgr.cpp 
b/src/coreclr/vm/stubmgr.cpp index 38e1ec17a9cb1f..4c6a3649045760 100644 --- a/src/coreclr/vm/stubmgr.cpp +++ b/src/coreclr/vm/stubmgr.cpp @@ -1523,10 +1523,12 @@ BOOL RangeSectionStubManager::CheckIsStub_Internal(PCODE stubStartAddress) case STUB_CODE_BLOCK_JUMPSTUB: case STUB_CODE_BLOCK_STUBLINK: case STUB_CODE_BLOCK_METHOD_CALL_THUNK: +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH case STUB_CODE_BLOCK_VSD_DISPATCH_STUB: case STUB_CODE_BLOCK_VSD_RESOLVE_STUB: case STUB_CODE_BLOCK_VSD_LOOKUP_STUB: case STUB_CODE_BLOCK_VSD_VTABLE_STUB: +#endif // FEATURE_VIRTUAL_STUB_DISPATCH return TRUE; default: break; @@ -1558,11 +1560,13 @@ BOOL RangeSectionStubManager::DoTraceStub(PCODE stubStartAddress, TraceDestinati case STUB_CODE_BLOCK_STUBLINK: return StubLinkStubManager::g_pManager->DoTraceStub(stubStartAddress, trace); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH case STUB_CODE_BLOCK_VSD_DISPATCH_STUB: case STUB_CODE_BLOCK_VSD_RESOLVE_STUB: case STUB_CODE_BLOCK_VSD_LOOKUP_STUB: case STUB_CODE_BLOCK_VSD_VTABLE_STUB: return VirtualCallStubManagerManager::GlobalManager()->DoTraceStub(stubStartAddress, trace); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH case STUB_CODE_BLOCK_METHOD_CALL_THUNK: #ifdef DACCESS_COMPILE @@ -1598,6 +1602,7 @@ LPCWSTR RangeSectionStubManager::GetStubManagerName(PCODE addr) case STUB_CODE_BLOCK_METHOD_CALL_THUNK: return W("MethodCallThunk"); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH case STUB_CODE_BLOCK_VSD_DISPATCH_STUB: return W("VSD_DispatchStub"); @@ -1609,6 +1614,7 @@ LPCWSTR RangeSectionStubManager::GetStubManagerName(PCODE addr) case STUB_CODE_BLOCK_VSD_VTABLE_STUB: return W("VSD_VTableStub"); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH default: break; @@ -1738,7 +1744,7 @@ BOOL ILStubManager::TraceManager(Thread *thread, PCODE stubIP = GetIP(pContext); *pRetAddr = (BYTE *)StubManagerHelpers::GetReturnAddress(pContext); - DynamicMethodDesc *pStubMD = Entry2MethodDesc(stubIP, NULL)->AsDynamicMethodDesc(); + DynamicMethodDesc *pStubMD = 
NonVirtualEntry2MethodDesc(stubIP)->AsDynamicMethodDesc(); TADDR arg = StubManagerHelpers::GetHiddenArg(pContext); Object * pThis = StubManagerHelpers::GetThisPtr(pContext); LOG((LF_CORDB, LL_INFO1000, "ILSM::TraceManager: Enter: StubMD 0x%p, HiddenArg 0x%p, ThisPtr 0x%p\n", @@ -2234,7 +2240,9 @@ VirtualCallStubManager::DoEnumMemoryRegions(CLRDataEnumMemoryFlags flags) WRAPPER_NO_CONTRACT; DAC_ENUM_VTHIS(); EMEM_OUT(("MEM: %p VirtualCallStubManager\n", dac_cast(this))); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH GetCacheEntryRangeList()->EnumMemoryRegions(flags); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH } #if defined(TARGET_X86) && !defined(UNIX_X86_ABI) diff --git a/src/coreclr/vm/stubmgr.h b/src/coreclr/vm/stubmgr.h index 7446ac54190c5e..6b1e6dc5ddd676 100644 --- a/src/coreclr/vm/stubmgr.h +++ b/src/coreclr/vm/stubmgr.h @@ -861,6 +861,19 @@ class StubManagerHelpers #endif } + static TADDR GetIndirectionCellArg(T_CONTEXT *pContext) + { +#if defined(TARGET_AMD64) + return pContext->R11; +#elif defined(TARGET_ARM) + return pContext->R4; +#elif defined(TARGET_ARM64) + return pContext->X11; +#else + PORTABILITY_ASSERT("StubManagerHelpers::GetIndirectionCellArg"); + return (TADDR)NULL; +#endif + } }; #endif // !__stubmgr_h__ diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index 0edf695a7642b3..1c0c79b8265e5b 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -148,6 +148,7 @@ UINT32 STUB_COLLIDE_WRITE_PCT = 100; UINT32 STUB_COLLIDE_MONO_PCT = 0; #endif // STUB_LOGGING +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH FastTable::NumCallStubs_t FastTable::NumCallStubs; FastTable* BucketTable::dead = NULL; //linked list of the abandoned buckets @@ -155,6 +156,7 @@ FastTable* BucketTable::dead = NULL; //linked list of the abandoned buckets DispatchCache *g_resolveCache = NULL; //cache of dispatch stubs for in line lookup by resolve stubs. 
size_t g_dispatch_cache_chain_success_counter = CALL_STUB_CACHE_INITIAL_SUCCESS_COUNT; +#endif // FEATURE_VIRTUAL_STUB_DISPATCH #ifdef STUB_LOGGING UINT32 g_resetCacheCounter; @@ -224,7 +226,9 @@ void VirtualCallStubManager::LoggingDump() it.Current()->LogStats(); } +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH g_resolveCache->LogStats(); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH // Temp space to use for formatting the output. static const int FMT_STR_SIZE = 160; @@ -408,9 +412,9 @@ void VirtualCallStubManager::LoggingDump() sprintf_s(szPrintStr, ARRAY_SIZE(szPrintStr), "\r\ncache data\r\n"); WriteFile (g_hStubLogFile, szPrintStr, (DWORD) strlen(szPrintStr), &dwWriteByte, NULL); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH size_t total, used; g_resolveCache->GetLoadFactor(&total, &used); - sprintf_s(szPrintStr, ARRAY_SIZE(szPrintStr), OUTPUT_FORMAT_SIZE, "cache_entry_used", used); WriteFile (g_hStubLogFile, szPrintStr, (DWORD) strlen(szPrintStr), &dwWriteByte, NULL); sprintf_s(szPrintStr, ARRAY_SIZE(szPrintStr), OUTPUT_FORMAT_INT, "cache_entry_counter", g_cache_entry_counter); @@ -447,6 +451,7 @@ void VirtualCallStubManager::LoggingDump() sprintf_s(szPrintStr, ARRAY_SIZE(szPrintStr), "\r\n"); WriteFile (g_hStubLogFile, szPrintStr, (DWORD) strlen(szPrintStr), &dwWriteByte, NULL); #endif // STUB_LOGGING +#endif // FEATURE_VIRTUAL_STUB_DISPATCH #if 0 for (unsigned i = 0; i < ContractImplMap::max_delta_count; i++) @@ -501,6 +506,7 @@ void VirtualCallStubManager::ResetCache() } CONTRACTL_END +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH g_resolveCache->LogStats(); g_insert_cache_external = 0; @@ -520,7 +526,7 @@ void VirtualCallStubManager::ResetCache() { it.UnlinkEntry(); } - +#endif // FEATURE_VIRTUAL_STUB_DISPATCH } void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) @@ -539,6 +545,7 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) m_indCellLock.Init(CrstVSDIndirectionCellLock, CRST_UNSAFE_ANYMODE); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH // // Now 
allocate all BucketTables // @@ -548,6 +555,7 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) NewHolder lookups_holder(new BucketTable(CALL_STUB_MIN_BUCKETS)); NewHolder vtableCallers_holder(new BucketTable(CALL_STUB_MIN_BUCKETS)); NewHolder cache_entries_holder(new BucketTable(CALL_STUB_MIN_BUCKETS)); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH // // Now allocate our LoaderHeaps @@ -559,8 +567,10 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) // DWORD indcell_heap_reserve_size; DWORD indcell_heap_commit_size; +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH DWORD cache_entry_heap_reserve_size; DWORD cache_entry_heap_commit_size; +#endif // FEATURE_VIRTUAL_STUB_DISPATCH // // Setup an expected number of items to commit and reserve @@ -571,7 +581,9 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) // indcell_heap_commit_size = 16; indcell_heap_reserve_size = 2000; +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH cache_entry_heap_commit_size = 16; cache_entry_heap_reserve_size = 800; +#endif // FEATURE_VIRTUAL_STUB_DISPATCH // // Convert the number of items into a size in bytes to commit and reserve @@ -579,8 +591,10 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) indcell_heap_reserve_size *= sizeof(void *); indcell_heap_commit_size *= sizeof(void *); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH cache_entry_heap_reserve_size *= sizeof(ResolveCacheElem); cache_entry_heap_commit_size *= sizeof(ResolveCacheElem); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH // // Align up all of the commit and reserve sizes @@ -588,15 +602,20 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) indcell_heap_reserve_size = (DWORD) ALIGN_UP(indcell_heap_reserve_size, GetOsPageSize()); indcell_heap_commit_size = (DWORD) ALIGN_UP(indcell_heap_commit_size, GetOsPageSize()); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH cache_entry_heap_reserve_size = (DWORD) ALIGN_UP(cache_entry_heap_reserve_size, GetOsPageSize()); 
cache_entry_heap_commit_size = (DWORD) ALIGN_UP(cache_entry_heap_commit_size, GetOsPageSize()); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH BYTE * initReservedMem = NULL; if (!m_loaderAllocator->IsCollectible()) { - DWORD dwTotalReserveMemSizeCalc = indcell_heap_reserve_size + - cache_entry_heap_reserve_size; + DWORD dwTotalReserveMemSizeCalc = indcell_heap_reserve_size +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH + + cache_entry_heap_reserve_size +#endif // FEATURE_VIRTUAL_STUB_DISPATCH + ; DWORD dwTotalReserveMemSize = (DWORD) ALIGN_UP(dwTotalReserveMemSizeCalc, VIRTUAL_ALLOC_RESERVE_GRANULARITY); @@ -612,12 +631,16 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) DWORD cPagesRemainder = cWastedPages % 2; // We'll throw this at the cache entry heap indcell_heap_reserve_size += cPagesPerHeap * GetOsPageSize(); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH cache_entry_heap_reserve_size += (cPagesPerHeap + cPagesRemainder) * GetOsPageSize(); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH } +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH CONSISTENCY_CHECK((indcell_heap_reserve_size + cache_entry_heap_reserve_size)== dwTotalReserveMemSize); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH } initReservedMem = (BYTE*)ExecutableAllocator::Instance()->Reserve(dwTotalReserveMemSize); @@ -632,12 +655,17 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) indcell_heap_reserve_size = GetOsPageSize(); indcell_heap_commit_size = GetOsPageSize(); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH cache_entry_heap_reserve_size = GetOsPageSize(); cache_entry_heap_commit_size = GetOsPageSize(); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH #ifdef _DEBUG - DWORD dwTotalReserveMemSizeCalc = indcell_heap_reserve_size + - cache_entry_heap_reserve_size; + DWORD dwTotalReserveMemSizeCalc = indcell_heap_reserve_size +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH + + cache_entry_heap_reserve_size +#endif // FEATURE_VIRTUAL_STUB_DISPATCH + ; #endif DWORD dwActualVSDSize = 0; @@ -652,13 +680,21 @@ void 
VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) } // Hot memory, Writable, No-Execute, infrequent writes + RangeList* pIndCellRangeList = NULL; +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + if (UseCachedInterfaceDispatch()) + { + pIndCellRangeList = &indcell_rangeList; + } +#endif NewHolder indcell_heap_holder( new LoaderHeap(indcell_heap_reserve_size, indcell_heap_commit_size, initReservedMem, indcell_heap_reserve_size, - NULL, UnlockedLoaderHeap::HeapKind::Data)); + pIndCellRangeList, UnlockedLoaderHeap::HeapKind::Data)); initReservedMem += indcell_heap_reserve_size; +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH // Hot memory, Writable, No-Execute, infrequent writes NewHolder cache_entry_heap_holder( new LoaderHeap(cache_entry_heap_reserve_size, cache_entry_heap_commit_size, @@ -682,6 +718,7 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) // Hot memory, Writable, Execute, write exactly once NewHolder vtable_heap_holder( new CodeFragmentHeap(pLoaderAllocator, STUB_CODE_BLOCK_VSD_VTABLE_STUB)); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH // Allocate the initial counter block NewHolder m_counters_holder(new counter_block); @@ -691,6 +728,7 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) // indcell_heap = indcell_heap_holder; indcell_heap_holder.SuppressRelease(); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH lookup_heap = lookup_heap_holder; lookup_heap_holder.SuppressRelease(); dispatch_heap = dispatch_heap_holder; dispatch_heap_holder.SuppressRelease(); resolve_heap = resolve_heap_holder; resolve_heap_holder.SuppressRelease(); @@ -702,6 +740,7 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) lookups = lookups_holder; lookups_holder.SuppressRelease(); vtableCallers = vtableCallers_holder; vtableCallers_holder.SuppressRelease(); cache_entries = cache_entries_holder; cache_entries_holder.SuppressRelease(); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH m_counters = m_counters_holder; 
m_counters_holder.SuppressRelease(); @@ -735,6 +774,7 @@ VirtualCallStubManager::~VirtualCallStubManager() LogStats(); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH // Go through each cache entry and if the cache element there is in // the cache entry heap of the manager being deleted, then we just // set the cache entry to empty. @@ -750,8 +790,10 @@ VirtualCallStubManager::~VirtualCallStubManager() } it.Next(); } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH if (indcell_heap) { delete indcell_heap; indcell_heap = NULL;} +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH if (lookup_heap) { delete lookup_heap; lookup_heap = NULL;} if (dispatch_heap) { delete dispatch_heap; dispatch_heap = NULL;} if (resolve_heap) { delete resolve_heap; resolve_heap = NULL;} @@ -763,6 +805,7 @@ VirtualCallStubManager::~VirtualCallStubManager() if (lookups) { delete lookups; lookups = NULL;} if (vtableCallers) { delete vtableCallers; vtableCallers = NULL;} if (cache_entries) { delete cache_entries; cache_entries = NULL;} +#endif // FEATURE_VIRTUAL_STUB_DISPATCH // Now get rid of the memory taken by the counter_blocks while (m_counters != NULL) @@ -801,6 +844,7 @@ void VirtualCallStubManager::InitStatic() g_resetCacheIncr = (INT32) CLRConfig::GetConfigValue(CLRConfig::INTERNAL_VirtualCallStubResetCacheIncr); #endif // STUB_LOGGING +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH #ifndef STUB_DISPATCH_PORTABLE DispatchHolder::InitializeStatic(); ResolveHolder::InitializeStatic(); @@ -808,6 +852,7 @@ void VirtualCallStubManager::InitStatic() LookupHolder::InitializeStatic(); g_resolveCache = new DispatchCache(); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH if(CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_VirtualCallStubLogging)) StartupLogging(); @@ -834,7 +879,9 @@ void VirtualCallStubManager::LogFinalStats() it.Current()->LogStats(); } +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH g_resolveCache->LogStats(); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH FinishLogging(); } @@ -848,6 +895,7 @@ void VirtualCallStubManager::ReclaimAll() 
STATIC_CONTRACT_GC_NOTRIGGER; STATIC_CONTRACT_FORBID_FAULT; +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH /* @todo: if/when app domain unloading is supported, and when we have app domain specific stub heaps, we can complete the unloading of an app domain stub heap at this point, and make any patches to existing stubs that are @@ -863,6 +911,10 @@ void VirtualCallStubManager::ReclaimAll() { it.Current()->Reclaim(); } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + InterfaceDispatch_ReclaimUnusedInterfaceDispatchCaches(); +#endif // FEATURE_CACHED_INTERFACE_DISPATCH g_reclaim_counter++; } @@ -903,6 +955,7 @@ void VirtualCallStubManager::Reclaim() //---------------------------------------------------------------------------- /* static */ +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH VirtualCallStubManager *VirtualCallStubManager::FindStubManager(PCODE stubAddress, StubCodeBlockKind* wbStubKind) { CONTRACTL { @@ -940,6 +993,7 @@ VirtualCallStubManager *VirtualCallStubManager::FindStubManager(PCODE stubAddres return NULL; } } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH /* for use by debugger. 
*/ @@ -992,7 +1046,7 @@ BOOL VirtualCallStubManager::TraceManager(Thread *thread, *pRetAddr = (BYTE *)StubManagerHelpers::GetReturnAddress(pContext); // Get the token from the stub - DispatchToken token(GetTokenFromStub(pStub)); + DispatchToken token(GetTokenFromStub(pStub, pContext)); // Get the this object from ECX Object *pObj = StubManagerHelpers::GetThisPtr(pContext); @@ -1026,6 +1080,7 @@ DispatchToken VirtualCallStubManager::GetTokenFromFromOwnerAndSlot(TypeHandle ow return token; } +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH PCODE VirtualCallStubManager::GetCallStub(TypeHandle ownerType, MethodDesc *pMD) { CONTRACTL { @@ -1135,6 +1190,7 @@ VTableCallHolder* VirtualCallStubManager::GenerateVTableCallStub(DWORD slot) RETURN(pHolder); } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH //+---------------------------------------------------------------------------- // @@ -1218,6 +1274,7 @@ BYTE *VirtualCallStubManager::GenerateStubIndirection(PCODE target, DispatchToke RETURN ret; } +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH ResolveCacheElem *VirtualCallStubManager::GetResolveCacheElem(void *pMT, size_t token, void *target) @@ -1254,10 +1311,11 @@ ResolveCacheElem *VirtualCallStubManager::GetResolveCacheElem(void *pMT, _ASSERTE(elem && (elem != CALL_STUB_EMPTY_ENTRY)); return elem; } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH #endif // !DACCESS_COMPILE -size_t VirtualCallStubManager::GetTokenFromStub(PCODE stub) +size_t VirtualCallStubManager::GetTokenFromStub(PCODE stub, T_CONTEXT *pContext) { CONTRACTL { @@ -1267,13 +1325,35 @@ size_t VirtualCallStubManager::GetTokenFromStub(PCODE stub) } CONTRACTL_END +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + if (isCachedInterfaceDispatchStub(stub)) + { + TADDR indcell = StubManagerHelpers::GetIndirectionCellArg(pContext); + VirtualCallStubManagerIterator it = + VirtualCallStubManagerManager::GlobalManager()->IterateVirtualCallStubManagers(); + while (it.Next()) + { + if (it.Current()->indcell_rangeList.IsInRange(indcell)) + { + 
InterfaceDispatchCell * pCell = (InterfaceDispatchCell *)indcell; + return pCell->GetDispatchCellInfo().Token.To_SIZE_T(); + } + } + } +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH _ASSERTE(stub != (PCODE)NULL); StubCodeBlockKind stubKind = STUB_CODE_BLOCK_UNKNOWN; VirtualCallStubManager * pMgr = FindStubManager(stub, &stubKind); return GetTokenFromStubQuick(pMgr, stub, stubKind); +#else + return 0; +#endif } +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH size_t VirtualCallStubManager::GetTokenFromStubQuick(VirtualCallStubManager * pMgr, PCODE stub, StubCodeBlockKind kind) { CONTRACTL @@ -1327,6 +1407,7 @@ size_t VirtualCallStubManager::GetTokenFromStubQuick(VirtualCallStubManager * pM return 0; } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH #ifndef DACCESS_COMPILE @@ -1546,6 +1627,7 @@ extern "C" PCODE CID_ResolveWorker(TransitionBlock * pTransitionBlock, any other kind of problem, rather than throwing an exception, we will also return the prestub, unless we are unable to find the method at all, in which case we return NULL. */ +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH PCODE VSD_ResolveWorker(TransitionBlock * pTransitionBlock, TADDR siteAddrForRegisterIndirect, size_t token @@ -2125,6 +2207,7 @@ PCODE VirtualCallStubManager::ResolveWorker(StubCallSite* pCallSite, return target; } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH /* Resolve the token in the context of the method table, and set the target to point to @@ -2413,6 +2496,7 @@ MethodDesc *VirtualCallStubManager::GetInterfaceMethodDescFromToken(DispatchToke #ifndef DACCESS_COMPILE +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH //---------------------------------------------------------------------------- // This will check to see if a match is in the cache. // Returns the target on success, otherwise NULL. @@ -2430,7 +2514,7 @@ PCODE VirtualCallStubManager::CacheLookup(size_t token, UINT16 tokenHash, Method // If the element matches, return the target - we're done! return (PCODE)(pElem != NULL ? 
pElem->target : NULL); } - +#endif // FEATURE_VIRTUAL_STUB_DISPATCH //---------------------------------------------------------------------------- /* static */ @@ -2458,11 +2542,11 @@ VirtualCallStubManager::GetTarget( PCODE target = (PCODE)NULL; -#ifndef STUB_DISPATCH_PORTABLE +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH target = CacheLookup(token.To_SIZE_T(), DispatchCache::INVALID_HASH, pMT); if (target != (PCODE)NULL) return target; -#endif // !STUB_DISPATCH_PORTABLE +#endif // FEATURE_VIRTUAL_STUB_DISPATCH // No match, now do full resolve BOOL fPatch; @@ -2471,7 +2555,7 @@ VirtualCallStubManager::GetTarget( fPatch = Resolver(pMT, token, NULL, &target, throwOnConflict); _ASSERTE(!throwOnConflict || target != (PCODE)NULL); -#ifndef STUB_DISPATCH_PORTABLE +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH if (fPatch) { ResolveCacheElem *pCacheElem = pMT->GetLoaderAllocator()->GetVirtualCallStubManager()-> @@ -2489,7 +2573,7 @@ VirtualCallStubManager::GetTarget( { g_external_call_no_patch++; } -#endif // !STUB_DISPATCH_PORTABLE +#endif // FEATURE_VIRTUAL_STUB_DISPATCH return target; } @@ -2551,6 +2635,7 @@ VirtualCallStubManager::TraceResolver( #ifndef DACCESS_COMPILE +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH //---------------------------------------------------------------------------- /* Change the call site. It is failing the expected MT test in the dispatcher stub too often. 
@@ -2930,10 +3015,12 @@ LookupHolder *VirtualCallStubManager::GenerateLookupStub(PCODE addrOfResolver, s RETURN (holder); } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH //---------------------------------------------------------------------------- /* Generate a cache entry */ +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH ResolveCacheElem *VirtualCallStubManager::GenerateResolveCacheElem(void *addrOfCode, void *pMTExpected, size_t token, @@ -2981,6 +3068,7 @@ ResolveCacheElem *VirtualCallStubManager::GenerateResolveCacheElem(void *addrOfC return e; } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH //------------------------------------------------------------------ // Adds the stub manager to our linked list of virtual stub managers @@ -3043,7 +3131,7 @@ void VirtualCallStubManager::LogStats() // Our Init routine assignes all fields atomically so testing one field should suffice to // test whehter the Init succeeded. - if (!resolvers) + if (!m_counters) { return; } @@ -3077,6 +3165,7 @@ void VirtualCallStubManager::LogStats() sprintf_s(szPrintStr, ARRAY_SIZE(szPrintStr), OUTPUT_FORMAT_INT, "stub_space", stats.stub_space); WriteFile (g_hStubLogFile, szPrintStr, (DWORD) strlen(szPrintStr), &dwWriteByte, NULL); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH size_t total, used; g_resolveCache->GetLoadFactor(&total, &used); @@ -3089,14 +3178,17 @@ void VirtualCallStubManager::LogStats() sprintf_s(szPrintStr, ARRAY_SIZE(szPrintStr), "\r\ncache_load:\t%zu used, %zu total, utilization %#5.2f%%\r\n", used, total, 100.0 * double(used) / double(total)); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH WriteFile (g_hStubLogFile, szPrintStr, (DWORD) strlen(szPrintStr), &dwWriteByte, NULL); } +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH resolvers->LogStats(); dispatchers->LogStats(); lookups->LogStats(); vtableCallers->LogStats(); cache_entries->LogStats(); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH g_site_counter += stats.site_counter; g_stub_lookup_counter += stats.stub_lookup_counter; @@ -3129,6 +3221,7 @@ void 
VirtualCallStubManager::LogStats() stats.cache_entry_space = 0; } +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH void Prober::InitProber(size_t key1, size_t key2, size_t* table) { CONTRACTL { @@ -3787,6 +3880,7 @@ void DispatchCache::LogStats() stats.insert_cache_collide = 0; stats.insert_cache_write = 0; } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH /* The following tablse have bits that have the following properties: 1. Each entry has 12-bits with 5,6 or 7 one bits and 5,6 or 7 zero bits. @@ -3820,6 +3914,7 @@ static const UINT16 tokenHashBits[32] = #endif // HOST_64BIT }; +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH /*static*/ UINT16 DispatchCache::HashToken(size_t token) { LIMITED_METHOD_CONTRACT; @@ -3912,10 +4007,12 @@ void DispatchCache::Iterator::NextValidBucket() NextBucket(); } while (IsValid() && *m_ppCurElem == m_pCache->empty); } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH #endif // !DACCESS_COMPILE ///////////////////////////////////////////////////////////////////////////////////////////// +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH VirtualCallStubManager *VirtualCallStubManagerManager::FindVirtualCallStubManager(PCODE stubAddress) { CONTRACTL { @@ -3927,6 +4024,7 @@ VirtualCallStubManager *VirtualCallStubManagerManager::FindVirtualCallStubManage return VirtualCallStubManager::FindStubManager(stubAddress); } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH static VirtualCallStubManager * const IT_START = (VirtualCallStubManager *)(-1); @@ -3961,6 +4059,20 @@ VirtualCallStubManager *VirtualCallStubManagerIterator::Current() } #ifndef DACCESS_COMPILE + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH +extern "C" void RhpInterfaceDispatch1(); +extern "C" void RhpInterfaceDispatch2(); +extern "C" void RhpInterfaceDispatch4(); +extern "C" void RhpInterfaceDispatch8(); +extern "C" void RhpInterfaceDispatch16(); +extern "C" void RhpInterfaceDispatch32(); +extern "C" void RhpInterfaceDispatch64(); + +extern "C" void RhpVTableOffsetDispatch(); +extern "C" void RhpInitialInterfaceDispatch(); +#endif // 
FEATURE_CACHED_INTERFACE_DISPATCH + ///////////////////////////////////////////////////////////////////////////////////////////// VirtualCallStubManagerManager::VirtualCallStubManagerManager() : m_pManagers(NULL), @@ -3968,6 +4080,25 @@ VirtualCallStubManagerManager::VirtualCallStubManagerManager() m_RWLock(COOPERATIVE_OR_PREEMPTIVE, LOCK_TYPE_DEFAULT) { LIMITED_METHOD_CONTRACT; + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH +#define CACHED_INTERFACE_DISPATCH_HELPER_COUNT 9 + size_t helperCount = 0; + +#define RECORD_CACHED_INTERFACE_DISPATCH_HELPER(helper) _ASSERTE(helperCount < CACHED_INTERFACE_DISPATCH_HELPER_COUNT); pCachedInterfaceDispatchHelpers[helperCount++] = (PCODE)helper; + pCachedInterfaceDispatchHelpers = new PCODE[CACHED_INTERFACE_DISPATCH_HELPER_COUNT]; + RECORD_CACHED_INTERFACE_DISPATCH_HELPER(RhpInterfaceDispatch1); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER(RhpInterfaceDispatch2); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER(RhpInterfaceDispatch4); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER(RhpInterfaceDispatch8); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER(RhpInterfaceDispatch16); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER(RhpInterfaceDispatch32); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER(RhpInterfaceDispatch64); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER(RhpVTableOffsetDispatch); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER(RhpInitialInterfaceDispatch); + + countCachedInterfaceDispatchHelpers = helperCount; +#endif // FEATURE_CACHED_INTERFACE_DISPATCH } ///////////////////////////////////////////////////////////////////////////////////////////// @@ -4009,16 +4140,26 @@ BOOL VirtualCallStubManagerManager::DoTraceStub( { WRAPPER_NO_CONTRACT; + VirtualCallStubManager *pMgr = NULL; +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + // Always use the global loader allocator, and find the correct one during the trace itself + pMgr = SystemDomain::GetGlobalLoaderAllocator()->GetVirtualCallStubManager(); +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + +#ifdef 
FEATURE_VIRTUAL_STUB_DISPATCH // Find the owning manager. We should succeed, since presumably someone already // called CheckIsStub on us to find out that we own the address, and already // called TraceManager to initiate a trace. - VirtualCallStubManager *pMgr = FindVirtualCallStubManager(stubStartAddress); + pMgr = FindVirtualCallStubManager(stubStartAddress); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH + CONSISTENCY_CHECK(CheckPointer(pMgr)); return pMgr->DoTraceStub(stubStartAddress, trace); } #ifndef DACCESS_COMPILE +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH ///////////////////////////////////////////////////////////////////////////////////////////// MethodDesc *VirtualCallStubManagerManager::Entry2MethodDesc( PCODE stubStartAddress, @@ -4051,6 +4192,7 @@ MethodDesc *VirtualCallStubManagerManager::Entry2MethodDesc( return pMT->GetMethodDescForSlotAddress(target); } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH #endif #ifdef DACCESS_COMPILE @@ -4073,11 +4215,29 @@ BOOL VirtualCallStubManagerManager::TraceManager( { WRAPPER_NO_CONTRACT; +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + // Always use the global loader allocator, and find the correct one during the trace itself + VirtualCallStubManager *pMgr = SystemDomain::GetGlobalLoaderAllocator()->GetVirtualCallStubManager(); +#else // FEATURE_CACHED_INTERFACE_DISPATCH // Find the owning manager. We should succeed, since presumably someone already // called CheckIsStub on us to find out that we own the address. VirtualCallStubManager *pMgr = FindVirtualCallStubManager(GetIP(pContext)); CONSISTENCY_CHECK(CheckPointer(pMgr)); +#endif // FEATURE_CACHED_INTERFACE_DISPATCH // Forward the call to the appropriate manager. 
return pMgr->TraceManager(thread, trace, pContext, pRetAddr); } + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH +bool VirtualCallStubManager::isCachedInterfaceDispatchStub(PCODE addr) +{ + LIMITED_METHOD_DAC_CONTRACT; + + VirtualCallStubManagerManager *pGlobalManager = VirtualCallStubManagerManager::GlobalManager(); + + if (pGlobalManager == NULL) + return false; + return pGlobalManager->isCachedInterfaceDispatchStub(addr); +} +#endif \ No newline at end of file diff --git a/src/coreclr/vm/virtualcallstub.h b/src/coreclr/vm/virtualcallstub.h index 3ee2d2982e284d..10a573e151db80 100644 --- a/src/coreclr/vm/virtualcallstub.h +++ b/src/coreclr/vm/virtualcallstub.h @@ -11,7 +11,9 @@ #ifndef _VIRTUAL_CALL_STUB_H #define _VIRTUAL_CALL_STUB_H +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH #define CHAIN_LOOKUP +#endif // FEATURE_VIRTUAL_STUB_DISPATCH #if defined(TARGET_X86) // If this is uncommented, leaves a file "StubLog_.log" with statistics on the behavior @@ -23,12 +25,14 @@ ///////////////////////////////////////////////////////////////////////////////////// // Forward class declarations +class VirtualCallStubManager; +class VirtualCallStubManagerManager; + +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH class FastTable; class BucketTable; class Entry; class Prober; -class VirtualCallStubManager; -class VirtualCallStubManagerManager; struct LookupHolder; struct DispatchHolder; struct ResolveHolder; @@ -93,6 +97,7 @@ enum e_resolveCacheElem_offset_target = e_resolveCacheElem_offset_token + e_resolveCacheElem_sizeof_token, e_resolveCacheElem_offset_next = e_resolveCacheElem_offset_target + e_resolveCacheElem_sizeof_target, }; +#endif // FEATURE_VIRTUAL_STUB_DISPATCH ///////////////////////////////////////////////////////////////////////////////////// // A utility class to help manipulate a call site @@ -143,6 +148,8 @@ struct StubCallSite PCODE GetReturnAddress() { LIMITED_METHOD_CONTRACT; return m_returnAddr; } }; +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH + // These are the assembly language 
entry points that the stubs use when they want to go into the EE extern "C" void ResolveWorkerAsmStub(); // resolve a token and transfer control to that method @@ -155,6 +162,8 @@ extern "C" void BackPatchWorkerStaticStub(PCODE returnAddr, TADDR siteAddrForReg #endif // TARGET_UNIX #endif // TARGET_X86 +#endif // FEATURE_VIRTUAL_STUB_DISPATCH + typedef VPTR(class VirtualCallStubManager) PTR_VirtualCallStubManager; @@ -167,6 +176,7 @@ typedef VPTR(class VirtualCallStubManager) PTR_VirtualCallStubManager; // // call [DispatchCell] // +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH // Where we make sure 'DispatchCell' points at stubs that will do the right thing. DispatchCell is writable // so we can update the code over time. There are three basic types of stubs that the dispatch cell can point // to. @@ -202,6 +212,8 @@ typedef VPTR(class VirtualCallStubManager) PTR_VirtualCallStubManager; // (in)efficiency forever. // // see code:#StubDispatchNotes for more +#endif // FEATURE_VIRTUAL_STUB_DISPATCH + class VirtualCallStubManager : public StubManager { friend class VirtualCallStubManagerManager; @@ -219,6 +231,7 @@ class VirtualCallStubManager : public StubManager virtual const char * DbgGetName() { LIMITED_METHOD_CONTRACT; return "VirtualCallStubManager"; } #endif +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH // The reason for our existence, return a callstub for type id and slot number // where type id = 0 for the class contract (i.e. a virtual call), and type id > 0 for an // interface invoke where the id indicates which interface it is. @@ -227,11 +240,11 @@ class VirtualCallStubManager : public StubManager // you'll get the same callstub twice if you call it with identical inputs. 
PCODE GetCallStub(TypeHandle ownerType, MethodDesc *pMD); PCODE GetCallStub(DispatchToken token); - - static DispatchToken GetTokenFromFromOwnerAndSlot(TypeHandle ownerType, uint32_t slot); - // Stubs for vtable-based virtual calls with no lookups PCODE GetVTableCallStub(DWORD slot); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH + + static DispatchToken GetTokenFromFromOwnerAndSlot(TypeHandle ownerType, uint32_t slot); // Generate an fresh indirection cell. BYTE* GenerateStubIndirection(PCODE stub, DispatchToken token, BOOL fUseRecycledCell = FALSE); @@ -263,12 +276,18 @@ class VirtualCallStubManager : public StubManager #ifndef DACCESS_COMPILE VirtualCallStubManager() : StubManager(), +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH cache_entry_rangeList(), +#endif +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + indcell_rangeList(), +#endif m_loaderAllocator(NULL), m_initialReservedMemForHeaps(NULL), m_FreeIndCellList(NULL), m_RecycledIndCellList(NULL), indcell_heap(NULL), +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH cache_entry_heap(NULL), lookup_heap(NULL), dispatch_heap(NULL), @@ -280,6 +299,7 @@ class VirtualCallStubManager : public StubManager cache_entries(NULL), dispatchers(NULL), resolvers(NULL), +#endif // FEATURE_VIRTUAL_STUB_DISPATCH m_counters(NULL), m_cur_counter_block(NULL), m_cur_counter_block_for_reclaim(NULL), @@ -293,17 +313,30 @@ class VirtualCallStubManager : public StubManager ~VirtualCallStubManager(); #endif // !DACCESS_COMPILE + static bool isCachedInterfaceDispatchStub(PCODE addr); + static BOOL isStubStatic(PCODE addr) { WRAPPER_NO_CONTRACT; + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + if (isCachedInterfaceDispatchStub(addr)) + return TRUE; +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH StubCodeBlockKind sk = RangeSectionStubManager::GetStubKind(addr); return sk == STUB_CODE_BLOCK_VSD_DISPATCH_STUB || sk == STUB_CODE_BLOCK_VSD_LOOKUP_STUB || sk == STUB_CODE_BLOCK_VSD_RESOLVE_STUB || sk == STUB_CODE_BLOCK_VSD_VTABLE_STUB; 
+#else + return FALSE; +#endif // FEATURE_VIRTUAL_STUB_DISPATCH } +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH static BOOL isDispatchingStubStatic(PCODE addr) { WRAPPER_NO_CONTRACT; @@ -346,9 +379,16 @@ class VirtualCallStubManager : public StubManager TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, cache_entry_rangeList); return PTR_RangeList(addr); } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + //use range lists to track the chunks of memory that are part of each heap + LockedRangeList indcell_rangeList; +#endif private: +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH //allocate and initialize a stub of the desired kind DispatchHolder *GenerateDispatchStub(PCODE addrOfCode, PCODE addrOfFail, @@ -388,7 +428,7 @@ class VirtualCallStubManager : public StubManager ResolveCacheElem *GetResolveCacheElem(void *pMT, size_t token, void *target); - +#endif // FEATURE_VIRTUAL_STUB_DISPATCH // This can be used to find a target without needing the ability to throw static BOOL TraceResolver(Object *pObj, DispatchToken token, TraceDestination *trace); @@ -410,14 +450,16 @@ class VirtualCallStubManager : public StubManager static MethodTable *GetTypeFromToken(DispatchToken token); //This is used to get the token out of a stub - static size_t GetTokenFromStub(PCODE stub); + static size_t GetTokenFromStub(PCODE stub, T_CONTEXT *pContext); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH //This is used to get the token out of a stub and we know the stub manager and stub kind static size_t GetTokenFromStubQuick(VirtualCallStubManager * pMgr, PCODE stub, StubCodeBlockKind kind); // General utility functions // Quick lookup in the cache. NOTHROW, GC_NOTRIGGER static PCODE CacheLookup(size_t token, UINT16 tokenHash, MethodTable *pMT); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH // Full exhaustive lookup. 
THROWS, GC_TRIGGERS static PCODE GetTarget(DispatchToken token, MethodTable *pMT, BOOL throwOnConflict); @@ -429,6 +471,7 @@ class VirtualCallStubManager : public StubManager // Given a dispatch token, return true if the token represents a slot on the target. static BOOL IsClassToken(DispatchToken token); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH #ifdef CHAIN_LOOKUP static ResolveCacheElem* __fastcall PromoteChainEntry(ResolveCacheElem *pElem); #endif @@ -462,7 +505,7 @@ class VirtualCallStubManager : public StubManager //Change the callsite to point to stub void BackPatchSite(StubCallSite* pCallSite, PCODE stub); - +#endif // FEATURE_VIRTUAL_STUB_DISPATCH public: /* the following two public functions are to support tracing or stepping thru stubs via the debugger. */ @@ -477,8 +520,10 @@ class VirtualCallStubManager : public StubManager size_t retval=0; if(indcell_heap) retval+=indcell_heap->GetSize(); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH if(cache_entry_heap) retval+=cache_entry_heap->GetSize(); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH return retval; }; @@ -572,6 +617,8 @@ class VirtualCallStubManager : public StubManager #endif // !DACCESS_COMPILE PTR_LoaderHeap indcell_heap; // indirection cells go here + +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH PTR_LoaderHeap cache_entry_heap; // resolve cache elem entries go here PTR_CodeFragmentHeap lookup_heap; // lookup stubs go here PTR_CodeFragmentHeap dispatch_heap; // dispatch stubs go here @@ -598,6 +645,7 @@ class VirtualCallStubManager : public StubManager BucketTable * dispatchers; // hash table of dispatching stubs keyed by tokens/actualtype BucketTable * resolvers; // hash table of resolvers keyed by tokens/resolverstub BucketTable * vtableCallers; // hash table of vtable call stubs keyed by slot values +#endif // FEATURE_VIRTUAL_STUB_DISPATCH // This structure is used to keep track of the fail counters.
// We only need one fail counter per ResolveStub, @@ -621,9 +669,11 @@ class VirtualCallStubManager : public StubManager PTR_VirtualCallStubManager m_pNext; // Linked list pointer public: +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH // Given a stub address, find the VCSManager that owns it. static VirtualCallStubManager *FindStubManager(PCODE addr, StubCodeBlockKind* wbStubKind = NULL); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH #ifndef DACCESS_COMPILE // insert a linked list of indirection cells at the beginning of m_RecycledIndCellList @@ -702,7 +752,19 @@ class VirtualCallStubManagerManager : public StubManager #ifdef DACCESS_COMPILE virtual void DoEnumMemoryRegions(CLRDataEnumMemoryFlags flags); virtual LPCWSTR GetStubManagerName(PCODE addr) - { WRAPPER_NO_CONTRACT; return FindVirtualCallStubManager(addr)->GetStubManagerName(addr); } + { + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_VIRTUAL_STUB_DISPATCH + return W("CachedInterfaceDispatchStubManagerManager"); +#else +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + if (UseCachedInterfaceDispatch() && VirtualCallStubManager::isCachedInterfaceDispatchStub(addr)) + return W("CachedInterfaceDispatchStubManagerManager"); + else +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + return FindVirtualCallStubManager(addr)->GetStubManagerName(addr); +#endif + } #endif private: @@ -720,9 +782,16 @@ class VirtualCallStubManagerManager : public StubManager // RW lock for reading entries and removing them. SimpleRWLock m_RWLock; +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + DPTR(PCODE) pCachedInterfaceDispatchHelpers; + size_t countCachedInterfaceDispatchHelpers = 0; +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + // This will look through all the managers in an intelligent fashion to // find the manager that owns the address. +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH VirtualCallStubManager *FindVirtualCallStubManager(PCODE stubAddress); +#endif protected: // Add a VCSManager to the linked list. 
@@ -746,6 +815,19 @@ class VirtualCallStubManagerManager : public StubManager VirtualCallStubManagerIterator IterateVirtualCallStubManagers(); +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + bool isCachedInterfaceDispatchStub(PCODE addr) + { + LIMITED_METHOD_DAC_CONTRACT; + for (size_t i = 0; i < countCachedInterfaceDispatchHelpers; i++) + { + if (pCachedInterfaceDispatchHelpers[i] == addr) + return true; + } + return false; + } +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + #ifdef _DEBUG // Debug helper to help identify stub-managers. virtual const char * DbgGetName() { LIMITED_METHOD_CONTRACT; return "VirtualCallStubManagerManager"; } @@ -892,7 +974,7 @@ class Entry }; /* define the platform specific Stubs and stub holders */ - +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH #include #if USES_LOOKUP_STUBS @@ -1102,7 +1184,7 @@ class DispatchEntry : public Entry { ResolveHolder * resolveHolder = ResolveHolder::FromFailEntry(stub->failTarget()); size_t token = resolveHolder->stub()->token(); - _ASSERTE(token == VirtualCallStubManager::GetTokenFromStub((PCODE)stub)); + _ASSERTE(token == VirtualCallStubManager::GetTokenFromStub((PCODE)stub, NULL)); return token; } else @@ -1539,6 +1621,8 @@ class BucketTable static FastTable* dead; //linked list head of to be deleted (abandoned) buckets }; +#endif // FEATURE_VIRTUAL_STUB_DISPATCH + BYTE* GenerateDispatchStubCellEntryMethodDesc(LoaderAllocator *pLoaderAllocator, TypeHandle ownerType, MethodDesc *pMD, LCGMethodResolver *pResolver); BYTE* GenerateDispatchStubCellEntrySlot(LoaderAllocator *pLoaderAllocator, TypeHandle ownerType, int methodSlot, LCGMethodResolver *pResolver); From 73b0b26dbc8cd166ce6f0df0d489f0fc32bdf155 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Fri, 24 Jan 2025 09:46:11 -0800 Subject: [PATCH 11/41] Fix X86 build --- src/coreclr/vm/stubmgr.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/vm/stubmgr.h b/src/coreclr/vm/stubmgr.h index 6b1e6dc5ddd676..0a812f430497ea 100644 --- 
a/src/coreclr/vm/stubmgr.h +++ b/src/coreclr/vm/stubmgr.h @@ -861,6 +861,7 @@ class StubManagerHelpers #endif } +#if !defined(TARGET_X86) static TADDR GetIndirectionCellArg(T_CONTEXT *pContext) { #if defined(TARGET_AMD64) @@ -874,6 +875,7 @@ class StubManagerHelpers return (TADDR)NULL; #endif } +#endif // !defined(TARGET_X86) }; #endif // !__stubmgr_h__ From 2cdd955448645a920195cac13a85570e6b4f8e42 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Fri, 24 Jan 2025 10:52:05 -0800 Subject: [PATCH 12/41] Get Linux Arm64 and Amd64 into a possibly good state --- .../Runtime/CachedInterfaceDispatchAot.cpp | 5 -- .../Runtime/CachedInterfaceDispatchPal.h | 11 ++++ .../amd64/CachedInterfaceDispatchAot.S | 26 +++++++++ .../Runtime/arm64/AsmMacros_Shared.h | 7 ++- .../arm64/CachedInterfaceDispatchAot.S | 49 ++++++++++++++++ .../shared_runtime/amd64/StubDispatch.S | 15 ----- .../shared_runtime/arm64/StubDispatch.S | 39 +------------ src/coreclr/vm/CMakeLists.txt | 19 ++++++- .../vm/amd64/CachedInterfaceDispatchCoreCLR.S | 54 ++++++++++++++++++ src/coreclr/vm/arm64/AsmMacros_Shared.h | 5 ++ .../vm/arm64/CachedInterfaceDispatchCoreCLR.S | 56 +++++++++++++++++++ 11 files changed, 226 insertions(+), 60 deletions(-) create mode 100644 src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.S create mode 100644 src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.S create mode 100644 src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S create mode 100644 src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S diff --git a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchAot.cpp b/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchAot.cpp index 1898dd058d5bf9..4244e3617e9f79 100644 --- a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchAot.cpp +++ b/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchAot.cpp @@ -4,11 +4,6 @@ #include "common.h" #include "CachedInterfaceDispatchPal.h" #include "CachedInterfaceDispatch.h" -#include "RedhawkWarnings.h" 
-#include "TargetPtrs.h" -#include "MethodTable.h" -#include "Range.h" -#include "allocheap.h" // The base memory allocator. static AllocHeap * g_pAllocHeap = NULL; diff --git a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchPal.h b/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchPal.h index 938a82a97cb293..3b2762ad4185c5 100644 --- a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchPal.h +++ b/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchPal.h @@ -14,7 +14,18 @@ #include "slist.h" #include "holder.h" #include "Crst.h" +#include "RedhawkWarnings.h" +#include "TargetPtrs.h" +#include "MethodTable.h" +#include "Range.h" +#include "allocheap.h" #include "rhbinder.h" +#include "ObjectLayout.h" +#include "shash.h" +#include "TypeManager.h" +#include "RuntimeInstance.h" +#include "MethodTable.inl" +#include "CommonMacros.inl" bool InterfaceDispatch_InitializePal(); diff --git a/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.S b/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.S new file mode 100644 index 00000000000000..9796df5e6742e3 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.S @@ -0,0 +1,26 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include // generated by the build from AsmOffsets.cpp +#include + +// trick to avoid PLT relocation at runtime which corrupts registers +#define REL_C_FUNC(name) C_FUNC(name)@gotpcrel + +// Stub dispatch routine for dispatch to a vtable slot +LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + // UNIXTODO: Implement this function + int 3 +LEAF_END RhpVTableOffsetDispatch, _TEXT + +// Cache miss case, call the runtime to resolve the target and update the cache. 
+// Use universal transition helper to allow an exception to flow out of resolution +LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT + // r10 contains indirection cell address, move to r11 where it will be passed by + // the universal transition thunk as an argument to RhpCidResolve + mov r11, r10 + mov r10, [rip + REL_C_FUNC(RhpCidResolve)] + jmp qword ptr [rip + REL_C_FUNC(RhpUniversalTransition_DebugStepTailCall)] + +LEAF_END RhpInterfaceDispatchSlow, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/arm64/AsmMacros_Shared.h b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros_Shared.h index 9b15544d43e036..f67496574352de 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/AsmMacros_Shared.h +++ b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros_Shared.h @@ -3,4 +3,9 @@ // This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible -#include "AsmMacros.h" \ No newline at end of file +#ifdef TARGET_WINDOWS +#include "AsmMacros.h" +#else +#include +#include "AsmOffsets.inc" +#endif diff --git a/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.S b/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.S new file mode 100644 index 00000000000000..5a1220b47398a9 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.S @@ -0,0 +1,49 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include +#include "AsmOffsets.inc" + + TEXTAREA + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + + .extern RhpCidResolve + .extern RhpUniversalTransition_DebugStepTailCall + +// +// Stub dispatch routine for dispatch to a vtable slot +// + LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + // x11 contains the interface dispatch cell address. + // load x12 to point to the vtable offset (which is stored in the m_pCache field). 
+ ldr x12, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + // Load the MethodTable from the object instance in x0, and add it to the vtable offset + // to get the address in the vtable of what we want to dereference + ldr x13, [x0] + add x12, x12, x13 + + // Load the target address of the vtable into x12 + ldr x12, [x12] + + br x12 + LEAF_END RhpVTableOffsetDispatch, _TEXT + +// +// Cache miss case, call the runtime to resolve the target and update the cache. +// Use universal transition helper to allow an exception to flow out of resolution. +// + LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT + // x11 contains the interface dispatch cell address. + // Calling convention of the universal thunk is: + // xip0: target address for the thunk to call + // xip1: parameter of the thunk's target + PREPARE_EXTERNAL_VAR RhpCidResolve, xip0 + mov xip1, x11 + b C_FUNC(RhpUniversalTransition_DebugStepTailCall) + LEAF_END RhpInterfaceDispatchSlow, _TEXT + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + + END diff --git a/src/coreclr/shared_runtime/amd64/StubDispatch.S b/src/coreclr/shared_runtime/amd64/StubDispatch.S index 9e1239d1de0635..fddfde3a22536d 100644 --- a/src/coreclr/shared_runtime/amd64/StubDispatch.S +++ b/src/coreclr/shared_runtime/amd64/StubDispatch.S @@ -60,12 +60,6 @@ DEFINE_INTERFACE_DISPATCH_STUB 16 DEFINE_INTERFACE_DISPATCH_STUB 32 DEFINE_INTERFACE_DISPATCH_STUB 64 -// Stub dispatch routine for dispatch to a vtable slot -LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT - // UNIXTODO: Implement this function - int 3 -LEAF_END RhpVTableOffsetDispatch, _TEXT - // Initial dispatch on an interface when we don't have a cache yet. LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch @@ -80,13 +74,4 @@ ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch LEAF_END RhpInitialInterfaceDispatch, _TEXT -// Cache miss case, call the runtime to resolve the target and update the cache. 
-// Use universal transition helper to allow an exception to flow out of resolution -LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT - // r10 contains indirection cell address, move to r11 where it will be passed by - // the universal transition thunk as an argument to RhpCidResolve - mov r11, r10 - mov r10, [rip + REL_C_FUNC(RhpCidResolve)] - jmp qword ptr [rip + REL_C_FUNC(RhpUniversalTransition_DebugStepTailCall)] -LEAF_END RhpInterfaceDispatchSlow, _TEXT diff --git a/src/coreclr/shared_runtime/arm64/StubDispatch.S b/src/coreclr/shared_runtime/arm64/StubDispatch.S index 5d3d11cf4108f8..1155e6ac257a1a 100644 --- a/src/coreclr/shared_runtime/arm64/StubDispatch.S +++ b/src/coreclr/shared_runtime/arm64/StubDispatch.S @@ -1,14 +1,10 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -#include -#include "AsmOffsets.inc" +#include "AsmMacros_Shared.h" #ifdef FEATURE_CACHED_INTERFACE_DISPATCH - .extern RhpCidResolve - .extern RhpUniversalTransition_DebugStepTailCall - // Macro that generates code to check a single cache entry. .macro CHECK_CACHE_ENTRY entry // Check a single entry in the cache. @@ -83,37 +79,4 @@ b C_FUNC(RhpInterfaceDispatchSlow) LEAF_END RhpInitialInterfaceDispatch, _TEXT -// -// Stub dispatch routine for dispatch to a vtable slot -// - LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT - // x11 contains the interface dispatch cell address. - // load x12 to point to the vtable offset (which is stored in the m_pCache field). 
- ldr x12, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] - - // Load the MethodTable from the object instance in x0, and add it to the vtable offset - // to get the address in the vtable of what we want to dereference - ldr x13, [x0] - add x12, x12, x13 - - // Load the target address of the vtable into x12 - ldr x12, [x12] - - br x12 - LEAF_END RhpVTableOffsetDispatch, _TEXT - -// -// Cache miss case, call the runtime to resolve the target and update the cache. -// Use universal transition helper to allow an exception to flow out of resolution. -// - LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT - // x11 contains the interface dispatch cell address. - // Calling convention of the universal thunk is: - // xip0: target address for the thunk to call - // xip1: parameter of the thunk's target - PREPARE_EXTERNAL_VAR RhpCidResolve, xip0 - mov xip1, x11 - b C_FUNC(RhpUniversalTransition_DebugStepTailCall) - LEAF_END RhpInterfaceDispatchSlow, _TEXT - #endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index bfbb79090f33e7..ca63699d25fd63 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -715,8 +715,20 @@ else(CLR_CMAKE_TARGET_WIN32) ${ARCH_SOURCES_DIR}/Context.S ${ARCH_SOURCES_DIR}/unixasmhelpers.S ${ARCH_SOURCES_DIR}/umthunkstub.S - ${ARCH_SOURCES_DIR}/virtualcallstubamd64.S ) + + if(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH) + list(APPEND VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/virtualcallstubamd64.S + ) + endif(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH) + + if(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) + list(APPEND VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.S + ) + endif(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) + elseif(CLR_CMAKE_TARGET_ARCH_I386) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/ehhelpers.S @@ -743,6 +755,11 @@ else(CLR_CMAKE_TARGET_WIN32) ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/thunktemplates.S ) + 
if(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) + list(APPEND VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.S + ) + endif(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) elseif(CLR_CMAKE_TARGET_ARCH_LOONGARCH64) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/asmhelpers.S diff --git a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S new file mode 100644 index 00000000000000..2371ee178ace7b --- /dev/null +++ b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S @@ -0,0 +1,54 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include "unixasmmacros.inc" +#include "asmconstants.h" + + extern CID_ResolveWorker:proc + +;; Stub dispatch routine for dispatch to a vtable slot +LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + ;; r11 currently contains the indirection cell address. + ;; load r11 to point to the vtable offset (which is stored in the m_pCache field). 
+ mov r11, [r11 + OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; r11 now contains the VTableOffset where the upper 32 bits are the offset to adjust + ;; to get to the VTable chunk + mov rax, r11 + shr rax, 32 + + ;; Load the MethodTable from the object instance in rcx, and add it to the vtable offset + ;; to get the address in the vtable chunk list of what we want to dereference + add rax, [rcx] + + ;; Load the target address of the vtable chunk into rax + mov rax, [rax] + + ;; Compute the chunk offset + shr r11d, 16 + + ;; Load the target address of the virtual function into rax + mov rax, [rax + r11] + + TAILJMP_RAX +LEAF_END RhpVTableOffsetDispatch, _TEXT + +;; On Input: +;; r11 contains the address of the indirection cell +;; [rsp+0] m_ReturnAddress: contains the return address of caller to stub +NESTED_ENTRY RhpInterfaceDispatchSlow, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK + + lea rdi, [rsp + __PWTB_TransitionBlock] ; pTransitionBlock + mov rsi, r11 ; indirection cell + + call CID_ResolveWorker + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + TAILJMP_RAX + +NESTED_END RhpInterfaceDispatchSlow, _TEXT + + end \ No newline at end of file diff --git a/src/coreclr/vm/arm64/AsmMacros_Shared.h b/src/coreclr/vm/arm64/AsmMacros_Shared.h index 33b9ddaa26e0f5..06a05595cb977e 100644 --- a/src/coreclr/vm/arm64/AsmMacros_Shared.h +++ b/src/coreclr/vm/arm64/AsmMacros_Shared.h @@ -3,6 +3,11 @@ // This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible +#ifdef TARGET_WINDOWS #include "ksarm64.h" #include "asmconstants.h" #include "asmmacros.h" +#else +#include "asmconstants.h" +#include "unixasmmacros.inc" +#endif diff --git a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S new file mode 100644 index 00000000000000..3a34e44d868171 --- /dev/null +++ 
b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S @@ -0,0 +1,56 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. + +#include "unixasmmacros.inc" +#include "asmconstants.h" + +;; +;; Stub dispatch routine for dispatch to a vtable slot +;; + LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + + ;; r11 currently contains the indirection cell address. + ;; load r11 to point to the vtable offset (which is stored in the m_pCache field). + ldr x11, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; r11 now contains the VTableOffset where the upper 32 bits are the offset to adjust + ;; to get to the VTable chunk + lsr x10, x11, #32 + + ;; Load the MethodTable from the object instance in rcx, and add it to the vtable offset + ;; to get the address in the vtable chunk list of what we want to dereference + ldr x9, [x0] + add x9, x10, x9 + + ;; Load the target address of the vtable chunk into rax + ldr x9, [x9] + + ;; Compute the chunk offset + ubfx x10, x11, #16, #16 + + ;; Load the target address of the virtual function into rax + ldr x9, [x9, x10] + + EPILOG_BRANCH_REG x9 + LEAF_END RhpVTableOffsetDispatch, _TEXT + +;; +;; Cache miss case, call the runtime to resolve the target and update the cache. +;; x11 contains the interface dispatch cell address. +;; + NESTED_ENTRY RhpInterfaceDispatchSlow, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK + + add x0, sp, #__PWTB_TransitionBlock ; pTransitionBlock + mov x1, x11 ; indirection cell + + bl C_FUNC(CID_ResolveWorker) + + mov x9, x0 + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + EPILOG_BRANCH_REG x9 + NESTED_END RhpInterfaceDispatchSlow, _TEXT + + END From edad8340118542ecd908a8417f6a05854b901e28 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Fri, 24 Jan 2025 14:39:06 -0800 Subject: [PATCH 13/41] Enable cached interface dispatch to build properly on Linux Amd64. 
Note that this requires adding the -mcx16 switch to clang, so that cmpxchg16b instruction gets generated, which is an increase in the baseline CPU required by CoreCLR on Linux, and isn't likely to be OK for shipping publicly --- src/coreclr/clrfeatures.cmake | 10 +++-- src/coreclr/nativeaot/CMakeLists.txt | 2 +- .../Runtime/amd64/AsmMacros_Shared.h | 7 ++++ src/coreclr/pal/inc/unixasmmacrosamd64.inc | 10 +++++ .../shared_runtime/amd64/StubDispatch.S | 3 +- src/coreclr/vm/CachedInterfaceDispatchPal.h | 2 +- src/coreclr/vm/amd64/AsmMacros_Shared.h | 8 ++++ .../vm/amd64/CachedInterfaceDispatchCoreCLR.S | 40 +++++++++---------- 8 files changed, 52 insertions(+), 30 deletions(-) create mode 100644 src/coreclr/nativeaot/Runtime/amd64/AsmMacros_Shared.h create mode 100644 src/coreclr/vm/amd64/AsmMacros_Shared.h diff --git a/src/coreclr/clrfeatures.cmake b/src/coreclr/clrfeatures.cmake index 9d7e8017489b22..30776069c00062 100644 --- a/src/coreclr/clrfeatures.cmake +++ b/src/coreclr/clrfeatures.cmake @@ -40,12 +40,14 @@ if (CLR_CMAKE_TARGET_WIN32) set(FEATURE_TYPEEQUIVALENCE 1) endif(CLR_CMAKE_TARGET_WIN32) -if (CLR_CMAKE_TARGET_WIN32) - if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64) - set(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH 1) +if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64) + if (CLR_CMAKE_TARGET_UNIX AND CLR_CMAKE_TARGET_ARCH_AMD64) + # Allow 16 byte compare-exchange (cmpxchg16b) + add_compile_options(-mcx16) endif() + set(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH 1) endif() -if (NOT (CLR_CMAKE_TARGET_WIN32 AND (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64))) +if (NOT (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64)) set(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH 1) endif() diff --git a/src/coreclr/nativeaot/CMakeLists.txt b/src/coreclr/nativeaot/CMakeLists.txt index 71e9567b91e54b..e1c43480500970 100644 --- a/src/coreclr/nativeaot/CMakeLists.txt +++ b/src/coreclr/nativeaot/CMakeLists.txt @@ -23,7 +23,7 @@ 
if(CLR_CMAKE_HOST_UNIX) endif(CLR_CMAKE_TARGET_APPLE) if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386) - # Allow 16 byte compare-exchange + # Allow 16 byte compare-exchange (cmpxchg16b) add_compile_options(-mcx16) endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386) endif (CLR_CMAKE_HOST_UNIX) diff --git a/src/coreclr/nativeaot/Runtime/amd64/AsmMacros_Shared.h b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros_Shared.h new file mode 100644 index 00000000000000..eb9905ffca6383 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros_Shared.h @@ -0,0 +1,7 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible + +#include // generated by the build from AsmOffsets.cpp +#include diff --git a/src/coreclr/pal/inc/unixasmmacrosamd64.inc b/src/coreclr/pal/inc/unixasmmacrosamd64.inc index bc6d770a51824a..dcd16524e31c83 100644 --- a/src/coreclr/pal/inc/unixasmmacrosamd64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosamd64.inc @@ -32,6 +32,16 @@ C_FUNC(\Name) = . .endm +.macro ALTERNATE_ENTRY Name +#if defined(__APPLE__) + .alt_entry C_FUNC(\Name) + .private_extern C_FUNC(\Name) +#else + .global C_FUNC(\Name) +#endif +C_FUNC(\Name): +.endm + .macro LEAF_ENTRY Name, Section .global C_FUNC(\Name) #if defined(__APPLE__) diff --git a/src/coreclr/shared_runtime/amd64/StubDispatch.S b/src/coreclr/shared_runtime/amd64/StubDispatch.S index fddfde3a22536d..b93fa628c10007 100644 --- a/src/coreclr/shared_runtime/amd64/StubDispatch.S +++ b/src/coreclr/shared_runtime/amd64/StubDispatch.S @@ -2,8 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. 
.intel_syntax noprefix -#include // generated by the build from AsmOffsets.cpp -#include +#include "AsmMacros_Shared.h" // trick to avoid PLT relocation at runtime which corrupts registers #define REL_C_FUNC(name) C_FUNC(name)@gotpcrel diff --git a/src/coreclr/vm/CachedInterfaceDispatchPal.h b/src/coreclr/vm/CachedInterfaceDispatchPal.h index 7c992f57508e1d..8e9e5eb4e99a60 100644 --- a/src/coreclr/vm/CachedInterfaceDispatchPal.h +++ b/src/coreclr/vm/CachedInterfaceDispatchPal.h @@ -17,7 +17,7 @@ FORCEINLINE uint8_t PalInterlockedCompareExchange128(_Inout_ int64_t volatile *p // But this emulation by libatomic doesn't satisfy requirements here which it must update two adjacent pointers atomically. // this is being discussed in https://github.com/dotnet/runtime/issues/109276. __int128_t iResult = __sync_val_compare_and_swap((__int128_t volatile*)pDst, iComparand, ((__int128_t)iValueHigh << 64) + (uint64_t)iValueLow); - PalInterlockedOperationBarrier(); + PAL_InterlockedOperationBarrier(); pComparandAndResult[0] = (int64_t)iResult; pComparandAndResult[1] = (int64_t)(iResult >> 64); return iComparand == iResult; } diff --git a/src/coreclr/vm/amd64/AsmMacros_Shared.h b/src/coreclr/vm/amd64/AsmMacros_Shared.h new file mode 100644 index 00000000000000..87920d58b2ac65 --- /dev/null +++ b/src/coreclr/vm/amd64/AsmMacros_Shared.h @@ -0,0 +1,8 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +// This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible + +#include "unixasmmacros.inc" +#include "asmconstants.h" + diff --git a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S index 2371ee178ace7b..707d25f319737a 100644 --- a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S +++ b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S @@ -1,54 +1,50 @@ -; Licensed to the .NET Foundation under one or more agreements. -; The .NET Foundation licenses this file to you under the MIT license. +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. .intel_syntax noprefix #include "unixasmmacros.inc" #include "asmconstants.h" - extern CID_ResolveWorker:proc - -;; Stub dispatch routine for dispatch to a vtable slot +// Stub dispatch routine for dispatch to a vtable slot LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT - ;; r11 currently contains the indirection cell address. - ;; load r11 to point to the vtable offset (which is stored in the m_pCache field). + // r11 currently contains the indirection cell address. + // load r11 to point to the vtable offset (which is stored in the m_pCache field). 
mov r11, [r11 + OFFSETOF__InterfaceDispatchCell__m_pCache] - ;; r11 now contains the VTableOffset where the upper 32 bits are the offset to adjust - ;; to get to the VTable chunk + // r11 now contains the VTableOffset where the upper 32 bits are the offset to adjust + // to get to the VTable chunk mov rax, r11 shr rax, 32 - ;; Load the MethodTable from the object instance in rcx, and add it to the vtable offset - ;; to get the address in the vtable chunk list of what we want to dereference + // Load the MethodTable from the object instance in rcx, and add it to the vtable offset + // to get the address in the vtable chunk list of what we want to dereference add rax, [rcx] - ;; Load the target address of the vtable chunk into rax + // Load the target address of the vtable chunk into rax mov rax, [rax] - ;; Compute the chunk offset + // Compute the chunk offset shr r11d, 16 - ;; Load the target address of the virtual function into rax + // Load the target address of the virtual function into rax mov rax, [rax + r11] TAILJMP_RAX LEAF_END RhpVTableOffsetDispatch, _TEXT -;; On Input: -;; r11 contains the address of the indirection cell -;; [rsp+0] m_ReturnAddress: contains the return address of caller to stub +// On Input: +// r11 contains the address of the indirection cell +// [rsp+0] m_ReturnAddress: contains the return address of caller to stub NESTED_ENTRY RhpInterfaceDispatchSlow, _TEXT, NoHandler PROLOG_WITH_TRANSITION_BLOCK - lea rdi, [rsp + __PWTB_TransitionBlock] ; pTransitionBlock - mov rsi, r11 ; indirection cell + lea rdi, [rsp + __PWTB_TransitionBlock] // pTransitionBlock + mov rsi, r11 // indirection cell - call CID_ResolveWorker + call C_FUNC(CID_ResolveWorker) EPILOG_WITH_TRANSITION_BLOCK_TAILCALL TAILJMP_RAX NESTED_END RhpInterfaceDispatchSlow, _TEXT - - end \ No newline at end of file From df393d93bc744955ea7ec24770f0d4cb328da7c4 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Fri, 24 Jan 2025 23:19:39 +0000 Subject: [PATCH 14/41] Enable building 
cached interface dispatch for Linux arm64 --- .../arm64/CachedInterfaceDispatchAot.S | 8 +--- src/coreclr/pal/inc/unixasmmacrosarm64.inc | 11 +++++ .../vm/arm64/CachedInterfaceDispatchCoreCLR.S | 43 +++++++++---------- src/coreclr/vm/arm64/asmhelpers.S | 4 +- 4 files changed, 36 insertions(+), 30 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.S b/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.S index 5a1220b47398a9..6b63971d4a4015 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.S +++ b/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.S @@ -1,11 +1,9 @@ -;; Licensed to the .NET Foundation under one or more agreements. -;; The .NET Foundation licenses this file to you under the MIT license. +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. #include #include "AsmOffsets.inc" - TEXTAREA - #ifdef FEATURE_CACHED_INTERFACE_DISPATCH .extern RhpCidResolve @@ -45,5 +43,3 @@ LEAF_END RhpInterfaceDispatchSlow, _TEXT #endif // FEATURE_CACHED_INTERFACE_DISPATCH - - END diff --git a/src/coreclr/pal/inc/unixasmmacrosarm64.inc b/src/coreclr/pal/inc/unixasmmacrosarm64.inc index 9e86779d4511bc..1e9a8a1e2bba7e 100644 --- a/src/coreclr/pal/inc/unixasmmacrosarm64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosarm64.inc @@ -26,6 +26,17 @@ C_FUNC(\Name): .endm +.macro ALTERNATE_ENTRY Name +#if defined(__APPLE__) + .alt_entry C_FUNC(\Name) + .private_extern C_FUNC(\Name) +#else + .global C_FUNC(\Name) + .hidden C_FUNC(\Name) +#endif +C_FUNC(\Name): +.endm + .macro LEAF_ENTRY Name, Section .global C_FUNC(\Name) #if defined(__APPLE__) diff --git a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S index 3a34e44d868171..b0a5fb38b76c80 100644 --- a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S +++ 
b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S @@ -1,49 +1,49 @@ -; Licensed to the .NET Foundation under one or more agreements. -; The .NET Foundation licenses this file to you under the MIT license. +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. -#include "unixasmmacros.inc" #include "asmconstants.h" +#include "unixasmmacros.inc" -;; -;; Stub dispatch routine for dispatch to a vtable slot -;; +// +// Stub dispatch routine for dispatch to a vtable slot +// LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT - ;; r11 currently contains the indirection cell address. - ;; load r11 to point to the vtable offset (which is stored in the m_pCache field). + // r11 currently contains the indirection cell address. + // load r11 to point to the vtable offset (which is stored in the m_pCache field). ldr x11, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] - ;; r11 now contains the VTableOffset where the upper 32 bits are the offset to adjust - ;; to get to the VTable chunk + // r11 now contains the VTableOffset where the upper 32 bits are the offset to adjust + // to get to the VTable chunk lsr x10, x11, #32 - ;; Load the MethodTable from the object instance in rcx, and add it to the vtable offset - ;; to get the address in the vtable chunk list of what we want to dereference + // Load the MethodTable from the object instance in rcx, and add it to the vtable offset + // to get the address in the vtable chunk list of what we want to dereference ldr x9, [x0] add x9, x10, x9 - ;; Load the target address of the vtable chunk into rax + // Load the target address of the vtable chunk into rax ldr x9, [x9] - ;; Compute the chunk offset + // Compute the chunk offset ubfx x10, x11, #16, #16 - ;; Load the target address of the virtual function into rax + // Load the target address of the virtual function into rax ldr x9, [x9, x10] EPILOG_BRANCH_REG x9 LEAF_END RhpVTableOffsetDispatch, _TEXT -;; 
-;; Cache miss case, call the runtime to resolve the target and update the cache. -;; x11 contains the interface dispatch cell address. -;; +// +// Cache miss case, call the runtime to resolve the target and update the cache. +// x11 contains the interface dispatch cell address. +// NESTED_ENTRY RhpInterfaceDispatchSlow, _TEXT, NoHandler PROLOG_WITH_TRANSITION_BLOCK - add x0, sp, #__PWTB_TransitionBlock ; pTransitionBlock - mov x1, x11 ; indirection cell + add x0, sp, #__PWTB_TransitionBlock // pTransitionBlock + mov x1, x11 // indirection cell bl C_FUNC(CID_ResolveWorker) @@ -53,4 +53,3 @@ EPILOG_BRANCH_REG x9 NESTED_END RhpInterfaceDispatchSlow, _TEXT - END diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 8414ffdda21f15..4daaf42d36b610 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -522,6 +522,7 @@ GenerateRedirectedStubWithFrame RedirectForThreadAbort, RedirectForThreadAbort2 // x12 contains our contract (DispatchToken) // x16,x17 will be trashed // +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH #define BACKPATCH_FLAG 1 #define PROMOTE_CHAIN_FLAG 2 @@ -566,7 +567,6 @@ LOCAL_LABEL(Fail): NESTED_END ResolveWorkerChainLookupAsmStub, _TEXT -#ifdef FEATURE_VIRTUAL_STUB_DISPATCH // ------------------------------------------------------------------ // void ResolveWorkerAsmStub(args in regs x0-x7 & stack and possibly retbuf arg in x8, x11:IndirectionCellAndFlags, x12:DispatchToken) // @@ -587,7 +587,7 @@ NESTED_ENTRY ResolveWorkerAsmStub, _TEXT, NoHandler EPILOG_BRANCH_REG x9 NESTED_END ResolveWorkerAsmStub, _TEXT -#ifdef FEATURE_VIRTUAL_STUB_DISPATCH +#endif // FEATURE_VIRTUAL_STUB_DISPATCH #ifdef FEATURE_READYTORUN NESTED_ENTRY DelayLoad_MethodCall_FakeProlog, _TEXT, NoHandler From cce3bcb12c528616eef67f65a6d7435e2faab7ca Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Fri, 24 Jan 2025 23:41:50 +0000 Subject: [PATCH 15/41] Add AVLocation for the VTable helper which wasn't present in the 
NativeAOT cached interface dispatch implementation (as it isn't actually used) Update IsIPinVirtualStub to check the AVLocations, not the stub entry points --- src/coreclr/nativeaot/Runtime/EHHelpers.cpp | 2 + .../amd64/CachedInterfaceDispatchAot.S | 1 + .../amd64/CachedInterfaceDispatchAot.asm | 1 + .../arm64/CachedInterfaceDispatchAot.S | 1 + src/coreclr/shared_runtime/arm/StubDispatch.S | 1 + .../shared_runtime/loongarch64/StubDispatch.S | 1 + .../vm/amd64/CachedInterfaceDispatchCoreCLR.S | 1 + .../amd64/CachedInterfaceDispatchCoreCLR.asm | 1 + .../vm/arm64/CachedInterfaceDispatchCoreCLR.S | 1 + src/coreclr/vm/excep.cpp | 2 +- src/coreclr/vm/virtualcallstub.cpp | 37 +++++++++++++++++++ src/coreclr/vm/virtualcallstub.h | 13 +++++++ 12 files changed, 61 insertions(+), 1 deletion(-) diff --git a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp index 569cf36e84fa50..9db9fe17e3947b 100644 --- a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp @@ -358,6 +358,7 @@ EXTERN_C CODE_LOCATION RhpInterfaceDispatchAVLocation8; EXTERN_C CODE_LOCATION RhpInterfaceDispatchAVLocation16; EXTERN_C CODE_LOCATION RhpInterfaceDispatchAVLocation32; EXTERN_C CODE_LOCATION RhpInterfaceDispatchAVLocation64; +EXTERN_C CODE_LOCATION RhpVTableOffsetDispatchAVLocation; static bool InInterfaceDispatchHelper(uintptr_t faultingIP) { @@ -372,6 +373,7 @@ static bool InInterfaceDispatchHelper(uintptr_t faultingIP) (uintptr_t)&RhpInterfaceDispatchAVLocation16, (uintptr_t)&RhpInterfaceDispatchAVLocation32, (uintptr_t)&RhpInterfaceDispatchAVLocation64, + (uintptr_t)&RhpVTableOffsetDispatchAVLocation, }; // compare the IP against the list of known possible AV locations in the interface dispatch helpers diff --git a/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.S b/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.S index 9796df5e6742e3..92c9aff7a32ecb 100644 --- 
a/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.S +++ b/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.S @@ -11,6 +11,7 @@ // Stub dispatch routine for dispatch to a vtable slot LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT // UNIXTODO: Implement this function + ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation int 3 LEAF_END RhpVTableOffsetDispatch, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.asm b/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.asm index a85ecfb05b6f8c..e1caae3adedbc0 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.asm @@ -18,6 +18,7 @@ LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT ;; Load the MethodTable from the object instance in rcx, and add it to the vtable offset ;; to get the address in the vtable of what we want to dereference + ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation add rax, [rcx] ;; Load the target address of the vtable into rax diff --git a/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.S b/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.S index 6b63971d4a4015..d2a1131c2c8686 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.S +++ b/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.S @@ -19,6 +19,7 @@ // Load the MethodTable from the object instance in x0, and add it to the vtable offset // to get the address in the vtable of what we want to dereference + ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation ldr x13, [x0] add x12, x12, x13 diff --git a/src/coreclr/shared_runtime/arm/StubDispatch.S b/src/coreclr/shared_runtime/arm/StubDispatch.S index 7c2f0bef20afdc..6b9344d3d748e2 100644 --- a/src/coreclr/shared_runtime/arm/StubDispatch.S +++ b/src/coreclr/shared_runtime/arm/StubDispatch.S @@ -88,6 +88,7 @@ LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT ldr r12, [r12, 
#OFFSETOF__InterfaceDispatchCell__m_pCache] // Load the MethodTable from the object instance in r0. + ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation ldr r1, [r0] // add the vtable offset to the MethodTable pointer diff --git a/src/coreclr/shared_runtime/loongarch64/StubDispatch.S b/src/coreclr/shared_runtime/loongarch64/StubDispatch.S index 138992ef1a3294..a85cafa3389e00 100644 --- a/src/coreclr/shared_runtime/loongarch64/StubDispatch.S +++ b/src/coreclr/shared_runtime/loongarch64/StubDispatch.S @@ -92,6 +92,7 @@ // Load the MethodTable from the object instance in a0, and add it to the vtable offset // to get the address in the vtable of what we want to dereference + ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation ld.d $t4, $a0, 0 add.d $t3, $t3, $t4 diff --git a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S index 707d25f319737a..a127d134142f2d 100644 --- a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S +++ b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S @@ -18,6 +18,7 @@ LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT // Load the MethodTable from the object instance in rcx, and add it to the vtable offset // to get the address in the vtable chunk list of what we want to dereference +ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation add rax, [rcx] // Load the target address of the vtable chunk into rax diff --git a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.asm b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.asm index 9ed5b458204d82..804ed7bb77b065 100644 --- a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.asm +++ b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.asm @@ -19,6 +19,7 @@ LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT ;; Load the MethodTable from the object instance in rcx, and add it to the vtable offset ;; to get the address in the vtable chunk list of what we want to dereference +ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation add rax, [rcx] ;; Load 
the target address of the vtable chunk into rax diff --git a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S index b0a5fb38b76c80..70646e98ed6d06 100644 --- a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S +++ b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S @@ -19,6 +19,7 @@ // Load the MethodTable from the object instance in rcx, and add it to the vtable offset // to get the address in the vtable chunk list of what we want to dereference + ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation ldr x9, [x0] add x9, x10, x9 diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index 26f4fb42e0a973..625fa857c05bcf 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6023,7 +6023,7 @@ BOOL IsIPinVirtualStub(PCODE f_IP) } #ifdef FEATURE_CACHED_INTERFACE_DISPATCH - if (VirtualCallStubManager::isCachedInterfaceDispatchStub(f_IP)) + if (VirtualCallStubManager::isCachedInterfaceDispatchStubAVLocation(f_IP)) return TRUE; #endif diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index 1c0c79b8265e5b..ff986aa27364a0 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -4071,6 +4071,16 @@ extern "C" void RhpInterfaceDispatch64(); extern "C" void RhpVTableOffsetDispatch(); extern "C" void RhpInitialInterfaceDispatch(); + +extern "C" void RhpInterfaceDispatchAVLocation1(); +extern "C" void RhpInterfaceDispatchAVLocation2(); +extern "C" void RhpInterfaceDispatchAVLocation4(); +extern "C" void RhpInterfaceDispatchAVLocation8(); +extern "C" void RhpInterfaceDispatchAVLocation16(); +extern "C" void RhpInterfaceDispatchAVLocation32(); +extern "C" void RhpInterfaceDispatchAVLocation64(); +extern "C" void RhpVTableOffsetDispatchAVLocation(); + #endif // FEATURE_CACHED_INTERFACE_DISPATCH ///////////////////////////////////////////////////////////////////////////////////////////// @@ -4086,6 +4096,8 @@ 
VirtualCallStubManagerManager::VirtualCallStubManagerManager() size_t helperCount = 0; #define RECORD_CACHED_INTERFACE_DISPATCH_HELPER(helper) _ASSERTE(helperCount < CACHED_INTERFACE_DISPATCH_HELPER_COUNT); pCachedInterfaceDispatchHelpers[helperCount++] = (PCODE)helper; +#define RECORD_CACHED_INTERFACE_DISPATCH_HELPER_AVLOCATION(helper) _ASSERTE(helperCount < CACHED_INTERFACE_DISPATCH_HELPER_COUNT); pCachedInterfaceDispatchHelpersAVLocation[helperCount++] = (PCODE)helper; + pCachedInterfaceDispatchHelpers = new PCODE[CACHED_INTERFACE_DISPATCH_HELPER_COUNT]; RECORD_CACHED_INTERFACE_DISPATCH_HELPER(RhpInterfaceDispatch1); RECORD_CACHED_INTERFACE_DISPATCH_HELPER(RhpInterfaceDispatch2); @@ -4096,6 +4108,20 @@ VirtualCallStubManagerManager::VirtualCallStubManagerManager() RECORD_CACHED_INTERFACE_DISPATCH_HELPER(RhpInterfaceDispatch64); RECORD_CACHED_INTERFACE_DISPATCH_HELPER(RhpVTableOffsetDispatch); RECORD_CACHED_INTERFACE_DISPATCH_HELPER(RhpInitialInterfaceDispatch); + _ASSERTE(helperCount == CACHED_INTERFACE_DISPATCH_HELPER_COUNT); + + helperCount = 0; + pCachedInterfaceDispatchHelpersAVLocation = new PCODE[CACHED_INTERFACE_DISPATCH_HELPER_COUNT]; + RECORD_CACHED_INTERFACE_DISPATCH_HELPER_AVLOCATION(RhpInterfaceDispatchAVLocation1); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER_AVLOCATION(RhpInterfaceDispatchAVLocation2); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER_AVLOCATION(RhpInterfaceDispatchAVLocation4); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER_AVLOCATION(RhpInterfaceDispatchAVLocation8); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER_AVLOCATION(RhpInterfaceDispatchAVLocation16); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER_AVLOCATION(RhpInterfaceDispatchAVLocation32); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER_AVLOCATION(RhpInterfaceDispatchAVLocation64); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER_AVLOCATION(RhpVTableOffsetDispatchAVLocation); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER_AVLOCATION(RhpInitialInterfaceDispatch); + _ASSERTE(helperCount == 
CACHED_INTERFACE_DISPATCH_HELPER_COUNT); countCachedInterfaceDispatchHelpers = helperCount; #endif // FEATURE_CACHED_INTERFACE_DISPATCH @@ -4240,4 +4266,15 @@ bool VirtualCallStubManager::isCachedInterfaceDispatchStub(PCODE addr) return false; return pGlobalManager->isCachedInterfaceDispatchStub(addr); } + +bool VirtualCallStubManager::isCachedInterfaceDispatchStubAVLocation(PCODE addr) +{ + LIMITED_METHOD_DAC_CONTRACT; + + VirtualCallStubManagerManager *pGlobalManager = VirtualCallStubManagerManager::GlobalManager(); + + if (pGlobalManager == NULL) + return false; + return pGlobalManager->isCachedInterfaceDispatchStubAVLocation(addr); +} #endif \ No newline at end of file diff --git a/src/coreclr/vm/virtualcallstub.h b/src/coreclr/vm/virtualcallstub.h index 10a573e151db80..3f457ddde83ee8 100644 --- a/src/coreclr/vm/virtualcallstub.h +++ b/src/coreclr/vm/virtualcallstub.h @@ -314,6 +314,7 @@ class VirtualCallStubManager : public StubManager #endif // !DACCESS_COMPILE static bool isCachedInterfaceDispatchStub(PCODE addr); + static bool isCachedInterfaceDispatchStubAVLocation(PCODE addr); static BOOL isStubStatic(PCODE addr) { @@ -784,6 +785,7 @@ class VirtualCallStubManagerManager : public StubManager #ifdef FEATURE_CACHED_INTERFACE_DISPATCH DPTR(PCODE) pCachedInterfaceDispatchHelpers; + DPTR(PCODE) pCachedInterfaceDispatchHelpersAVLocation; size_t countCachedInterfaceDispatchHelpers = 0; #endif // FEATURE_CACHED_INTERFACE_DISPATCH @@ -826,6 +828,17 @@ class VirtualCallStubManagerManager : public StubManager } return false; } + + bool isCachedInterfaceDispatchStubAVLocation(PCODE addr) + { + LIMITED_METHOD_DAC_CONTRACT; + for (size_t i = 0; i < countCachedInterfaceDispatchHelpers; i++) + { + if (pCachedInterfaceDispatchHelpersAVLocation[i] == addr) + return true; + } + return false; + } #endif // FEATURE_CACHED_INTERFACE_DISPATCH #ifdef _DEBUG From c320e1d7087757fdb81b00d5360dda7b84431a48 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Sat, 25 Jan 2025 00:09:45 
+0000 Subject: [PATCH 16/41] Fix musl build failure --- src/coreclr/vm/jitinterface.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 338bfb3b2c52e1..a10d63e4621fdb 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -13674,7 +13674,7 @@ BOOL LoadDynamicInfoEntry(Module *currentModule, DispatchToken token = VirtualCallStubManager::GetTokenFromFromOwnerAndSlot(ownerType, slot); INTERFACE_DISPATCH_CACHED_OR_VSD( - return NULL; // R2R interface dispatch currently only supports fixups with a single pointer, return FALSE to skip using the method + return FALSE; // R2R interface dispatch currently only supports fixups with a single pointer, return FALSE to skip using the method , result = pMgr->GetCallStub(token); ); From 361588a838838275417ae7a4390459831614740e Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Sat, 25 Jan 2025 00:32:56 +0000 Subject: [PATCH 17/41] Handle missed RhpVTableOffsetDispatchAVLocation case --- .../nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.asm | 1 + src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm | 1 + 2 files changed, 2 insertions(+) diff --git a/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.asm b/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.asm index 03e9cffed260f0..5b97f041f75f1d 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.asm @@ -20,6 +20,7 @@ ;; Load the MethodTable from the object instance in x0, and add it to the vtable offset ;; to get the address in the vtable of what we want to dereference + ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation ldr x13, [x0] add x12, x12, x13 diff --git a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm index 2f82d9664c0538..aaee6655e4814b 100644 --- 
a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm +++ b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm @@ -24,6 +24,7 @@ ;; Load the MethodTable from the object instance in rcx, and add it to the vtable offset ;; to get the address in the vtable chunk list of what we want to dereference + ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation ldr x9, [x0] add x9, x10, x9 From 24e78b23fc9624791b9c8474b180db4e4baa7384 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Sat, 25 Jan 2025 00:36:37 +0000 Subject: [PATCH 18/41] Move RiscV stub dispatch logic to the same place as everything else --- .../{nativeaot/Runtime => shared_runtime}/riscv64/StubDispatch.S | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/coreclr/{nativeaot/Runtime => shared_runtime}/riscv64/StubDispatch.S (100%) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S b/src/coreclr/shared_runtime/riscv64/StubDispatch.S similarity index 100% rename from src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S rename to src/coreclr/shared_runtime/riscv64/StubDispatch.S From 5b0e5ac2d522dc769fab5a0df20ecdda57adacac Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Mon, 27 Jan 2025 13:30:07 -0800 Subject: [PATCH 19/41] Fix assertion issue with collectible assemblies --- src/coreclr/vm/virtualcallstub.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index ff986aa27364a0..5514828fb2aa02 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -658,6 +658,9 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) #ifdef FEATURE_VIRTUAL_STUB_DISPATCH cache_entry_heap_reserve_size = GetOsPageSize(); cache_entry_heap_commit_size = GetOsPageSize(); +#else + // If we don't support VSD, use a slightly bigger heap size to avoid wasting memory + indcell_heap_reserve_size = 2 * GetOsPageSize(); #endif // FEATURE_VIRTUAL_STUB_DISPATCH #ifdef _DEBUG From 
fa7826ae0695f4b41dd8f99162ffcb85fbbd0289 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Mon, 27 Jan 2025 15:05:31 -0800 Subject: [PATCH 20/41] Reduce InterfaceDispatchCell size from 4 pointers to 2, and actually hook up the VTable offset logic and such (vtable paths are untested) --- src/coreclr/vm/CachedInterfaceDispatchPal.h | 106 +++++++++++--------- src/coreclr/vm/contractimpl.h | 51 +++++++++- src/coreclr/vm/methodtable.h | 2 + src/coreclr/vm/virtualcallstub.cpp | 7 +- 4 files changed, 111 insertions(+), 55 deletions(-) diff --git a/src/coreclr/vm/CachedInterfaceDispatchPal.h b/src/coreclr/vm/CachedInterfaceDispatchPal.h index 8e9e5eb4e99a60..2558f3ea265e02 100644 --- a/src/coreclr/vm/CachedInterfaceDispatchPal.h +++ b/src/coreclr/vm/CachedInterfaceDispatchPal.h @@ -40,6 +40,17 @@ void *InterfaceDispatch_AllocDoublePointerAligned(size_t size); // Allocate memory aligned at at least sizeof(void*) void *InterfaceDispatch_AllocPointerAligned(size_t size); +enum Flags +{ + // The low 2 bits of the m_pCache pointer are treated specially so that we can avoid the need for + // extra fields on this type. + // OR if the m_pCache value is less than 0x1000 then this it is a vtable offset and should be used as such + IDC_CachePointerPointsIsVTableOffset = 0x2, + IDC_CachePointerPointsAtCache = 0x0, + IDC_CachePointerMask = 0x3, + IDC_CachePointerMaskShift = 0x2, +}; + enum class DispatchCellType { InterfaceAndSlot = 0x0, @@ -48,15 +59,41 @@ enum class DispatchCellType struct DispatchCellInfo { - DispatchCellType CellType; - DispatchToken Token; +private: + static DispatchCellType CellTypeFromToken(DispatchToken token) + { + if (token.IsThisToken()) + { + return DispatchCellType::VTableOffset; + } + return DispatchCellType::InterfaceAndSlot; + } +public: + + DispatchCellInfo(DispatchToken token, bool hasCache) : + CellType(CellTypeFromToken(token)), + Token(token), + HasCache(hasCache ? 
1 : 0) + { + + } + const DispatchCellType CellType; + const DispatchToken Token; uintptr_t GetVTableOffset() const { + if (CellType == DispatchCellType::VTableOffset) + { + uint32_t slot = Token.GetSlotNumber(); + unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE; + unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE; + + return (((uintptr_t)offsetOfIndirection) << (TARGET_POINTER_SIZE / 2)) + (((uintptr_t)offsetAfterIndirection) << TARGET_POINTER_SIZE / 4) | (uintptr_t)IDC_CachePointerPointsIsVTableOffset; + } return 0; } - uint8_t HasCache = 0; + const uint8_t HasCache = 0; }; struct InterfaceDispatchCacheHeader @@ -74,34 +111,16 @@ struct InterfaceDispatchCacheHeader void Initialize(DispatchToken token) { m_token = token; -/* m_vtableOffset = 0; - m_vtableSecondLevelOffset = 0;*/ } void Initialize(const DispatchCellInfo *pNewCellInfo) { m_token = pNewCellInfo->Token; } -/* void Initialize(uint32_t vtableOffset, uint32_t vtableSecondLevelOffset) - { - m_pMD = nullptr; - m_vtableOffset = (uint16_t)vtableOffset; - m_vtableSecondLevelOffset = (uint16_t)vtableSecondLevelOffset; - }*/ DispatchCellInfo GetDispatchCellInfo() { - DispatchCellInfo cellInfo; - if (m_token.IsThisToken()) - { - cellInfo.CellType = DispatchCellType::VTableOffset; - } - else - { - cellInfo.CellType = DispatchCellType::InterfaceAndSlot; - } - cellInfo.Token = m_token; - cellInfo.HasCache = 1; + DispatchCellInfo cellInfo(m_token, true); return cellInfo; } @@ -121,39 +140,29 @@ struct InterfaceDispatchCell volatile TADDR m_pCache; // Context used by the stub above (one or both of the low two bits are set // for initial dispatch, and if not set, using this as a cache pointer or // as a vtable offset.) 
- TADDR dummy; // Padding to make the size of the structure a multiple of 2 pointers - DispatchToken m_token; - - enum Flags - { - // The low 2 bits of the m_pCache pointer are treated specially so that we can avoid the need for - // extra fields on this type. - // OR if the m_pCache value is less than 0x1000 then this it is a vtable offset and should be used as such - IDC_CachePointerPointerUninitialized = 0x2, - IDC_CachePointerPointsIsVTableOffset = 0x1, - IDC_CachePointerPointsAtCache = 0x0, - IDC_CachePointerMask = 0x3, - IDC_CachePointerMaskShift = 0x2, - }; - - static TADDR InitialDispatchCacheCellValue() - { - return IDC_CachePointerPointerUninitialized; - } - DispatchCellInfo GetDispatchCellInfo() { // Capture m_pCache into a local for safe access (this is a volatile read of a value that may be // modified on another thread while this function is executing.) TADDR cachePointerValue = m_pCache; - DispatchCellInfo cellInfo; - cellInfo.Token = m_token; if (IsCache(cachePointerValue)) { - cellInfo.HasCache = 1; + return ((InterfaceDispatchCacheHeader*)cachePointerValue)->GetDispatchCellInfo(); + } + else if (DispatchToken::IsCachedInterfaceDispatchToken(cachePointerValue)) + { + return DispatchCellInfo(DispatchToken::FromCachedInterfaceDispatchToken(cachePointerValue), false); + } + else + { + _ASSERTE(IsVTableOffset(cachePointerValue)); + unsigned offsetOfIndirection = (unsigned)(((uintptr_t)cachePointerValue) >> (TARGET_POINTER_SIZE / 2)); + unsigned offsetAfterIndirection = (unsigned)(((uintptr_t)cachePointerValue) >> (TARGET_POINTER_SIZE / 4)) & 0xFF; + unsigned slotGroupPerChunk = (offsetOfIndirection - MethodTable::GetVtableOffset()) / TARGET_POINTER_SIZE; + unsigned slot = (slotGroupPerChunk * VTABLE_SLOTS_PER_CHUNK) + (offsetAfterIndirection / TARGET_POINTER_SIZE); + return DispatchCellInfo(DispatchToken::CreateDispatchToken(slot), false); } - return cellInfo; } static bool IsCache(TADDR value) @@ -168,6 +177,11 @@ struct InterfaceDispatchCell } } + 
static bool IsVTableOffset(TADDR value) + { + return (value & IDC_CachePointerPointsIsVTableOffset) == IDC_CachePointerPointsIsVTableOffset; + } + InterfaceDispatchCacheHeader* GetCache() const { // Capture m_pCache into a local for safe access (this is a volatile read of a value that may be diff --git a/src/coreclr/vm/contractimpl.h b/src/coreclr/vm/contractimpl.h index f8d7d81856f467..c1784c4226a871 100644 --- a/src/coreclr/vm/contractimpl.h +++ b/src/coreclr/vm/contractimpl.h @@ -159,31 +159,58 @@ struct DispatchToken // token is really a DispatchTokenFat*, and to recover the pointer // we just shift left by 1; correspondingly, when storing a // DispatchTokenFat* in a DispatchToken, we shift right by 1. +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + static const UINT_PTR MASK_TYPE_ID = 0x00003FFF; +#else // FEATURE_CACHED_INTERFACE_DISPATCH static const UINT_PTR MASK_TYPE_ID = 0x00007FFF; +#endif // FEATURE_CACHED_INTERFACE_DISPATCH static const UINT_PTR MASK_SLOT_NUMBER = 0x0000FFFF; static const UINT_PTR SHIFT_TYPE_ID = 0x10; static const UINT_PTR SHIFT_SLOT_NUMBER = 0x0; #ifdef FAT_DISPATCH_TOKENS +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + static const UINT_PTR FAT_TOKEN_FLAG = 0x40000000; +#else static const UINT_PTR FAT_TOKEN_FLAG = 0x80000000; +#endif //FEATURE_CACHED_INTERFACE_DISPATCH #endif // FAT_DISPATCH_TOKENS +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + static const UINT_PTR INVALID_TOKEN = 0x3FFFFFFF; +#else // FEATURE_CACHED_INTERFACE_DISPATCH static const UINT_PTR INVALID_TOKEN = 0x7FFFFFFF; +#endif // FEATURE_CACHED_INTERFACE_DISPATCH #else //TARGET_64BIT static const UINT_PTR MASK_SLOT_NUMBER = UI64(0x000000000000FFFF); static const UINT_PTR SHIFT_TYPE_ID = 0x20; static const UINT_PTR SHIFT_SLOT_NUMBER = 0x0; +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH +#ifdef FAT_DISPATCH_TOKENS + static const UINT_PTR MASK_TYPE_ID = UI64(0x000000003FFFFFFF); + static const UINT_PTR FAT_TOKEN_FLAG = UI64(0x4000000000000000); + static const UINT_PTR 
DISPATCH_TOKEN_FLAG= UI64(0x8000000000000000); +#else + static const UINT_PTR MASK_TYPE_ID = UI64(0x000000007FFFFFFF); + static const UINT_PTR DISPATCH_TOKEN_FLAG= UI64(0x8000000000000000); +#endif // FAT_DISPATCH_TOKENS +#else // FEATURE_CACHED_INTERFACE_DISPATCH #ifdef FAT_DISPATCH_TOKENS static const UINT_PTR MASK_TYPE_ID = UI64(0x000000007FFFFFFF); static const UINT_PTR FAT_TOKEN_FLAG = UI64(0x8000000000000000); #else static const UINT_PTR MASK_TYPE_ID = UI64(0x00000000FFFFFFFF); #endif // FAT_DISPATCH_TOKENS +#endif // FEATURE_CACHED_INTERFACE_DISPATCH +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + static const UINT_PTR INVALID_TOKEN = 0x3FFFFFFFFFFFFFFF; +#else // FEATURE_CACHED_INTERFACE_DISPATCH static const UINT_PTR INVALID_TOKEN = 0x7FFFFFFFFFFFFFFF; +#endif // FEATURE_CACHED_INTERFACE_DISPATCH #endif //TARGET_64BIT #ifdef FAT_DISPATCH_TOKENS @@ -242,13 +269,27 @@ struct DispatchToken public: #ifdef FAT_DISPATCH_TOKENS -#if !defined(TARGET_64BIT) - static const UINT32 MAX_TYPE_ID_SMALL = 0x00007FFF; -#else - static const UINT32 MAX_TYPE_ID_SMALL = 0x7FFFFFFF; -#endif + static const UINT32 MAX_TYPE_ID_SMALL = MASK_TYPE_ID; #endif // FAT_DISPATCH_TOKENS +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + //------------------------------------------------------------------------ + // A Cached Interface DispatchToken uses the low bit to indicate that it is a dispatch token, and not a cache entry + static inline BOOL IsCachedInterfaceDispatchToken(UINT_PTR maybeToken) + { + return maybeToken & 0x1; + } + static inline DispatchToken FromCachedInterfaceDispatchToken(UINT_PTR token) + { + return DispatchToken(token >> 1); + } + static inline UINT_PTR ToCachedInterfaceDispatchToken(DispatchToken token) + { + return (token.m_token << 1) | 0x1; + } +#endif + + //------------------------------------------------------------------------ DispatchToken() { diff --git a/src/coreclr/vm/methodtable.h b/src/coreclr/vm/methodtable.h index ee2d60f6f947b5..fc785c5f3b337c 100644 --- 
a/src/coreclr/vm/methodtable.h +++ b/src/coreclr/vm/methodtable.h @@ -1622,9 +1622,11 @@ class MethodTable typedef DPTR(VTableIndir2_t) VTableIndir_t; static DWORD GetIndexOfVtableIndirection(DWORD slotNum); + static DWORD GetStartSlotForVtableIndirection(UINT32 indirectionIndex, DWORD wNumVirtuals); static DWORD GetEndSlotForVtableIndirection(UINT32 indirectionIndex, DWORD wNumVirtuals); static UINT32 GetIndexAfterVtableIndirection(UINT32 slotNum); + static UINT32 IndexAfterVtableIndirectionToSlot(UINT32 slotNum); static DWORD GetNumVtableIndirections(DWORD wNumVirtuals); DPTR(VTableIndir_t) GetVtableIndirections(); DWORD GetNumVtableIndirections(); diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index 5514828fb2aa02..30661c4b7f565c 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -1267,8 +1267,7 @@ BYTE *VirtualCallStubManager::GenerateStubIndirection(PCODE target, DispatchToke INTERFACE_DISPATCH_CACHED_OR_VSD( InterfaceDispatchCell * pCell = (InterfaceDispatchCell *)ret; pCell->m_pStub = target; - pCell->m_pCache = InterfaceDispatchCell::InitialDispatchCacheCellValue(); - pCell->m_token = token; + pCell->m_pCache = DispatchToken::ToCachedInterfaceDispatchToken(token); ret = (BYTE *)pCell; , *((PCODE *)ret) = target; @@ -1584,7 +1583,7 @@ extern "C" PCODE CID_ResolveWorker(TransitionBlock * pTransitionBlock, pSDFrame->SetCallSite(NULL, (TADDR)callSite.GetIndirectCell()); - DispatchToken representativeToken(indirectionCell->m_token); + DispatchToken representativeToken(indirectionCell->GetDispatchCellInfo().Token); MethodTable * pRepresentativeMT = pObj->GetMethodTable(); if (representativeToken.IsTypedToken()) { @@ -1600,7 +1599,7 @@ extern "C" PCODE CID_ResolveWorker(TransitionBlock * pTransitionBlock, GCStress::MaybeTriggerAndProtect(pObj); - target = CachedInterfaceDispatchResolveWorker(&callSite, protectedObj, indirectionCell->m_token); + target = 
CachedInterfaceDispatchResolveWorker(&callSite, protectedObj, representativeToken); #if _DEBUG if (pSDFrame->GetGCRefMap() != NULL) From f1c2c65865327e15ce3870989136557ede2077bf Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Mon, 27 Jan 2025 16:28:08 -0800 Subject: [PATCH 21/41] Use the isCachedInterfaceDispatchStubAVLocation helper where appropriate --- src/coreclr/vm/amd64/excepamd64.cpp | 2 +- src/coreclr/vm/arm64/stubs.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/vm/amd64/excepamd64.cpp b/src/coreclr/vm/amd64/excepamd64.cpp index 8cdbd3ab881e03..b25dd5c7f09fc8 100644 --- a/src/coreclr/vm/amd64/excepamd64.cpp +++ b/src/coreclr/vm/amd64/excepamd64.cpp @@ -601,7 +601,7 @@ AdjustContextForVirtualStub( bool isVirtualStubNullCheck = false; #ifdef FEATURE_CACHED_INTERFACE_DISPATCH - if (VirtualCallStubManager::isCachedInterfaceDispatchStub(f_IP)) + if (VirtualCallStubManager::isCachedInterfaceDispatchStubAVLocation(f_IP)) { isVirtualStubNullCheck = true; } diff --git a/src/coreclr/vm/arm64/stubs.cpp b/src/coreclr/vm/arm64/stubs.cpp index 9c8d3b1c7acc03..9c6cece878551f 100644 --- a/src/coreclr/vm/arm64/stubs.cpp +++ b/src/coreclr/vm/arm64/stubs.cpp @@ -962,7 +962,7 @@ AdjustContextForVirtualStub( bool isVirtualStubNullCheck = false; #ifdef FEATURE_CACHED_INTERFACE_DISPATCH - if (VirtualCallStubManager::isCachedInterfaceDispatchStub(f_IP)) + if (VirtualCallStubManager::isCachedInterfaceDispatchStubAVLocation(f_IP)) { isVirtualStubNullCheck = true; } From 36c9cc0496b3729a7dd505d4ee7ae3a0e90b3561 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Tue, 28 Jan 2025 13:06:01 -0800 Subject: [PATCH 22/41] Enable using cached interface dispatch in R2R - Enable generating double pointer indirection cells in R2R files using command line switch. - Fix VTableOffset calculation - Add logic in ExternalMethodFixupWorker to handle the double pointer indirection cells. 
--- src/coreclr/crossgen-corelib.proj | 3 ++ .../ArrayOfEmbeddedDataNode.cs | 4 +- .../ReadyToRun/DelayLoadHelperImport.cs | 10 +++++ .../ReadyToRun/DelayLoadHelperMethodImport.cs | 2 +- .../ReadyToRun/ImportSectionNode.cs | 16 +++++++- .../ReadyToRunCodegenNodeFactory.cs | 3 +- .../aot/crossgen2/Crossgen2RootCommand.cs | 3 ++ src/coreclr/tools/aot/crossgen2/Program.cs | 1 + .../aot/crossgen2/Properties/Resources.resx | 5 ++- src/coreclr/vm/CachedInterfaceDispatchPal.h | 24 ++++++++--- src/coreclr/vm/prestub.cpp | 40 +++++++++++++++++-- src/coreclr/vm/readytoruninfo.cpp | 2 +- src/coreclr/vm/virtualcallstub.cpp | 3 -- 13 files changed, 98 insertions(+), 18 deletions(-) diff --git a/src/coreclr/crossgen-corelib.proj b/src/coreclr/crossgen-corelib.proj index 7e93e2fcf9b24b..b95536f174a5b6 100644 --- a/src/coreclr/crossgen-corelib.proj +++ b/src/coreclr/crossgen-corelib.proj @@ -118,7 +118,10 @@ $(CrossGenDllCmd) --targetos:linux $(CrossGenDllCmd) -m:$(MergedMibcPath) --embed-pgo-data $(CrossGenDllCmd) -O + $(CrossGenDllCmd) --verify-type-and-field-layout + + $(CrossGenDllCmd) --enable-cached-interface-dispatch-support $(CrossGenDllCmd) @(CoreLib) diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ArrayOfEmbeddedDataNode.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ArrayOfEmbeddedDataNode.cs index 801bd7dbe2ccbd..8fadcc5f3beb0f 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ArrayOfEmbeddedDataNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ArrayOfEmbeddedDataNode.cs @@ -75,10 +75,12 @@ protected virtual void GetElementDataForNodes(ref ObjectDataBuilder builder, Nod } } + protected virtual int GetAlignmentRequirement(NodeFactory factory) { return factory.Target.PointerSize; } + public override ObjectData GetData(NodeFactory factory, bool relocsOnly) { ObjectDataBuilder builder = new ObjectDataBuilder(factory, relocsOnly); - 
builder.RequireInitialPointerAlignment(); + builder.RequireInitialAlignment(GetAlignmentRequirement(factory)); if (_sorter != null) _nestedNodesList.MergeSort(_sorter); diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperImport.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperImport.cs index 54a85ff42e2cca..4e4e00e9fad763 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperImport.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperImport.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Collections.Generic; +using System.Diagnostics; using Internal.Text; using Internal.TypeSystem; @@ -66,6 +67,15 @@ public override void EncodeData(ref ObjectDataBuilder dataBuilder, NodeFactory f // when loaded by CoreCLR dataBuilder.EmitReloc(_delayLoadHelper, factory.Target.PointerSize == 4 ? 
RelocType.IMAGE_REL_BASED_HIGHLOW : RelocType.IMAGE_REL_BASED_DIR64, factory.Target.CodeDelta); + + if (Table.EntrySize == (factory.Target.PointerSize * 2)) + { + dataBuilder.EmitNaturalInt(0); + } + else + { + Debug.Assert(Table.EntrySize == factory.Target.PointerSize); + } } public override IEnumerable GetStaticDependencies(NodeFactory factory) diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperMethodImport.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperMethodImport.cs index eb996de2ac2b0a..4c73a0ab08bcab 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperMethodImport.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperMethodImport.cs @@ -28,7 +28,7 @@ public DelayLoadHelperMethodImport( MethodWithToken method, bool useVirtualCall, bool useInstantiatingStub, - Signature instanceSignature, + Signature instanceSignature, MethodDesc callingMethod = null) : base(factory, importSectionNode, helper, instanceSignature, useVirtualCall, useJumpableStub: false, callingMethod) { diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ImportSectionNode.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ImportSectionNode.cs index 45dc3fb4e240ab..e2356ea2816f84 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ImportSectionNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ImportSectionNode.cs @@ -12,11 +12,21 @@ public class ImportSectionNode : EmbeddedObjectNode { private class ImportTable : ArrayOfEmbeddedDataNode { - public ImportTable(string symbol) : base(symbol, nodeSorter: new EmbeddedObjectNodeComparer(CompilerComparer.Instance)) {} + private byte _alignment; + + public 
ImportTable(string symbol, byte alignment) : base(symbol, nodeSorter: new EmbeddedObjectNodeComparer(CompilerComparer.Instance)) + { + _alignment = alignment; + } public override bool ShouldSkipEmittingObjectNode(NodeFactory factory) => false; public override int ClassCode => (int)ObjectNodeOrder.ImportSectionNode; + + protected override int GetAlignmentRequirement(NodeFactory factory) + { + return _alignment; + } } private readonly ImportTable _imports; @@ -44,7 +54,7 @@ public ImportSectionNode(string name, ReadyToRunImportSectionType importType, Re _emitPrecode = emitPrecode; _emitGCRefMap = emitGCRefMap; - _imports = new ImportTable(_name + "_ImportBegin"); + _imports = new ImportTable(_name + "_ImportBegin", entrySize); _signatures = new ArrayOfEmbeddedPointersNode(_name + "_SigBegin", new EmbeddedObjectNodeComparer(CompilerComparer.Instance)); _signatureList = new List(); _gcRefMap = _emitGCRefMap ? new GCRefMapNode(this) : null; @@ -154,5 +164,7 @@ public override int CompareToImpl(ISortableNode other, CompilerComparer comparer { return _name.CompareTo(((ImportSectionNode)other)._name); } + + public int EntrySize => _entrySize; } } diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs index 507c34b6f899c0..d3dee3d98bd9a0 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs @@ -58,6 +58,7 @@ public sealed class NodeFactoryOptimizationFlags public TypeValidationRule TypeValidation; public int DeterminismStress; public bool PrintReproArgs; + public bool EnableCachedInterfaceDispatchSupport; } // To make the code future compatible to the composite R2R story @@ -867,7 +868,7 @@ bool HasAnyProfileDataForInput() 
"DispatchImports", ReadyToRunImportSectionType.StubDispatch, ReadyToRunImportSectionFlags.PCode, - (byte)Target.PointerSize, + this.OptimizationFlags.EnableCachedInterfaceDispatchSupport ? (byte)(2 * Target.PointerSize) : (byte)Target.PointerSize, emitPrecode: false, emitGCRefMap: true); ImportSectionsTable.AddEmbeddedObject(DispatchImports); diff --git a/src/coreclr/tools/aot/crossgen2/Crossgen2RootCommand.cs b/src/coreclr/tools/aot/crossgen2/Crossgen2RootCommand.cs index 6265d0df45b65b..64b78d35500ac5 100644 --- a/src/coreclr/tools/aot/crossgen2/Crossgen2RootCommand.cs +++ b/src/coreclr/tools/aot/crossgen2/Crossgen2RootCommand.cs @@ -39,6 +39,8 @@ internal class Crossgen2RootCommand : CliRootCommand new("--optimize-space", "--Os") { Description = SR.OptimizeSpaceOption }; public CliOption OptimizeTime { get; } = new("--optimize-time", "--Ot") { Description = SR.OptimizeSpeedOption }; + public CliOption EnableCachedInterfaceDispatchSupport { get; } = + new("--enable-cached-interface-dispatch-support", "--CID") { Description = SR.EnableCachedInterfaceDispatchSupport }; public CliOption TypeValidation { get; } = new("--type-validation") { DefaultValueFactory = _ => TypeValidationRule.Automatic, Description = SR.TypeValidation, HelpName = "arg" }; public CliOption InputBubble { get; } = @@ -163,6 +165,7 @@ public Crossgen2RootCommand(string[] args) : base(SR.Crossgen2BannerText) Options.Add(OptimizeDisabled); Options.Add(OptimizeSpace); Options.Add(OptimizeTime); + Options.Add(EnableCachedInterfaceDispatchSupport); Options.Add(TypeValidation); Options.Add(InputBubble); Options.Add(InputBubbleReferenceFilePaths); diff --git a/src/coreclr/tools/aot/crossgen2/Program.cs b/src/coreclr/tools/aot/crossgen2/Program.cs index 1453a6cf177521..0b5c00c95c6717 100644 --- a/src/coreclr/tools/aot/crossgen2/Program.cs +++ b/src/coreclr/tools/aot/crossgen2/Program.cs @@ -595,6 +595,7 @@ private void RunSingleCompilation(Dictionary inFilePaths, Instru nodeFactoryFlags.TypeValidation = 
Get(_command.TypeValidation); nodeFactoryFlags.DeterminismStress = Get(_command.DeterminismStress); nodeFactoryFlags.PrintReproArgs = Get(_command.PrintReproInstructions); + nodeFactoryFlags.EnableCachedInterfaceDispatchSupport = Get(_command.EnableCachedInterfaceDispatchSupport); builder .UseMapFile(Get(_command.Map)) diff --git a/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx b/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx index 59bfc796f397cb..04334beb795018 100644 --- a/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx +++ b/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx @@ -423,4 +423,7 @@ Number of nested occurrences of a potentially cyclic generic type to cut off - + + Enable support for cached interface dispatch + + \ No newline at end of file diff --git a/src/coreclr/vm/CachedInterfaceDispatchPal.h b/src/coreclr/vm/CachedInterfaceDispatchPal.h index 2558f3ea265e02..568518fc6ca387 100644 --- a/src/coreclr/vm/CachedInterfaceDispatchPal.h +++ b/src/coreclr/vm/CachedInterfaceDispatchPal.h @@ -6,6 +6,8 @@ #ifdef FEATURE_CACHED_INTERFACE_DISPATCH +extern "C" void RhpInitialInterfaceDispatch(); + #ifndef HOST_WINDOWS #if defined(HOST_AMD64) || defined(HOST_ARM64) || defined(HOST_LOONGARCH64) FORCEINLINE uint8_t PalInterlockedCompareExchange128(_Inout_ int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult) @@ -88,11 +90,26 @@ struct DispatchCellInfo unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE; unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE; - return (((uintptr_t)offsetOfIndirection) << (TARGET_POINTER_SIZE / 2)) + (((uintptr_t)offsetAfterIndirection) << TARGET_POINTER_SIZE / 4) | (uintptr_t)IDC_CachePointerPointsIsVTableOffset; + uintptr_t offsetOfIndirectionPortion = (((uintptr_t)offsetOfIndirection) << ((TARGET_POINTER_SIZE * 8) / 2)); + 
uintptr_t offsetAfterIndirectionPortion = (((uintptr_t)offsetAfterIndirection) << ((TARGET_POINTER_SIZE * 8) / 4)); + uintptr_t flagPortion = (uintptr_t)IDC_CachePointerPointsIsVTableOffset; + + uintptr_t result = offsetOfIndirectionPortion | offsetAfterIndirectionPortion | flagPortion; + _ASSERTE(slot == VTableOffsetToSlot(result)); + return result; } return 0; } + static unsigned VTableOffsetToSlot(uintptr_t vtableOffset) + { + unsigned offsetOfIndirection = (unsigned)(vtableOffset >> ((TARGET_POINTER_SIZE * 8) / 2)); + unsigned offsetAfterIndirection = (unsigned)(vtableOffset >> ((TARGET_POINTER_SIZE * 8) / 4)) & 0xFF; + unsigned slotGroupPerChunk = (offsetOfIndirection - MethodTable::GetVtableOffset()) / TARGET_POINTER_SIZE; + unsigned slot = (slotGroupPerChunk * VTABLE_SLOTS_PER_CHUNK) + (offsetAfterIndirection / TARGET_POINTER_SIZE); + return slot; + } + const uint8_t HasCache = 0; }; @@ -157,10 +174,7 @@ struct InterfaceDispatchCell else { _ASSERTE(IsVTableOffset(cachePointerValue)); - unsigned offsetOfIndirection = (unsigned)(((uintptr_t)cachePointerValue) >> (TARGET_POINTER_SIZE / 2)); - unsigned offsetAfterIndirection = (unsigned)(((uintptr_t)cachePointerValue) >> (TARGET_POINTER_SIZE / 4)) & 0xFF; - unsigned slotGroupPerChunk = (offsetOfIndirection - MethodTable::GetVtableOffset()) / TARGET_POINTER_SIZE; - unsigned slot = (slotGroupPerChunk * VTABLE_SLOTS_PER_CHUNK) + (offsetAfterIndirection / TARGET_POINTER_SIZE); + unsigned slot = DispatchCellInfo::VTableOffsetToSlot(cachePointerValue); return DispatchCellInfo(DispatchToken::CreateDispatchToken(slot), false); } } diff --git a/src/coreclr/vm/prestub.cpp b/src/coreclr/vm/prestub.cpp index f85f8b3fe9aa7e..f9501736e5c657 100644 --- a/src/coreclr/vm/prestub.cpp +++ b/src/coreclr/vm/prestub.cpp @@ -23,6 +23,8 @@ #include "virtualcallstub.h" #include "../debug/ee/debugger.h" +#include "CachedInterfaceDispatchPal.h" + #ifdef FEATURE_COMINTEROP #include "clrtocomcall.h" #endif @@ -3154,8 +3156,10 @@ EXTERN_C 
PCODE STDCALL ExternalMethodFixupWorker(TransitionBlock * pTransitionBl } _ASSERTE(pImportSection != NULL); - _ASSERTE(pImportSection->EntrySize == sizeof(TADDR)); - COUNT_T index = (rva - pImportSection->Section.VirtualAddress) / sizeof(TADDR); + COUNT_T index; + + index = (rva - pImportSection->Section.VirtualAddress) / pImportSection->EntrySize; + _ASSERTE((pImportSection->EntrySize == sizeof(TADDR)) || (pImportSection->EntrySize == 2*sizeof(TADDR))); PTR_DWORD pSignatures = dac_cast(pNativeImage->GetRvaData(pImportSection->Signatures)); @@ -3306,6 +3310,36 @@ EXTERN_C PCODE STDCALL ExternalMethodFixupWorker(TransitionBlock * pTransitionBl #endif #if defined(FEATURE_CACHED_INTERFACE_DISPATCH) { + if (ALIGN_UP(rva, sizeof(TADDR) * 2) == rva && pImportSection->EntrySize == sizeof(TADDR) * 2) + { + // The entry is aligned and the size is correct, so we can use the cached interface dispatch mechanism + // to speed up further uses of this interface dispatch slot + DispatchToken token = VirtualCallStubManager::GetTokenFromFromOwnerAndSlot(pMT, slot); + + uintptr_t addr = (uintptr_t)RhpInitialInterfaceDispatch; + uintptr_t pCache = (uintptr_t)DispatchToken::ToCachedInterfaceDispatchToken(token); +#ifdef TARGET_64BIT + int64_t rgComparand[2] = { *(volatile int64_t*)pIndirection , *(((volatile int64_t*)pIndirection) + 1) }; + // We need to only update if the indirection cell is still pointing to the initial R2R stub + // But we don't have the address of the initial R2R stub, as that is part of the R2R image + // However, we can rely on the detail that the cache value will never be 0 once it is updated + // So we read the indirection cell data, and if the cache portion is 0, we attempt to update the complete cell + if (rgComparand[1] == 0 && PalInterlockedCompareExchange128((int64_t*)pIndirection, rgComparand[1], rgComparand[0], rgComparand)) + { + PalInterlockedCompareExchange128((int64_t*)pIndirection, pCache, addr, rgComparand); + } +#else + // Stuff the two pointers 
into a 64-bit value as the proposed new value for the CompareExchange64 below. + uint64_t oldValue = *(volatile uint64_t*)pIndirection; + if ((oldValue >> 32) == 0) + { + // The cache portion is 0, so we attempt to update the complete cell + int64_t iNewValue = (int64_t)((uint64_t)(uintptr_t)addr | ((uint64_t)(uintptr_t)pCache << 32)); + PalInterlockedCompareExchange64((int64_t*)pIndirection, iNewValue, oldValue); + } +#endif + } + // We don't yet have a proper implementation for cached interface stubs in R2R code, so instead of finding stubs, simply do the resolution in pure C++ // and skip updating the indirection cell DispatchToken token; @@ -3751,7 +3785,7 @@ PCODE DynamicHelperFixup(TransitionBlock * pTransitionBlock, TADDR * pCell, DWOR _ASSERTE(pImportSection->EntrySize == sizeof(TADDR)); - COUNT_T index = (rva - pImportSection->Section.VirtualAddress) / sizeof(TADDR); + COUNT_T index = (rva - pImportSection->Section.VirtualAddress) / pImportSection->EntrySize; PTR_DWORD pSignatures = dac_cast(pNativeImage->GetRvaData(pImportSection->Signatures)); diff --git a/src/coreclr/vm/readytoruninfo.cpp b/src/coreclr/vm/readytoruninfo.cpp index 34cd10559aa1ba..bbc5948412df14 100644 --- a/src/coreclr/vm/readytoruninfo.cpp +++ b/src/coreclr/vm/readytoruninfo.cpp @@ -485,7 +485,7 @@ static bool AcquireImage(Module * pModule, PEImageLayout * pLayout, READYTORUN_H // Found an eager fixup section. Check the signature of each fixup in this section. 
PVOID *pFixups = (PVOID *)((PBYTE)pLayout->GetBase() + pCurSection->Section.VirtualAddress); - DWORD nFixups = pCurSection->Section.Size / TARGET_POINTER_SIZE; + DWORD nFixups = pCurSection->Section.Size / pCurSection->EntrySize; DWORD *pSignatures = (DWORD *)((PBYTE)pLayout->GetBase() + pCurSection->Signatures); for (DWORD i = 0; i < nFixups; i++) { diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index 30661c4b7f565c..c3ba7aeca0a97b 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -105,8 +105,6 @@ BYTE* GenerateDispatchStubCellEntryMethodDesc(LoaderAllocator *pLoaderAllocator, return GenerateDispatchStubCellEntrySlot(pLoaderAllocator, ownerType, pMD->GetSlot(), pResolver); } -extern "C" void RhpInitialInterfaceDispatch(); - BYTE* GenerateDispatchStubCellEntrySlot(LoaderAllocator *pLoaderAllocator, TypeHandle ownerType, int methodSlot, LCGMethodResolver *pResolver) { // Generate a dispatch stub and gather a slot. 
@@ -4072,7 +4070,6 @@ extern "C" void RhpInterfaceDispatch32(); extern "C" void RhpInterfaceDispatch64(); extern "C" void RhpVTableOffsetDispatch(); -extern "C" void RhpInitialInterfaceDispatch(); extern "C" void RhpInterfaceDispatchAVLocation1(); extern "C" void RhpInterfaceDispatchAVLocation2(); From a3a4ff1d99e0814d21f370e5c3d73c10251afcad Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Tue, 28 Jan 2025 14:08:59 -0800 Subject: [PATCH 23/41] Move PalInterlockedCompareExchange128 to the PAL/minipal --- src/coreclr/minipal/minipal.h | 13 +++++++++++ src/coreclr/pal/inc/pal.h | 16 +++++++++++++ src/coreclr/vm/CachedInterfaceDispatchPal.h | 26 --------------------- 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/src/coreclr/minipal/minipal.h b/src/coreclr/minipal/minipal.h index 38ab07ec63c54d..afecd9ce74dc72 100644 --- a/src/coreclr/minipal/minipal.h +++ b/src/coreclr/minipal/minipal.h @@ -76,3 +76,16 @@ class VMToOSInterface // true if it succeeded, false if it failed static bool ReleaseRWMapping(void* pStart, size_t size); }; + +#if defined(HOST_64BIT) && defined(FEATURE_CACHED_INTERFACE_DISPATCH) +EXTERN_C uint8_t _InterlockedCompareExchange128(int64_t volatile *, int64_t, int64_t, int64_t *); + +#if defined(HOST_WINDOWS) +#pragma intrinsic(_InterlockedCompareExchange128) +#endif + +FORCEINLINE uint8_t PalInterlockedCompareExchange128(_Inout_ int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult) +{ + return _InterlockedCompareExchange128(pDst, iValueHigh, iValueLow, pComparandAndResult); +} +#endif // defined(HOST_64BIT) && defined(FEATURE_CACHED_INTERFACE_DISPATCH) \ No newline at end of file diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index fdeba4572e469a..f1a2d97b299fb7 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -3618,6 +3618,22 @@ Define_InterlockMethod( ((PVOID)(UINT_PTR)InterlockedCompareExchange((PLONG)(UINT_PTR)(Destination), 
(LONG)(UINT_PTR)(ExChange), (LONG)(UINT_PTR)(Comperand))) #endif +#if defined(HOST_64BIT) && defined(FEATURE_CACHED_INTERFACE_DISPATCH) +FORCEINLINE uint8_t _InterlockedCompareExchange128(_Inout_ int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult) +{ + __int128_t iComparand = ((__int128_t)pComparandAndResult[1] << 64) + (uint64_t)pComparandAndResult[0]; + // TODO-LOONGARCH64: for LoongArch64, it supports 128bits atomic from 3A6000-CPU which is ISA1.1's version. + // The LA64's compiler will translate the `__sync_val_compare_and_swap` into calling the libatomic's library interface to emulate + // the 128-bit CAS by mutex_lock if the target processor doesn't support the ISA1.1. + // But this emulation by libatomic doesn't satisfy requirements here which it must update two adjacent pointers atomically. + // this is being discussed in https://github.com/dotnet/runtime/issues/109276. + __int128_t iResult = __sync_val_compare_and_swap((__int128_t volatile*)pDst, iComparand, ((__int128_t)iValueHigh << 64) + (uint64_t)iValueLow); + PAL_InterlockedOperationBarrier(); + pComparandAndResult[0] = (int64_t)iResult; pComparandAndResult[1] = (int64_t)(iResult >> 64); + return iComparand == iResult; +} +#endif + /*++ Function: MemoryBarrier diff --git a/src/coreclr/vm/CachedInterfaceDispatchPal.h b/src/coreclr/vm/CachedInterfaceDispatchPal.h index 568518fc6ca387..d62a9a55847843 100644 --- a/src/coreclr/vm/CachedInterfaceDispatchPal.h +++ b/src/coreclr/vm/CachedInterfaceDispatchPal.h @@ -8,32 +8,6 @@ extern "C" void RhpInitialInterfaceDispatch(); -#ifndef HOST_WINDOWS -#if defined(HOST_AMD64) || defined(HOST_ARM64) || defined(HOST_LOONGARCH64) -FORCEINLINE uint8_t PalInterlockedCompareExchange128(_Inout_ int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult) -{ - __int128_t iComparand = ((__int128_t)pComparandAndResult[1] << 64) + (uint64_t)pComparandAndResult[0]; - // TODO-LOONGARCH64: for LoongArch64, 
it supports 128bits atomic from 3A6000-CPU which is ISA1.1's version. - // The LA64's compiler will translate the `__sync_val_compare_and_swap` into calling the libatomic's library interface to emulate - // the 128-bit CAS by mutex_lock if the target processor doesn't support the ISA1.1. - // But this emulation by libatomic doesn't satisfy requirements here which it must update two adjacent pointers atomically. - // this is being discussed in https://github.com/dotnet/runtime/issues/109276. - __int128_t iResult = __sync_val_compare_and_swap((__int128_t volatile*)pDst, iComparand, ((__int128_t)iValueHigh << 64) + (uint64_t)iValueLow); - PAL_InterlockedOperationBarrier(); - pComparandAndResult[0] = (int64_t)iResult; pComparandAndResult[1] = (int64_t)(iResult >> 64); - return iComparand == iResult; -} -#endif // HOST_AMD64 || HOST_ARM64 || HOST_LOONGARCH64 -#else // HOST_WINDOWS -#if defined(HOST_AMD64) || defined(HOST_ARM64) -EXTERN_C uint8_t _InterlockedCompareExchange128(int64_t volatile *, int64_t, int64_t, int64_t *); -#pragma intrinsic(_InterlockedCompareExchange128) -FORCEINLINE uint8_t PalInterlockedCompareExchange128(_Inout_ int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult) -{ - return _InterlockedCompareExchange128(pDst, iValueHigh, iValueLow, pComparandAndResult); -} -#endif // HOST_AMD64 || HOST_ARM64 -#endif // HOST_WINDOWS bool InterfaceDispatch_InitializePal(); From 18b3f132e5d1a14da0420b8665dcb9e3a3e656d4 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Tue, 28 Jan 2025 17:39:24 -0800 Subject: [PATCH 24/41] Add support for cleaning up memory for the cache blocks --- .../CachedInterfaceDispatch.cpp | 4 +- .../shared_runtime/CachedInterfaceDispatch.h | 5 ++ src/coreclr/vm/dynamicmethod.cpp | 15 ++++++ src/coreclr/vm/virtualcallstub.cpp | 48 +++++++++++++++++++ src/coreclr/vm/virtualcallstub.h | 11 +++++ 5 files changed, 81 insertions(+), 2 deletions(-) diff --git 
a/src/coreclr/shared_runtime/CachedInterfaceDispatch.cpp b/src/coreclr/shared_runtime/CachedInterfaceDispatch.cpp index 374af5dc33bc22..891c2b94c0baa2 100644 --- a/src/coreclr/shared_runtime/CachedInterfaceDispatch.cpp +++ b/src/coreclr/shared_runtime/CachedInterfaceDispatch.cpp @@ -314,7 +314,7 @@ static uintptr_t AllocateCache(uint32_t cCacheEntries, InterfaceDispatchCache * // Discards a cache by adding it to a list of caches that may still be in use but will be made available for // re-allocation at the next GC. -static void DiscardCache(InterfaceDispatchCache * pCache) +void InterfaceDispatch_DiscardCache(InterfaceDispatchCache * pCache) { CID_COUNTER_INC(CacheDiscards); @@ -479,7 +479,7 @@ PCODE InterfaceDispatch_UpdateDispatchCellCache(InterfaceDispatchCell * pCell, P // value or the cache we just allocated (another thread performed an update first). InterfaceDispatchCache * pDiscardedCache = UpdateCellStubAndCache(pCell, pStub, newCacheValue); if (pDiscardedCache) - DiscardCache(pDiscardedCache); + InterfaceDispatch_DiscardCache(pDiscardedCache); return (PCODE)pTargetCode; } diff --git a/src/coreclr/shared_runtime/CachedInterfaceDispatch.h b/src/coreclr/shared_runtime/CachedInterfaceDispatch.h index 9e58d3e53e72b6..690b1ebaf86be2 100644 --- a/src/coreclr/shared_runtime/CachedInterfaceDispatch.h +++ b/src/coreclr/shared_runtime/CachedInterfaceDispatch.h @@ -49,6 +49,11 @@ struct InterfaceDispatchCache bool InterfaceDispatch_Initialize(); PCODE InterfaceDispatch_UpdateDispatchCellCache(InterfaceDispatchCell * pCell, PCODE pTargetCode, MethodTable* pInstanceType, DispatchCellInfo *pNewCellInfo); void InterfaceDispatch_ReclaimUnusedInterfaceDispatchCaches(); +void InterfaceDispatch_DiscardCache(InterfaceDispatchCache * pCache); +inline void InterfaceDispatch_DiscardCacheHeader(InterfaceDispatchCacheHeader * pCache) +{ + return InterfaceDispatch_DiscardCache((InterfaceDispatchCache*)pCache); +} inline PCODE 
InterfaceDispatch_SearchDispatchCellCache(InterfaceDispatchCell * pCell, MethodTable* pInstanceType) { diff --git a/src/coreclr/vm/dynamicmethod.cpp b/src/coreclr/vm/dynamicmethod.cpp index 7f51aa3690ef24..5285bc76e7af1c 100644 --- a/src/coreclr/vm/dynamicmethod.cpp +++ b/src/coreclr/vm/dynamicmethod.cpp @@ -15,6 +15,8 @@ #include "nibblemapmacros.h" #include "stringliteralmap.h" #include "virtualcallstub.h" +#include "CachedInterfaceDispatchPal.h" +#include "CachedInterfaceDispatch.h" #ifndef DACCESS_COMPILE @@ -954,6 +956,19 @@ void LCGMethodResolver::RecycleIndCells() cellcurr = list->indcell; _ASSERTE(cellcurr != NULL); +#if defined (FEATURE_CACHED_INTERFACE_DISPATCH) + // Cached dispatch dispatch uses dynamically allocated caches that need to be freed individually + if (UseCachedInterfaceDispatch()) + { + InterfaceDispatchCell *pDispatchCell = (InterfaceDispatchCell*)cellcurr; + InterfaceDispatchCacheHeader* cellCacheHeader = pDispatchCell->GetCache(); + if (cellCacheHeader != NULL) + { + InterfaceDispatch_DiscardCacheHeader(cellCacheHeader); + pDispatchCell->m_pCache = 0; + } + } +#endif if (cellprev) *((BYTE**)cellprev) = cellcurr; diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index c3ba7aeca0a97b..1f90e0495389df 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -98,6 +98,15 @@ extern size_t g_dispatch_cache_chain_success_counter; SPTR_IMPL_INIT(VirtualCallStubManagerManager, VirtualCallStubManagerManager, g_pManager, NULL); +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH +struct CachedIndirectionCellBlockListNode +{ + CachedIndirectionCellBlockListNode *m_pNext; + TADDR m_pFiller; // Used to ensure that the Indirection Cells are double pointer aligned + InterfaceDispatchCell m_rgIndCells[0]; +}; +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + #ifndef DACCESS_COMPILE BYTE* GenerateDispatchStubCellEntryMethodDesc(LoaderAllocator *pLoaderAllocator, TypeHandle ownerType, MethodDesc *pMD, 
LCGMethodResolver *pResolver) @@ -793,6 +802,26 @@ VirtualCallStubManager::~VirtualCallStubManager() } #endif // FEATURE_VIRTUAL_STUB_DISPATCH +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + if (m_loaderAllocator->IsCollectible() && UseCachedInterfaceDispatch()) + { + CachedIndirectionCellBlockListNode * pBlockNode = m_indirectionBlocks; + while (pBlockNode != NULL) + { + for (int i = 0; i < INDCELLS_PER_BLOCK; i++) + { + InterfaceDispatchCacheHeader* cache = pBlockNode->m_rgIndCells[i].GetCache(); + if (cache != NULL) + { + InterfaceDispatch_DiscardCacheHeader(cache); + } + } + + pBlockNode = pBlockNode->m_pNext; + } + } +#endif + if (indcell_heap) { delete indcell_heap; indcell_heap = NULL;} #ifdef FEATURE_VIRTUAL_STUB_DISPATCH if (lookup_heap) { delete lookup_heap; lookup_heap = NULL;} @@ -1243,8 +1272,27 @@ BYTE *VirtualCallStubManager::GenerateStubIndirection(PCODE target, DispatchToke INTERFACE_DISPATCH_CACHED_OR_VSD(alignment = sizeof(TADDR) * 2, alignment = sizeof(TADDR)); // Free list is empty, allocate a block of indcells from indcell_heap and insert it into the free list. 
+ size_t cellsAllocationSize = cellsPerBlock * sizeOfIndCell; + size_t allocationSize = cellsAllocationSize; + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + if (m_loaderAllocator->IsCollectible() && UseCachedInterfaceDispatch()) + { + allocationSize += sizeof(CachedIndirectionCellBlockListNode); + } +#endif // FEATURE_CACHED_INTERFACE_DISPATCH BYTE ** pBlock = (BYTE **) (void *) indcell_heap->AllocAlignedMem(cellsPerBlock * sizeOfIndCell, alignment); +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + if (m_loaderAllocator->IsCollectible() && UseCachedInterfaceDispatch()) + { + CachedIndirectionCellBlockListNode * pBlockNode = (CachedIndirectionCellBlockListNode *)pBlock; + pBlockNode->m_pNext = m_indirectionBlocks; + m_indirectionBlocks = pBlockNode; + pBlock = (BYTE **)(&pBlockNode->m_rgIndCells[0]); + } +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + // return the first cell in the block and add the rest to the free list ret = (BYTE *)pBlock; diff --git a/src/coreclr/vm/virtualcallstub.h b/src/coreclr/vm/virtualcallstub.h index 3f457ddde83ee8..88cd82e9a15671 100644 --- a/src/coreclr/vm/virtualcallstub.h +++ b/src/coreclr/vm/virtualcallstub.h @@ -167,6 +167,10 @@ extern "C" void BackPatchWorkerStaticStub(PCODE returnAddr, TADDR siteAddrForReg typedef VPTR(class VirtualCallStubManager) PTR_VirtualCallStubManager; +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH +struct CachedIndirectionCellBlockListNode; +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + // VirtualCallStubManager is the heart of the stub dispatch logic. 
See the book of the runtime entry // // file:../../doc/BookOfTheRuntime/ClassLoader/VirtualStubDispatchDesign.doc @@ -305,6 +309,9 @@ class VirtualCallStubManager : public StubManager m_cur_counter_block_for_reclaim(NULL), m_cur_counter_block_for_reclaim_index(0), m_pNext(NULL) +#if defined FEATURE_CACHED_INTERFACE_DISPATCH + , m_indirectionBlocks (0) +#endif { LIMITED_METHOD_CONTRACT; ZeroMemory(&stats, sizeof(stats)); @@ -669,6 +676,10 @@ class VirtualCallStubManager : public StubManager // Used to keep track of all the VCSManager objects in the system. PTR_VirtualCallStubManager m_pNext; // Linked list pointer +#if defined FEATURE_CACHED_INTERFACE_DISPATCH + CachedIndirectionCellBlockListNode *m_indirectionBlocks; +#endif + public: #ifdef FEATURE_VIRTUAL_STUB_DISPATCH // Given a stub address, find the VCSManager that owns it. From 80ae02b52a7cc24a955d6ac85ab3072fa6ffc827 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Wed, 29 Jan 2025 11:16:58 -0800 Subject: [PATCH 25/41] Fix Open Virtual Dispatch on Delegates --- .../vm/amd64/CachedInterfaceDispatchCoreCLR.S | 16 ++++++++++++++++ .../amd64/CachedInterfaceDispatchCoreCLR.asm | 18 ++++++++++++++++++ .../vm/arm64/CachedInterfaceDispatchCoreCLR.S | 17 ++++++++++++++++- .../arm64/CachedInterfaceDispatchCoreCLR.asm | 17 +++++++++++++++++ src/coreclr/vm/comdelegate.cpp | 13 +++++++++++++ src/coreclr/vm/comdelegate.h | 1 + src/coreclr/vm/virtualcallstub.cpp | 5 +++-- 7 files changed, 84 insertions(+), 3 deletions(-) diff --git a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S index a127d134142f2d..23f55849b18e01 100644 --- a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S +++ b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S @@ -49,3 +49,19 @@ NESTED_ENTRY RhpInterfaceDispatchSlow, _TEXT, NoHandler TAILJMP_RAX NESTED_END RhpInterfaceDispatchSlow, _TEXT + +// On Input: +// r11 contains the address of the indirection cell (which is the 
MethodPtrAux field of the delegate) +NESTED_ENTRY CID_VirtualOpenDelegateDispatch, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK + + lea rdi, [rsp + __PWTB_TransitionBlock] // pTransitionBlock + mov rsi, r11 // indirection cell + + call C_FUNC(CID_VirtualOpenDelegateDispatchWorker) + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + TAILJMP_RAX + +NESTED_END CID_VirtualOpenDelegateDispatch, _TEXT diff --git a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.asm b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.asm index 804ed7bb77b065..dcd665f19b4ae0 100644 --- a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.asm +++ b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.asm @@ -5,6 +5,7 @@ include include AsmConstants.inc extern CID_ResolveWorker:proc + extern CID_VirtualOpenDelegateDispatchWorker:proc ;; Stub dispatch routine for dispatch to a vtable slot LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT @@ -51,4 +52,21 @@ NESTED_ENTRY RhpInterfaceDispatchSlow, _TEXT NESTED_END RhpInterfaceDispatchSlow, _TEXT +;; On Input: +;; r11 contains the address of the indirection cell (which is the MethodPtrAux field of the delegate) +;; [rsp+0] m_ReturnAddress: contains the return address of caller to stub +NESTED_ENTRY CID_VirtualOpenDelegateDispatch, _TEXT + + PROLOG_WITH_TRANSITION_BLOCK + + lea rcx, [rsp + __PWTB_TransitionBlock] ; pTransitionBlock + mov rdx, r11 ; indirection cell + + call CID_VirtualOpenDelegateDispatchWorker + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + TAILJMP_RAX + +NESTED_END CID_VirtualOpenDelegateDispatch, _TEXT + end \ No newline at end of file diff --git a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S index 70646e98ed6d06..a3b544474b0ff9 100644 --- a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S +++ b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S @@ -46,7 +46,7 @@ add x0, sp, #__PWTB_TransitionBlock // pTransitionBlock mov x1, x11 // indirection cell - bl 
C_FUNC(CID_ResolveWorker) + bl C_FUNC(CID_ResolveWorker) mov x9, x0 @@ -54,3 +54,18 @@ EPILOG_BRANCH_REG x9 NESTED_END RhpInterfaceDispatchSlow, _TEXT +// x11 contains the address of the indirection cell (which is the MethodPtrAux field of the delegate) + NESTED_ENTRY CID_VirtualOpenDelegateDispatch, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK + + add x0, sp, #__PWTB_TransitionBlock ; pTransitionBlock + mov x1, x11 // indirection cell + + bl C_FUNC(CID_VirtualOpenDelegateDispatchWorker) + + mov x9, x0 + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + EPILOG_BRANCH_REG x9 + NESTED_END CID_VirtualOpenDelegateDispatch diff --git a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm index aaee6655e4814b..078617e669605d 100644 --- a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm +++ b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm @@ -8,6 +8,7 @@ TEXTAREA EXTERN CID_ResolveWorker + EXTERN CID_VirtualOpenDelegateDispatchWorker ;; ;; Stub dispatch routine for dispatch to a vtable slot @@ -59,4 +60,20 @@ EPILOG_BRANCH_REG x9 NESTED_END RhpInterfaceDispatchSlow +;; x11 contains the address of the indirection cell (which is the MethodPtrAux field of the delegate) + NESTED_ENTRY CID_VirtualOpenDelegateDispatch + + PROLOG_WITH_TRANSITION_BLOCK + + add x0, sp, #__PWTB_TransitionBlock ; pTransitionBlock + mov x1, x11 ; indirection cell + + bl CID_VirtualOpenDelegateDispatchWorker + + mov x9, x0 + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + EPILOG_BRANCH_REG x9 + NESTED_END CID_VirtualOpenDelegateDispatch + END diff --git a/src/coreclr/vm/comdelegate.cpp b/src/coreclr/vm/comdelegate.cpp index 1994d5588d7258..80de23ea56adaa 100644 --- a/src/coreclr/vm/comdelegate.cpp +++ b/src/coreclr/vm/comdelegate.cpp @@ -1811,6 +1811,19 @@ extern "C" void QCALLTYPE Delegate_Construct(QCall::ObjectHandleOnStack _this, Q END_QCALL; } +MethodDesc *COMDelegate::GetMethodDescForOpenVirtualDelegate(OBJECTREF orDelegate) +{ + 
CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_COOPERATIVE; + } + CONTRACTL_END; + + return (MethodDesc*)((DELEGATEREF)orDelegate)->GetInvocationCount(); +} + MethodDesc *COMDelegate::GetMethodDesc(OBJECTREF orDelegate) { CONTRACTL diff --git a/src/coreclr/vm/comdelegate.h b/src/coreclr/vm/comdelegate.h index 064d6d0177b843..0d241673059ea1 100644 --- a/src/coreclr/vm/comdelegate.h +++ b/src/coreclr/vm/comdelegate.h @@ -69,6 +69,7 @@ class COMDelegate static Stub* GetInvokeMethodStub(EEImplMethodDesc* pMD); static MethodDesc * __fastcall GetMethodDesc(OBJECTREF obj); + static MethodDesc* GetMethodDescForOpenVirtualDelegate(OBJECTREF orDelegate); static OBJECTREF GetTargetObject(OBJECTREF obj); static BOOL IsTrueMulticastDelegate(OBJECTREF delegate); diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index 1f90e0495389df..203541297208bd 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -1511,7 +1511,7 @@ PCODE CachedInterfaceDispatchResolveWorker(StubCallSite* pCallSite, OBJECTREF *p } // Resolve a dispatch on a virtual open delegate without updating any pointers -extern "C" PCODE CID_VirtualOpenDelegateDispatch(TransitionBlock * pTransitionBlock) +extern "C" PCODE CID_VirtualOpenDelegateDispatchWorker(TransitionBlock * pTransitionBlock, PCODE* ppMethodPtrAuxField) { CONTRACTL { THROWS; @@ -1521,6 +1521,7 @@ extern "C" PCODE CID_VirtualOpenDelegateDispatch(TransitionBlock * pTransitionBl MODE_COOPERATIVE; } CONTRACTL_END; + OBJECTREF delegateObj = ObjectToOBJECTREF((Object*)(((BYTE*)ppMethodPtrAuxField) - DelegateObject::GetOffsetOfMethodPtrAux())); MAKE_CURRENT_THREAD_AVAILABLE(); #ifdef _DEBUG @@ -1547,7 +1548,7 @@ extern "C" PCODE CID_VirtualOpenDelegateDispatch(TransitionBlock * pTransitionBl _ASSERTE(!"Throw returned"); } - MethodDesc *pTargetMD = COMDelegate::GetMethodDesc(pObj); + MethodDesc *pTargetMD = COMDelegate::GetMethodDescForOpenVirtualDelegate(delegateObj); 
pSDFrame->SetFunction(pTargetMD); pSDFrame->Push(CURRENT_THREAD); From 081fac55010528f9a5ffd83ea2cdcd9d89fd2a6c Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Wed, 29 Jan 2025 11:21:55 -0800 Subject: [PATCH 26/41] Try to fix unix build --- src/coreclr/pal/inc/pal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index f1a2d97b299fb7..900d2ad5843083 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -3619,7 +3619,7 @@ Define_InterlockMethod( #endif #if defined(HOST_64BIT) && defined(FEATURE_CACHED_INTERFACE_DISPATCH) -FORCEINLINE uint8_t _InterlockedCompareExchange128(_Inout_ int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult) +FORCEINLINE uint8_t _InterlockedCompareExchange128(int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult) { __int128_t iComparand = ((__int128_t)pComparandAndResult[1] << 64) + (uint64_t)pComparandAndResult[0]; // TODO-LOONGARCH64: for LoongArch64, it supports 128bits atomic from 3A6000-CPU which is ISA1.1's version. 
From a0b9d2acddc8dc61abba5621bc1793db0b923230 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Wed, 29 Jan 2025 17:30:05 -0800 Subject: [PATCH 27/41] Fixes for issues found in CI --- .../DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs | 2 +- src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S | 2 +- src/coreclr/vm/virtualcallstub.h | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs index d3dee3d98bd9a0..e644439b2bec67 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs @@ -308,7 +308,7 @@ private void CreateNodeCaches() { return new DelayLoadHelperMethodImport( this, - DispatchImports, + HelperImports, ReadyToRunHelper.DelayLoad_Helper_Obj, key.Method, useVirtualCall: false, diff --git a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S index a3b544474b0ff9..59e8730cde73d1 100644 --- a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S +++ b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S @@ -59,7 +59,7 @@ PROLOG_WITH_TRANSITION_BLOCK - add x0, sp, #__PWTB_TransitionBlock ; pTransitionBlock + add x0, sp, #__PWTB_TransitionBlock // pTransitionBlock mov x1, x11 // indirection cell bl C_FUNC(CID_VirtualOpenDelegateDispatchWorker) diff --git a/src/coreclr/vm/virtualcallstub.h b/src/coreclr/vm/virtualcallstub.h index 88cd82e9a15671..0203b95a6e13b7 100644 --- a/src/coreclr/vm/virtualcallstub.h +++ b/src/coreclr/vm/virtualcallstub.h @@ -1650,13 +1650,15 @@ class BucketTable BYTE* GenerateDispatchStubCellEntryMethodDesc(LoaderAllocator *pLoaderAllocator, TypeHandle ownerType, MethodDesc *pMD, 
LCGMethodResolver *pResolver); BYTE* GenerateDispatchStubCellEntrySlot(LoaderAllocator *pLoaderAllocator, TypeHandle ownerType, int methodSlot, LCGMethodResolver *pResolver); -inline bool UseCachedInterfaceDispatch() { return true; } #if defined(FEATURE_CACHED_INTERFACE_DISPATCH) && defined(FEATURE_VIRTUAL_STUB_DISPATCH) +inline bool UseCachedInterfaceDispatch() { return true; } #define INTERFACE_DISPATCH_CACHED_OR_VSD(cachedDispatch, vsdDispath) if (UseCachedInterfaceDispatch()) { cachedDispatch; } else { vsdDispath; } #elif defined(FEATURE_CACHED_INTERFACE_DISPATCH) +inline bool UseCachedInterfaceDispatch() { return true; } #define INTERFACE_DISPATCH_CACHED_OR_VSD(cachedDispatch, vsdDispath) { cachedDispatch; } #elif defined(FEATURE_VIRTUAL_STUB_DISPATCH) +inline bool UseCachedInterfaceDispatch() { return false; } #define INTERFACE_DISPATCH_CACHED_OR_VSD(cachedDispatch, vsdDispath) { vsdDispath; } #else #error "No dispatch mechanism defined" From dcbe17cc81ec4fe7bc6ce21f7491f8861ed5c138 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Thu, 30 Jan 2025 18:23:39 +0000 Subject: [PATCH 28/41] Fix more issues found on Unix platforms --- .../nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.S | 4 ++-- src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S | 4 ++-- src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S | 4 ++-- src/coreclr/vm/virtualcallstub.cpp | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.S b/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.S index 92c9aff7a32ecb..2500ea41767266 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.S +++ b/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.S @@ -10,6 +10,7 @@ // Stub dispatch routine for dispatch to a vtable slot LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + int 3 // UNIXTODO: Implement this function ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation int 3 @@ -18,9 +19,8 
@@ LEAF_END RhpVTableOffsetDispatch, _TEXT // Cache miss case, call the runtime to resolve the target and update the cache. // Use universal transition helper to allow an exception to flow out of resolution LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT - // r10 contains indirection cell address, move to r11 where it will be passed by + // r11 contains indirection cell address already, so it will naturally be passed to RhpCidResolve // the universal transition thunk as an argument to RhpCidResolve - mov r11, r10 mov r10, [rip + REL_C_FUNC(RhpCidResolve)] jmp qword ptr [rip + REL_C_FUNC(RhpUniversalTransition_DebugStepTailCall)] diff --git a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S index 23f55849b18e01..b2a08d23d76254 100644 --- a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S +++ b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S @@ -16,10 +16,10 @@ LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT mov rax, r11 shr rax, 32 - // Load the MethodTable from the object instance in rcx, and add it to the vtable offset + // Load the MethodTable from the object instance in rdi, and add it to the vtable offset // to get the address in the vtable chunk list of what we want to dereference ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation - add rax, [rcx] + add rax, [rdi] // Load the target address of the vtable chunk into rax mov rax, [rax] diff --git a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S index 59e8730cde73d1..e67d804dd55bd5 100644 --- a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S +++ b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S @@ -55,7 +55,7 @@ NESTED_END RhpInterfaceDispatchSlow, _TEXT // x11 contains the address of the indirection cell (which is the MethodPtrAux field of the delegate) - NESTED_ENTRY CID_VirtualOpenDelegateDispatch, NoHandler + NESTED_ENTRY CID_VirtualOpenDelegateDispatch, _TEXT, NoHandler 
PROLOG_WITH_TRANSITION_BLOCK @@ -68,4 +68,4 @@ EPILOG_WITH_TRANSITION_BLOCK_TAILCALL EPILOG_BRANCH_REG x9 - NESTED_END CID_VirtualOpenDelegateDispatch + NESTED_END CID_VirtualOpenDelegateDispatch, _TEXT diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index 203541297208bd..0beb87dc737bb7 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -808,7 +808,7 @@ VirtualCallStubManager::~VirtualCallStubManager() CachedIndirectionCellBlockListNode * pBlockNode = m_indirectionBlocks; while (pBlockNode != NULL) { - for (int i = 0; i < INDCELLS_PER_BLOCK; i++) + for (UINT32 i = 0; i < INDCELLS_PER_BLOCK; i++) { InterfaceDispatchCacheHeader* cache = pBlockNode->m_rgIndCells[i].GetCache(); if (cache != NULL) From 48b5009ee3c8e75e4c5b758f5f3fec8d3bd9cc14 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Fri, 31 Jan 2025 14:53:58 -0800 Subject: [PATCH 29/41] Fix x64 stub dispatch code to use the right register, and switch to allocating the memory for the dispatch using the LoaderHeap Also tweak a collectible assembly test to actually use cached interface dispatch --- .../shared_runtime/amd64/StubDispatch.S | 12 +++++----- .../shared_runtime/amd64/StubDispatch.asm | 2 +- .../vm/CachedInterfaceDispatchCoreclr.cpp | 4 ++-- .../Statics/CollectibleTLSStaticCollection.cs | 23 ++++++++++++++++++- 4 files changed, 31 insertions(+), 10 deletions(-) diff --git a/src/coreclr/shared_runtime/amd64/StubDispatch.S b/src/coreclr/shared_runtime/amd64/StubDispatch.S index b93fa628c10007..2b07c127439909 100644 --- a/src/coreclr/shared_runtime/amd64/StubDispatch.S +++ b/src/coreclr/shared_runtime/amd64/StubDispatch.S @@ -13,9 +13,9 @@ LEAF_ENTRY RhpInterfaceDispatch\entries, _TEXT - // r10 currently contains the indirection cell address. - // load r11 to point to the cache block. - mov r11, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] + // r11 currently contains the indirection cell address. 
+ // load r10 to point to the cache block. + mov r10, [r11 + OFFSETOF__InterfaceDispatchCell__m_pCache] // Load the MethodTable from the object instance in rdi. ALTERNATE_ENTRY RhpInterfaceDispatchAVLocation\entries @@ -26,14 +26,14 @@ LEAF_ENTRY RhpInterfaceDispatch\entries, _TEXT // For each entry in the cache, see if its MethodTable type matches the MethodTable in rax. // If so, call the second cache entry. If not, skip the InterfaceDispatchCacheEntry. .rept \entries - cmp rax, [r11 + CurrentOffset] + cmp rax, [r10 + CurrentOffset] jne 0f - jmp [r11 + CurrentOffset + 8] + jmp [r10 + CurrentOffset + 8] 0: CurrentOffset = CurrentOffset + 16 .endr - // r10 still contains the indirection cell address. + // r11 still contains the indirection cell address. jmp C_FUNC(RhpInterfaceDispatchSlow) LEAF_END RhpInterfaceDispatch\entries, _TEXT diff --git a/src/coreclr/shared_runtime/amd64/StubDispatch.asm b/src/coreclr/shared_runtime/amd64/StubDispatch.asm index 647044e5c8459f..1863a43c14720d 100644 --- a/src/coreclr/shared_runtime/amd64/StubDispatch.asm +++ b/src/coreclr/shared_runtime/amd64/StubDispatch.asm @@ -29,7 +29,7 @@ LEAF_ENTRY StubName, _TEXT ;EXTERN CID_g_cInterfaceDispatches : DWORD ;inc [CID_g_cInterfaceDispatches] - ;; r10 currently contains the indirection cell address. + ;; r11 currently contains the indirection cell address. ;; load r10 to point to the cache block. 
mov r10, [r11 + OFFSETOF__InterfaceDispatchCell__m_pCache] diff --git a/src/coreclr/vm/CachedInterfaceDispatchCoreclr.cpp b/src/coreclr/vm/CachedInterfaceDispatchCoreclr.cpp index 304e4be5f4abf4..2873081b02df61 100644 --- a/src/coreclr/vm/CachedInterfaceDispatchCoreclr.cpp +++ b/src/coreclr/vm/CachedInterfaceDispatchCoreclr.cpp @@ -11,11 +11,11 @@ bool InterfaceDispatch_InitializePal() // Allocate memory aligned at sizeof(void*)*2 boundaries void *InterfaceDispatch_AllocDoublePointerAligned(size_t size) { - return malloc(size); + return (void*)SystemDomain::GetGlobalLoaderAllocator()->GetHighFrequencyHeap()->AllocAlignedMem(size, sizeof(TADDR) * 2); } // Allocate memory aligned at at least sizeof(void*) void *InterfaceDispatch_AllocPointerAligned(size_t size) { - return malloc(size); + return (void*)SystemDomain::GetGlobalLoaderAllocator()->GetHighFrequencyHeap()->AllocAlignedMem(size, sizeof(TADDR)); } diff --git a/src/tests/Loader/CollectibleAssemblies/Statics/CollectibleTLSStaticCollection.cs b/src/tests/Loader/CollectibleAssemblies/Statics/CollectibleTLSStaticCollection.cs index d84d3d05769c80..bac360c836e626 100644 --- a/src/tests/Loader/CollectibleAssemblies/Statics/CollectibleTLSStaticCollection.cs +++ b/src/tests/Loader/CollectibleAssemblies/Statics/CollectibleTLSStaticCollection.cs @@ -11,6 +11,19 @@ namespace CollectibleThreadStaticShutdownRace { + public interface IGetAnInt + { + int GetInt(); + } + + public class GetAnInt : IGetAnInt + { + public int GetInt() + { + return 1; + } + } + public class CollectibleThreadStaticShutdownRace { Action? 
UseTLSStaticFromLoaderAllocator = null; @@ -40,6 +53,10 @@ void ThreadThatWaitsForLoaderAllocatorToDisappear() } } + public static IGetAnInt s_getAnInt = new GetAnInt(); + static FieldInfo s_getAnIntField; + static MethodInfo s_getAnIntMethod; + void CreateLoaderAllocatorWithTLS() { ulong collectibleIndex = s_collectibleIndex++; @@ -66,7 +83,8 @@ void CreateLoaderAllocatorWithTLS() "Method", MethodAttributes.Public | MethodAttributes.Static); var ilg = mb.GetILGenerator(); - ilg.Emit(OpCodes.Ldc_I4_1); + ilg.Emit(OpCodes.Ldsfld, s_getAnIntField); + ilg.Emit(OpCodes.Callvirt, s_getAnIntMethod); ilg.Emit(OpCodes.Stsfld, fb); ilg.Emit(OpCodes.Ret); } @@ -96,6 +114,9 @@ void ForceCollectibleTLSStaticToGoThroughThreadTermination() [Fact] public static void TestEntryPoint() { + s_getAnIntField = typeof(CollectibleThreadStaticShutdownRace).GetField("s_getAnInt"); + s_getAnIntMethod = typeof(IGetAnInt).GetMethod("GetInt"); + new CollectibleThreadStaticShutdownRace().ForceCollectibleTLSStaticToGoThroughThreadTermination(); } } From fa72602ae7c0cf9e8d302a696577cee4c8fa19eb Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Mon, 3 Feb 2025 13:00:52 -0800 Subject: [PATCH 30/41] Add environment variable to control use of cached dispatch for testing scenarios --- src/coreclr/clrfeatures.cmake | 10 +++++++--- src/coreclr/inc/clrconfigvalues.h | 1 + src/coreclr/pal/inc/pal.h | 15 ++++++++++----- src/coreclr/vm/eeconfig.cpp | 5 +++++ src/coreclr/vm/eeconfig.h | 9 +++++++++ src/coreclr/vm/virtualcallstub.h | 4 +++- 6 files changed, 35 insertions(+), 9 deletions(-) diff --git a/src/coreclr/clrfeatures.cmake b/src/coreclr/clrfeatures.cmake index 30776069c00062..9c5f071ac16737 100644 --- a/src/coreclr/clrfeatures.cmake +++ b/src/coreclr/clrfeatures.cmake @@ -40,14 +40,18 @@ if (CLR_CMAKE_TARGET_WIN32) set(FEATURE_TYPEEQUIVALENCE 1) endif(CLR_CMAKE_TARGET_WIN32) -if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64) - if (CLR_CMAKE_TARGET_UNIX AND CLR_CMAKE_TARGET_ARCH_AMD64) 
+# Enable cached interface dispatch so that we can test/debug it more easily on non-embedded scenarios (set DOTNET_UseCachedInterfaceDispatch=1) +# Only enable in chk/debug builds as this support isn't intended for retail use elsewhere +if (((CMAKE_BUILD_TYPE STREQUAL "Debug") OR (CMAKE_BUILD_TYPE STREQUAL "Checked")) AND (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64)) + if (CLR_CMAKE_HOST_UNIX AND CLR_CMAKE_HOST_ARCH_AMD64) # Allow 16 byte compare-exchange (cmpxchg16b) add_compile_options(-mcx16) endif() set(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH 1) endif() -if (NOT (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64)) +if (CLR_CMAKE_TARGET_MACCATALYST OR CLR_CMAKE_TARGET_IOS OR CLR_CMAKE_TARGET_TVOS) + set(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH 1) +else() set(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH 1) endif() diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 7e23d58858dd93..a921cc295f92d6 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -581,6 +581,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_VirtualCallStubLogging, W("VirtualCallStubLogg CONFIG_DWORD_INFO(INTERNAL_VirtualCallStubMissCount, W("VirtualCallStubMissCount"), 100, "Used only when STUB_LOGGING is defined, which by default is not.") CONFIG_DWORD_INFO(INTERNAL_VirtualCallStubResetCacheCounter, W("VirtualCallStubResetCacheCounter"), 0, "Used only when STUB_LOGGING is defined, which by default is not.") CONFIG_DWORD_INFO(INTERNAL_VirtualCallStubResetCacheIncr, W("VirtualCallStubResetCacheIncr"), 0, "Used only when STUB_LOGGING is defined, which by default is not.") +CONFIG_DWORD_INFO(INTERNAL_UseCachedInterfaceDispatch, W("UseCachedInterfaceDispatch"), 0, "If cached interface dispatch is compiled in, use that instead of virtual stub dispatch") /// /// Watson diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index 900d2ad5843083..8f0092ded42f2f 100644 --- a/src/coreclr/pal/inc/pal.h +++ 
b/src/coreclr/pal/inc/pal.h @@ -3622,11 +3622,16 @@ Define_InterlockMethod( FORCEINLINE uint8_t _InterlockedCompareExchange128(int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult) { __int128_t iComparand = ((__int128_t)pComparandAndResult[1] << 64) + (uint64_t)pComparandAndResult[0]; - // TODO-LOONGARCH64: for LoongArch64, it supports 128bits atomic from 3A6000-CPU which is ISA1.1's version. - // The LA64's compiler will translate the `__sync_val_compare_and_swap` into calling the libatomic's library interface to emulate - // the 128-bit CAS by mutex_lock if the target processor doesn't support the ISA1.1. - // But this emulation by libatomic doesn't satisfy requirements here which it must update two adjacent pointers atomically. - // this is being discussed in https://github.com/dotnet/runtime/issues/109276. + // TODO-LOONGARCH64: the 128-bit CAS is supported starting from the 3A6000 CPU (ISA1.1). + // When running on older hardware that doesn't support native CAS-128, the system falls back + // to a mutex-based approach via libatomic, which is not suitable for runtime requirements. + // + // TODO-RISCV64: double-check if libatomic's emulated CAS-128 works as expected once AOT applications are + // functional on linux-riscv64: https://github.com/dotnet/runtime/issues/106223. + // CAS-128 is natively supported starting with the Zacas extension in Linux 6.8; however, hardware support + // for RVA23 profile is not available at the time of writing. + // + // See https://github.com/dotnet/runtime/issues/109276. 
__int128_t iResult = __sync_val_compare_and_swap((__int128_t volatile*)pDst, iComparand, ((__int128_t)iValueHigh << 64) + (uint64_t)iValueLow); PAL_InterlockedOperationBarrier(); pComparandAndResult[0] = (int64_t)iResult; pComparandAndResult[1] = (int64_t)(iResult >> 64); diff --git a/src/coreclr/vm/eeconfig.cpp b/src/coreclr/vm/eeconfig.cpp index bd07afadb861ff..967d0d4f94b23f 100644 --- a/src/coreclr/vm/eeconfig.cpp +++ b/src/coreclr/vm/eeconfig.cpp @@ -769,6 +769,11 @@ HRESULT EEConfig::sync() #if defined(FEATURE_GDBJIT_FRAME) fGDBJitEmitDebugFrame = CLRConfig::GetConfigValue(CLRConfig::INTERNAL_GDBJitEmitDebugFrame) != 0; #endif + +#if defined(FEATURE_CACHED_INTERFACE_DISPATCH) && defined(FEATURE_VIRTUAL_STUB_DISPATCH) + fUseCachedInterfaceDispatch = CLRConfig::GetConfigValue(CLRConfig::INTERNAL_UseCachedInterfaceDispatch) != 0; +#endif // defined(FEATURE_CACHED_INTERFACE_DISPATCH) && defined(FEATURE_VIRTUAL_STUB_DISPATCH) + return hr; } diff --git a/src/coreclr/vm/eeconfig.h b/src/coreclr/vm/eeconfig.h index 1f66a86fec1269..d46b4b5db4cfa3 100644 --- a/src/coreclr/vm/eeconfig.h +++ b/src/coreclr/vm/eeconfig.h @@ -124,6 +124,10 @@ class EEConfig } #endif // FEATURE_GDBJIT && _DEBUG +#if defined(FEATURE_CACHED_INTERFACE_DISPATCH) && defined(FEATURE_VIRTUAL_STUB_DISPATCH) + bool UseCachedInterfaceDispatch() const { LIMITED_METHOD_CONTRACT; return fUseCachedInterfaceDispatch; } +#endif // defined(FEATURE_CACHED_INTERFACE_DISPATCH) && defined(FEATURE_VIRTUAL_STUB_DISPATCH) + #if defined(FEATURE_GDBJIT_FRAME) inline bool ShouldEmitDebugFrame(void) const {LIMITED_METHOD_CONTRACT; return fGDBJitEmitDebugFrame;} #endif // FEATURE_GDBJIT_FRAME @@ -642,6 +646,11 @@ class EEConfig #if defined(FEATURE_GDBJIT_FRAME) bool fGDBJitEmitDebugFrame; #endif + +#if defined(FEATURE_CACHED_INTERFACE_DISPATCH) && defined(FEATURE_VIRTUAL_STUB_DISPATCH) + bool fUseCachedInterfaceDispatch; +#endif // defined(FEATURE_CACHED_INTERFACE_DISPATCH) && defined(FEATURE_VIRTUAL_STUB_DISPATCH) + 
public: enum BitForMask { diff --git a/src/coreclr/vm/virtualcallstub.h b/src/coreclr/vm/virtualcallstub.h index 0203b95a6e13b7..e084115e19f20f 100644 --- a/src/coreclr/vm/virtualcallstub.h +++ b/src/coreclr/vm/virtualcallstub.h @@ -21,6 +21,8 @@ //#define STUB_LOGGING #endif +bool UseCachedInterfaceDispatch(); + #include "stubmgr.h" ///////////////////////////////////////////////////////////////////////////////////// @@ -1652,7 +1654,7 @@ BYTE* GenerateDispatchStubCellEntrySlot(LoaderAllocator *pLoaderAllocator, TypeH #if defined(FEATURE_CACHED_INTERFACE_DISPATCH) && defined(FEATURE_VIRTUAL_STUB_DISPATCH) -inline bool UseCachedInterfaceDispatch() { return true; } +inline bool UseCachedInterfaceDispatch() { return g_pConfig->UseCachedInterfaceDispatch(); } #define INTERFACE_DISPATCH_CACHED_OR_VSD(cachedDispatch, vsdDispath) if (UseCachedInterfaceDispatch()) { cachedDispatch; } else { vsdDispath; } #elif defined(FEATURE_CACHED_INTERFACE_DISPATCH) inline bool UseCachedInterfaceDispatch() { return true; } From 08ac0a15adf613d13ef6d9a4ebb333bc18ee2311 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Tue, 4 Feb 2025 09:39:56 -0800 Subject: [PATCH 31/41] Fix interface stepping for cached interface dispatch --- src/coreclr/vm/virtualcallstub.cpp | 50 +++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index 0beb87dc737bb7..cfd7546786bed1 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -4203,7 +4203,13 @@ BOOL VirtualCallStubManagerManager::CheckIsStub_Internal( WRAPPER_NO_CONTRACT; SUPPORTS_DAC; - // Forwarded to from RangeSectionStubManager +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + if (UseCachedInterfaceDispatch()) + { + return isCachedInterfaceDispatchStub(stubStartAddress); + } +#endif + // Forwarded to from RangeSectionStubManager for other cases return FALSE; } @@ -4216,15 +4222,21 @@ BOOL 
VirtualCallStubManagerManager::DoTraceStub( VirtualCallStubManager *pMgr = NULL; #ifdef FEATURE_CACHED_INTERFACE_DISPATCH - // Always use the global loader allocator, and find the correct one during the trace itself - pMgr = SystemDomain::GetGlobalLoaderAllocator()->GetVirtualCallStubManager(); + if (UseCachedInterfaceDispatch()) + { + // Always use the global loader allocator, and find the correct one during the trace itself + pMgr = SystemDomain::GetGlobalLoaderAllocator()->GetVirtualCallStubManager(); + } #endif // FEATURE_CACHED_INTERFACE_DISPATCH #ifdef FEATURE_VIRTUAL_STUB_DISPATCH - // Find the owning manager. We should succeed, since presumably someone already - // called CheckIsStub on us to find out that we own the address, and already - // called TraceManager to initiate a trace. - pMgr = FindVirtualCallStubManager(stubStartAddress); + if (!UseCachedInterfaceDispatch()) + { + // Find the owning manager. We should succeed, since presumably someone already + // called CheckIsStub on us to find out that we own the address, and already + // called TraceManager to initiate a trace. + pMgr = FindVirtualCallStubManager(stubStartAddress); + } #endif // FEATURE_VIRTUAL_STUB_DISPATCH CONSISTENCY_CHECK(CheckPointer(pMgr)); @@ -4289,15 +4301,25 @@ BOOL VirtualCallStubManagerManager::TraceManager( { WRAPPER_NO_CONTRACT; + VirtualCallStubManager *pMgr = NULL; + #ifdef FEATURE_CACHED_INTERFACE_DISPATCH - // Always use the global loader allocator, and find the correct one during the trace itself - VirtualCallStubManager *pMgr = SystemDomain::GetGlobalLoaderAllocator()->GetVirtualCallStubManager(); -#else // FEATURE_CACHED_INTERFACE_DISPATCH - // Find the owning manager. We should succeed, since presumably someone already - // called CheckIsStub on us to find out that we own the address. 
- VirtualCallStubManager *pMgr = FindVirtualCallStubManager(GetIP(pContext)); - CONSISTENCY_CHECK(CheckPointer(pMgr)); + if (UseCachedInterfaceDispatch()) + { + // Always use the global loader allocator, and find the correct one during the trace itself + pMgr = SystemDomain::GetGlobalLoaderAllocator()->GetVirtualCallStubManager(); + } +#endif + +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH + if (!UseCachedInterfaceDispatch()) + { + // Find the owning manager. We should succeed, since presumably someone already + // called CheckIsStub on us to find out that we own the address. + pMgr = FindVirtualCallStubManager(GetIP(pContext)); + } #endif // FEATURE_CACHED_INTERFACE_DISPATCH + CONSISTENCY_CHECK(CheckPointer(pMgr)); // Forward the call to the appropriate manager. return pMgr->TraceManager(thread, trace, pContext, pRetAddr); From 006537dbe03443c8cd106b479605d1623a99f6b2 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Thu, 6 Feb 2025 17:34:11 -0800 Subject: [PATCH 32/41] Respond to most of the feedback --- src/coreclr/clrfeatures.cmake | 13 ++-- src/coreclr/debug/CMakeLists.txt | 8 +- src/coreclr/debug/daccess/request.cpp | 4 +- .../nativeaot/Runtime/amd64/MiscStubs.asm | 3 - src/coreclr/vm/CMakeLists.txt | 73 ++++--------------- .../vm/amd64/CachedInterfaceDispatchCoreCLR.S | 4 + .../amd64/CachedInterfaceDispatchCoreCLR.asm | 4 + src/coreclr/vm/amd64/VirtualCallStubAMD64.asm | 3 + src/coreclr/vm/amd64/virtualcallstubamd64.S | 3 + .../vm/arm64/CachedInterfaceDispatchCoreCLR.S | 4 + .../arm64/CachedInterfaceDispatchCoreCLR.asm | 4 + src/coreclr/vm/jitinterface.cpp | 2 +- src/coreclr/vm/prestub.cpp | 2 +- src/coreclr/vm/virtualcallstub.cpp | 8 +- src/coreclr/vm/virtualcallstub.h | 2 +- 15 files changed, 54 insertions(+), 83 deletions(-) diff --git a/src/coreclr/clrfeatures.cmake b/src/coreclr/clrfeatures.cmake index 9c5f071ac16737..f55187a54c7ccb 100644 --- a/src/coreclr/clrfeatures.cmake +++ b/src/coreclr/clrfeatures.cmake @@ -42,12 +42,8 @@ endif(CLR_CMAKE_TARGET_WIN32) 
# Enable cached interface dispatch so that we can test/debug it more easily on non-embedded scenarios (set DOTNET_UseCachedInterfaceDispatch=1) # Only enable in chk/debug builds as this support isn't intended for retail use elsewhere -if (((CMAKE_BUILD_TYPE STREQUAL "Debug") OR (CMAKE_BUILD_TYPE STREQUAL "Checked")) AND (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64)) - if (CLR_CMAKE_HOST_UNIX AND CLR_CMAKE_HOST_ARCH_AMD64) - # Allow 16 byte compare-exchange (cmpxchg16b) - add_compile_options(-mcx16) - endif() - set(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH 1) +if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64) + set(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH $,1,0>) endif() if (CLR_CMAKE_TARGET_MACCATALYST OR CLR_CMAKE_TARGET_IOS OR CLR_CMAKE_TARGET_TVOS) @@ -55,3 +51,8 @@ if (CLR_CMAKE_TARGET_MACCATALYST OR CLR_CMAKE_TARGET_IOS OR CLR_CMAKE_TARGET_TVO else() set(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH 1) endif() + +if (CLR_CMAKE_HOST_UNIX AND CLR_CMAKE_HOST_ARCH_AMD64) + # Allow 16 byte compare-exchange (cmpxchg16b) + add_compile_options($<${FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH}:-mcx16>) +endif() diff --git a/src/coreclr/debug/CMakeLists.txt b/src/coreclr/debug/CMakeLists.txt index 26d3369d49d13e..0d52fa77527ea3 100644 --- a/src/coreclr/debug/CMakeLists.txt +++ b/src/coreclr/debug/CMakeLists.txt @@ -1,10 +1,6 @@ -if(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) - add_definitions(-DFEATURE_CACHED_INTERFACE_DISPATCH) -endif(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) -if(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH) - add_definitions(-DFEATURE_VIRTUAL_STUB_DISPATCH) -endif(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH) +add_compile_definitions($<${FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH}:FEATURE_CACHED_INTERFACE_DISPATCH>) +add_compile_definitions($<${FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH}:FEATURE_VIRTUAL_STUB_DISPATCH>) add_subdirectory(daccess) add_subdirectory(ee) diff --git a/src/coreclr/debug/daccess/request.cpp 
b/src/coreclr/debug/daccess/request.cpp index 7614f3853c5325..c52098e810e0b3 100644 --- a/src/coreclr/debug/daccess/request.cpp +++ b/src/coreclr/debug/daccess/request.cpp @@ -3615,9 +3615,7 @@ ClrDataAccess::TraverseVirtCallStubHeap(CLRDATA_ADDRESS pAppDomain, VCSHeapType case CacheEntryHeap: #ifdef FEATURE_VIRTUAL_STUB_DISPATCH pLoaderHeap = pVcsMgr->cache_entry_heap; -#else - hr = S_OK; -#endif +#endif // FEATURE_VIRTUAL_STUB_DISPATCH break; default: diff --git a/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.asm b/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.asm index 3b2f3147316450..098c402b2106ee 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/MiscStubs.asm @@ -3,9 +3,6 @@ include AsmMacros.inc -EXTERN RhpCidResolve : PROC -EXTERN RhpUniversalTransition_DebugStepTailCall : PROC - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; The following helper will access ("probe") a word on each page of the stack ; starting with the page right beneath rsp down to the one pointed to by r11. 
diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index f4500e4424aa17..fc0dfdaadd9df3 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -44,13 +44,8 @@ if(FEATURE_PERFTRACING) include_directories(${CORECLR_USEREVENTS_SHIM_DIR}) endif(FEATURE_PERFTRACING) -if(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) - add_definitions(-DFEATURE_CACHED_INTERFACE_DISPATCH) -endif(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) - -if(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH) - add_definitions(-DFEATURE_VIRTUAL_STUB_DISPATCH) -endif(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH) +add_compile_definitions($<${FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH}:FEATURE_CACHED_INTERFACE_DISPATCH>) +add_compile_definitions($<${FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH}:FEATURE_VIRTUAL_STUB_DISPATCH>) set(VM_SOURCES_DAC_AND_WKS_COMMON appdomain.cpp @@ -300,6 +295,8 @@ set(VM_SOURCES_WKS assemblynative.cpp assemblyspec.cpp baseassemblyspec.cpp + ${SHARED_RUNTIME_DIR}/CachedInterfaceDispatch.cpp + CachedInterfaceDispatchCoreclr.cpp cachelinealloc.cpp callconvbuilder.cpp callhelpers.cpp @@ -381,16 +378,9 @@ set(VM_SOURCES_WKS typeparse.cpp weakreferencenative.cpp yieldprocessornormalized.cpp - ${VM_SOURCES_GDBJIT} +${VM_SOURCES_GDBJIT} ) -if(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) - list(APPEND VM_SOURCES_WKS - ${SHARED_RUNTIME_DIR}/CachedInterfaceDispatch.cpp - CachedInterfaceDispatchCoreclr.cpp - ) -endif(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) - # coreclr needs to compile codeman.cpp differently depending on flavor (i.e. dll vs. 
static lib)) list(REMOVE_ITEM VM_SOURCES_WKS codeman.cpp) @@ -627,6 +617,7 @@ if(CLR_CMAKE_TARGET_WIN32) if(CLR_CMAKE_TARGET_ARCH_AMD64) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/AsmHelpers.asm + ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.asm ${ARCH_SOURCES_DIR}/CallDescrWorkerAMD64.asm ${ARCH_SOURCES_DIR}/ComCallPreStub.asm ${ARCH_SOURCES_DIR}/GenericComCallStubs.asm @@ -639,25 +630,15 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64) ${ARCH_SOURCES_DIR}/patchedcode.asm ${ARCH_SOURCES_DIR}/PInvokeStubs.asm ${ARCH_SOURCES_DIR}/RedirectedHandledJITCase.asm + ${SHARED_RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.asm ${ARCH_SOURCES_DIR}/ThePreStubAMD64.asm ${ARCH_SOURCES_DIR}/thunktemplates.asm ${ARCH_SOURCES_DIR}/Context.asm ${ARCH_SOURCES_DIR}/ExternalMethodFixupThunk.asm ${ARCH_SOURCES_DIR}/UMThunkStub.asm + ${ARCH_SOURCES_DIR}/VirtualCallStubAMD64.asm ) - if(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH) - list(APPEND VM_SOURCES_WKS_ARCH_ASM - ${ARCH_SOURCES_DIR}/VirtualCallStubAMD64.asm - ) - endif(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH) - - if(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) - list(APPEND VM_SOURCES_WKS_ARCH_ASM - ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.asm - ) - endif(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) - set(VM_HEADERS_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/asmconstants.h ) @@ -680,15 +661,11 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) ${ARCH_SOURCES_DIR}/CallDescrWorkerARM64.asm ${ARCH_SOURCES_DIR}/patchedcode.asm ${ARCH_SOURCES_DIR}/PInvokeStubs.asm + ${SHARED_RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.asm ${ARCH_SOURCES_DIR}/thunktemplates.asm + ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.asm ) - if(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) - list(APPEND VM_SOURCES_WKS_ARCH_ASM - ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.asm - ) - endif(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) - set(VM_HEADERS_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/asmconstants.h ) @@ -700,6 +677,7 @@ else(CLR_CMAKE_TARGET_WIN32) if(CLR_CMAKE_TARGET_ARCH_AMD64) 
set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/asmhelpers.S + ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.S ${ARCH_SOURCES_DIR}/calldescrworkeramd64.S ${ARCH_SOURCES_DIR}/externalmethodfixupthunk.S ${ARCH_SOURCES_DIR}/getstate.S @@ -710,25 +688,14 @@ else(CLR_CMAKE_TARGET_WIN32) ${ARCH_SOURCES_DIR}/patchedcode.S ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/redirectedhandledjitcase.S + ${SHARED_RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.S ${ARCH_SOURCES_DIR}/theprestubamd64.S ${ARCH_SOURCES_DIR}/thunktemplates.S ${ARCH_SOURCES_DIR}/Context.S ${ARCH_SOURCES_DIR}/unixasmhelpers.S ${ARCH_SOURCES_DIR}/umthunkstub.S + ${ARCH_SOURCES_DIR}/virtualcallstubamd64.S ) - - if(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH) - list(APPEND VM_SOURCES_WKS_ARCH_ASM - ${ARCH_SOURCES_DIR}/virtualcallstubamd64.S - ) - endif(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH) - - if(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) - list(APPEND VM_SOURCES_WKS_ARCH_ASM - ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.S - ) - endif(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) - elseif(CLR_CMAKE_TARGET_ARCH_I386) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/ehhelpers.S @@ -750,16 +717,13 @@ else(CLR_CMAKE_TARGET_WIN32) elseif(CLR_CMAKE_TARGET_ARCH_ARM64) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/asmhelpers.S + ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.S ${ARCH_SOURCES_DIR}/calldescrworkerarm64.S ${ARCH_SOURCES_DIR}/patchedcode.S ${ARCH_SOURCES_DIR}/pinvokestubs.S + ${SHARED_RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.S ${ARCH_SOURCES_DIR}/thunktemplates.S ) - if(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) - list(APPEND VM_SOURCES_WKS_ARCH_ASM - ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.S - ) - endif(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) elseif(CLR_CMAKE_TARGET_ARCH_LOONGARCH64) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/asmhelpers.S @@ -779,13 +743,6 @@ else(CLR_CMAKE_TARGET_WIN32) set(ASM_SUFFIX S) endif(CLR_CMAKE_TARGET_WIN32) 
-if(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) - set(VM_SOURCES_WKS_ARCH_ASM - ${VM_SOURCES_WKS_ARCH_ASM} - ${SHARED_RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.${ASM_SUFFIX} - ) -endif(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH) - if(CLR_CMAKE_TARGET_ARCH_AMD64) set(VM_SOURCES_DAC_AND_WKS_ARCH ${ARCH_SOURCES_DIR}/cgenamd64.cpp diff --git a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S index b2a08d23d76254..a3a45be29ddb75 100644 --- a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S +++ b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S @@ -5,6 +5,8 @@ #include "unixasmmacros.inc" #include "asmconstants.h" +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + // Stub dispatch routine for dispatch to a vtable slot LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT // r11 currently contains the indirection cell address. @@ -65,3 +67,5 @@ NESTED_ENTRY CID_VirtualOpenDelegateDispatch, _TEXT, NoHandler TAILJMP_RAX NESTED_END CID_VirtualOpenDelegateDispatch, _TEXT + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.asm b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.asm index dcd665f19b4ae0..f5cacb3207e150 100644 --- a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.asm +++ b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.asm @@ -4,6 +4,8 @@ include include AsmConstants.inc +ifdef FEATURE_CACHED_INTERFACE_DISPATCH + extern CID_ResolveWorker:proc extern CID_VirtualOpenDelegateDispatchWorker:proc @@ -69,4 +71,6 @@ NESTED_ENTRY CID_VirtualOpenDelegateDispatch, _TEXT NESTED_END CID_VirtualOpenDelegateDispatch, _TEXT +endif ;; FEATURE_CACHED_INTERFACE_DISPATCH + end \ No newline at end of file diff --git a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm index b533789980c510..bbb19107e40715 100644 --- a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm +++ b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm @@ 
-4,6 +4,8 @@ include <AsmMacros.inc> include AsmConstants.inc +ifdef FEATURE_VIRTUAL_STUB_DISPATCH + CHAIN_SUCCESS_COUNTER equ ?g_dispatch_cache_chain_success_counter@@3_KA extern VSD_ResolveWorker:proc @@ -83,4 +85,5 @@ Fail: LEAF_END ResolveWorkerChainLookupAsmStub, _TEXT +endif ;; FEATURE_VIRTUAL_STUB_DISPATCH end diff --git a/src/coreclr/vm/amd64/virtualcallstubamd64.S b/src/coreclr/vm/amd64/virtualcallstubamd64.S index 09c2d608442564..822eaaf2718f6e 100644 --- a/src/coreclr/vm/amd64/virtualcallstubamd64.S +++ b/src/coreclr/vm/amd64/virtualcallstubamd64.S @@ -4,6 +4,8 @@ .intel_syntax noprefix #include "unixasmmacros.inc" +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH + // This is the number of times a successful chain lookup will occur before the // entry is promoted to the front of the chain. This is declared as extern because // the default value (CALL_STUB_CACHE_INITIAL_SUCCESS_COUNT) is defined in the header. @@ -87,3 +89,4 @@ Fail_RWCLAS: LEAF_END ResolveWorkerChainLookupAsmStub, _TEXT +#endif // FEATURE_VIRTUAL_STUB_DISPATCH \ No newline at end of file diff --git a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S index e67d804dd55bd5..eefc07a5bbdce5 100644 --- a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S +++ b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S @@ -4,6 +4,8 @@ #include "asmconstants.h" #include "unixasmmacros.inc" +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + // // Stub dispatch routine for dispatch to a vtable slot // @@ -69,3 +71,5 @@ EPILOG_WITH_TRANSITION_BLOCK_TAILCALL EPILOG_BRANCH_REG x9 NESTED_END CID_VirtualOpenDelegateDispatch, _TEXT + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm index 078617e669605d..7865fb5e99b568 100644 --- a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm +++ b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm @@ -5,6 +5,8
@@ #include "asmconstants.h" #include "asmmacros.h" +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + TEXTAREA EXTERN CID_ResolveWorker @@ -76,4 +78,6 @@ EPILOG_BRANCH_REG x9 NESTED_END CID_VirtualOpenDelegateDispatch +#ifdef ;; FEATURE_CACHED_INTERFACE_DISPATCH + END diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index a10d63e4621fdb..1b685328136860 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -13671,7 +13671,7 @@ BOOL LoadDynamicInfoEntry(Module *currentModule, // aren't quite set up to accept that. Furthermore the call sequences would be different - at // the moment an indirection cell uses "call [cell-addr]" on x86, and instead we would want the // euqivalent of "call [[call-addr]]". This could perhaps be implemented as "call [eax]" - DispatchToken token = VirtualCallStubManager::GetTokenFromFromOwnerAndSlot(ownerType, slot); + DispatchToken token = VirtualCallStubManager::GetTokenFromOwnerAndSlot(ownerType, slot); INTERFACE_DISPATCH_CACHED_OR_VSD( return FALSE; // R2R interface dispatch currently only supports fixups with a single pointer, return FALSE to skip using the method diff --git a/src/coreclr/vm/prestub.cpp b/src/coreclr/vm/prestub.cpp index f9501736e5c657..0e8992d10315fd 100644 --- a/src/coreclr/vm/prestub.cpp +++ b/src/coreclr/vm/prestub.cpp @@ -3314,7 +3314,7 @@ EXTERN_C PCODE STDCALL ExternalMethodFixupWorker(TransitionBlock * pTransitionBl { // The entry is aligned and the size is correct, so we can use the cached interface dispatch mechanism // to speed up further uses of this interface dispatch slot - DispatchToken token = VirtualCallStubManager::GetTokenFromFromOwnerAndSlot(pMT, slot); + DispatchToken token = VirtualCallStubManager::GetTokenFromOwnerAndSlot(pMT, slot); uintptr_t addr = (uintptr_t)RhpInitialInterfaceDispatch; uintptr_t pCache = (uintptr_t)DispatchToken::ToCachedInterfaceDispatchToken(token); diff --git a/src/coreclr/vm/virtualcallstub.cpp 
b/src/coreclr/vm/virtualcallstub.cpp index cfd7546786bed1..4579866ac59204 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -134,7 +134,7 @@ BYTE* GenerateDispatchStubCellEntrySlot(LoaderAllocator *pLoaderAllocator, TypeH // We indirect through a cell so that updates can take place atomically. // The call stub and the indirection cell have the same lifetime as the dictionary itself, i.e. // are allocated in the domain of the dicitonary. - DispatchToken token = VirtualCallStubManager::GetTokenFromFromOwnerAndSlot(ownerType, methodSlot); + DispatchToken token = VirtualCallStubManager::GetTokenFromOwnerAndSlot(ownerType, methodSlot); PCODE addr; INTERFACE_DISPATCH_CACHED_OR_VSD(addr = (PCODE)RhpInitialInterfaceDispatch, addr = pMgr->GetCallStub(token)) @@ -1087,7 +1087,7 @@ BOOL VirtualCallStubManager::TraceManager(Thread *thread, #ifndef DACCESS_COMPILE -DispatchToken VirtualCallStubManager::GetTokenFromFromOwnerAndSlot(TypeHandle ownerType, uint32_t slot) +DispatchToken VirtualCallStubManager::GetTokenFromOwnerAndSlot(TypeHandle ownerType, uint32_t slot) { CONTRACTL { @@ -1122,7 +1122,7 @@ PCODE VirtualCallStubManager::GetCallStub(TypeHandle ownerType, MethodDesc *pMD) INJECT_FAULT(COMPlusThrowOM();); } CONTRACTL_END; - DispatchToken token = GetTokenFromFromOwnerAndSlot(ownerType, pMD->GetSlot()); + DispatchToken token = GetTokenFromOwnerAndSlot(ownerType, pMD->GetSlot()); return GetCallStub(token); } @@ -1557,7 +1557,7 @@ extern "C" PCODE CID_VirtualOpenDelegateDispatchWorker(TransitionBlock * pTransi GCStress::MaybeTriggerAndProtect(pObj); - DispatchToken token = VirtualCallStubManager::GetTokenFromFromOwnerAndSlot(TypeHandle(pTargetMD->GetMethodTable()), pTargetMD->GetSlot()); + DispatchToken token = VirtualCallStubManager::GetTokenFromOwnerAndSlot(TypeHandle(pTargetMD->GetMethodTable()), pTargetMD->GetSlot()); target = CachedInterfaceDispatchResolveWorker(NULL, protectedObj, token); #if _DEBUG diff --git 
a/src/coreclr/vm/virtualcallstub.h b/src/coreclr/vm/virtualcallstub.h index e084115e19f20f..73edf67c7b0e51 100644 --- a/src/coreclr/vm/virtualcallstub.h +++ b/src/coreclr/vm/virtualcallstub.h @@ -250,7 +250,7 @@ class VirtualCallStubManager : public StubManager PCODE GetVTableCallStub(DWORD slot); #endif // FEATURE_VIRTUAL_STUB_DISPATCH - static DispatchToken GetTokenFromFromOwnerAndSlot(TypeHandle ownerType, uint32_t slot); + static DispatchToken GetTokenFromOwnerAndSlot(TypeHandle ownerType, uint32_t slot); // Generate an fresh indirection cell. BYTE* GenerateStubIndirection(PCODE stub, DispatchToken token, BOOL fUseRecycledCell = FALSE); From 2ff4d2a998d98ea03c45d1d4175f30351c435a05 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Fri, 7 Feb 2025 16:22:03 -0800 Subject: [PATCH 33/41] Feedback and fixes --- docs/design/coreclr/botr/clr-abi.md | 4 +-- src/coreclr/clrfeatures.cmake | 13 ++++++---- .../shared_runtime/amd64/StubDispatch.S | 3 +++ .../Internal/Runtime/ReadyToRunConstants.cs | 2 +- .../arm64/CachedInterfaceDispatchCoreCLR.asm | 2 +- src/coreclr/vm/jitinterface.cpp | 25 ------------------- src/coreclr/vm/prestub.cpp | 9 ------- 7 files changed, 15 insertions(+), 43 deletions(-) diff --git a/docs/design/coreclr/botr/clr-abi.md b/docs/design/coreclr/botr/clr-abi.md index 7da3805324eb62..76b7479e7fb559 100644 --- a/docs/design/coreclr/botr/clr-abi.md +++ b/docs/design/coreclr/botr/clr-abi.md @@ -116,7 +116,7 @@ ARM64-only: When a method returns a structure that is larger than 16 bytes the c ## Hidden parameters -*Stub dispatch* - when a virtual call uses a VSD stub, rather than back-patching the calling code (or disassembling it), the JIT must place the address of the stub used to load the call target, the "stub indirection cell", in (x86) `EAX` / (AMD64) `R11` / (AMD64 NativeAOT ABI) `R10` / (ARM) `R4` / (ARM NativeAOT ABI) `R12` / (ARM64) `R11`. In the JIT, this is encapsulated in the `VirtualStubParamInfo` class. 
+*Stub dispatch* - when a virtual call uses a VSD stub, rather than back-patching the calling code (or disassembling it), the JIT must place the address of the stub used to load the call target, the "stub indirection cell", in (x86) `EAX` / (AMD64) `R11` / (ARM) `R4` / (ARM NativeAOT ABI) `R12` / (ARM64) `R11`. In the JIT, this is encapsulated in the `VirtualStubParamInfo` class. *Calli Pinvoke* - The VM wants the address of the PInvoke in (AMD64) `R10` / (ARM) `R12` / (ARM64) `R14` (In the JIT: `REG_PINVOKE_TARGET_PARAM`), and the signature (the pinvoke cookie) in (AMD64) `R11` / (ARM) `R4` / (ARM64) `R15` (in the JIT: `REG_PINVOKE_COOKIE_PARAM`). @@ -816,7 +816,7 @@ Therefore it will expand all indirect calls via the validation helper and a manu ## CFG details for x64 On x64, `CORINFO_HELP_VALIDATE_INDIRECT_CALL` takes the call address in `rcx`. -In addition to the usual registers it also preserves all float registers and `rcx` and `r10`; furthermore, shadow stack space is not required to be allocated. +In addition to the usual registers it also preserves all float registers and `r10`; furthermore, shadow stack space is not required to be allocated. `CORINFO_HELP_DISPATCH_INDIRECT_CALL` takes the call address in `rax` and it reserves the right to use and trash `r10` and `r11`. The JIT uses the dispatch helper on x64 whenever possible as it is expected that the code size benefits outweighs the less accurate branch prediction. 
diff --git a/src/coreclr/clrfeatures.cmake b/src/coreclr/clrfeatures.cmake index f55187a54c7ccb..4e3899c5774ed6 100644 --- a/src/coreclr/clrfeatures.cmake +++ b/src/coreclr/clrfeatures.cmake @@ -40,15 +40,18 @@ if (CLR_CMAKE_TARGET_WIN32) set(FEATURE_TYPEEQUIVALENCE 1) endif(CLR_CMAKE_TARGET_WIN32) -# Enable cached interface dispatch so that we can test/debug it more easily on non-embedded scenarios (set DOTNET_UseCachedInterfaceDispatch=1) -# Only enable in chk/debug builds as this support isn't intended for retail use elsewhere -if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64) - set(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH $<IF:$<CONFIG:Debug,Checked>,1,0>) -endif() if (CLR_CMAKE_TARGET_MACCATALYST OR CLR_CMAKE_TARGET_IOS OR CLR_CMAKE_TARGET_TVOS) set(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH 1) + set(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH 0) else() + # Enable cached interface dispatch so that we can test/debug it more easily on non-embedded scenarios (set DOTNET_UseCachedInterfaceDispatch=1) + # Only enable in chk/debug builds as this support isn't intended for retail use elsewhere + if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64) + set(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH $<IF:$<CONFIG:Debug,Checked>,1,0>) + else() + set(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH 0) + endif() set(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH 1) endif() diff --git a/src/coreclr/shared_runtime/amd64/StubDispatch.S b/src/coreclr/shared_runtime/amd64/StubDispatch.S index 2b07c127439909..3af2bc6ac019b6 100644 --- a/src/coreclr/shared_runtime/amd64/StubDispatch.S +++ b/src/coreclr/shared_runtime/amd64/StubDispatch.S @@ -4,6 +4,8 @@ .intel_syntax noprefix #include "AsmMacros_Shared.h" +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + // trick to avoid PLT relocation at runtime which corrupts registers #define REL_C_FUNC(name) C_FUNC(name)@gotpcrel @@ -74,3 +76,4 @@ ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch LEAF_END RhpInitialInterfaceDispatch, _TEXT +#endif // FEATURE_CACHED_INTERFACE_DISPATCH \ No newline
at end of file diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs index 593f0158536a5e..73fc45b06e2806 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs @@ -140,7 +140,7 @@ public enum ReadyToRunFixupKind VirtualEntry = 0x16, // For invoking a virtual method VirtualEntry_DefToken = 0x17, // Smaller version of VirtualEntry - method is def token VirtualEntry_RefToken = 0x18, // Smaller version of VirtualEntry - method is ref token - VirtualEntry_Slot = 0x19, // Smaller version of VirtualEntry - type & slot + VirtualEntry_Slot = 0x19, // Smaller version of VirtualEntry - type & slot - OBSOLETE, not currently used, and hasn't ever been used in R2R codegen since crossgen2 was introduced, and may not have ever been used. Helper = 0x1A, // Helper StringHandle = 0x1B, // String handle diff --git a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm index 7865fb5e99b568..82bbc0f96f7389 100644 --- a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm +++ b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm @@ -78,6 +78,6 @@ EPILOG_BRANCH_REG x9 NESTED_END CID_VirtualOpenDelegateDispatch -#ifdef ;; FEATURE_CACHED_INTERFACE_DISPATCH +#endif ;; FEATURE_CACHED_INTERFACE_DISPATCH END diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 1b685328136860..a0942d69d7ae7e 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -13655,31 +13655,6 @@ BOOL LoadDynamicInfoEntry(Module *currentModule, } break; - case ENCODE_VIRTUAL_ENTRY_SLOT: - { - DWORD slot = CorSigUncompressData(pBlob); - - TypeHandle ownerType = ZapSig::DecodeType(currentModule, pInfoModule, pBlob); - - LOG((LF_ZAP, LL_INFO100000, " Fixup stub dispatch\n")); - - VirtualCallStubManager * pMgr = 
currentModule->GetLoaderAllocator()->GetVirtualCallStubManager(); - - // - // We should be generating a stub indirection here, but the zapper already uses one level - // of indirection, i.e. we would have to return IAT_PPVALUE to the JIT, and on the whole the JITs - // aren't quite set up to accept that. Furthermore the call sequences would be different - at - // the moment an indirection cell uses "call [cell-addr]" on x86, and instead we would want the - // euqivalent of "call [[call-addr]]". This could perhaps be implemented as "call [eax]" - DispatchToken token = VirtualCallStubManager::GetTokenFromOwnerAndSlot(ownerType, slot); - - INTERFACE_DISPATCH_CACHED_OR_VSD( - return FALSE; // R2R interface dispatch currently only supports fixups with a single pointer, return FALSE to skip using the method - , - result = pMgr->GetCallStub(token); - ); - } - break; #ifdef FEATURE_READYTORUN case ENCODE_READYTORUN_HELPER: { diff --git a/src/coreclr/vm/prestub.cpp b/src/coreclr/vm/prestub.cpp index 0e8992d10315fd..c072104c005f6e 100644 --- a/src/coreclr/vm/prestub.cpp +++ b/src/coreclr/vm/prestub.cpp @@ -3278,15 +3278,6 @@ EXTERN_C PCODE STDCALL ExternalMethodFixupWorker(TransitionBlock * pTransitionBl goto VirtualEntry; } - case ENCODE_VIRTUAL_ENTRY_SLOT: - { - slot = CorSigUncompressData(pBlob); - pMT = ZapSig::DecodeType(pModule, pInfoModule, pBlob).GetMethodTable(); - - fVirtual = true; - break; - } - default: _ASSERTE(!"Unexpected CORCOMPILE_FIXUP_BLOB_KIND"); ThrowHR(COR_E_BADIMAGEFORMAT); From 70dacc01e2cc11f472a6a826e804e0c5dfef28ea Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Fri, 7 Feb 2025 16:29:44 -0800 Subject: [PATCH 34/41] Use runtime as the directory to hold stuff shared between NativeAOT and coreclr --- src/coreclr/nativeaot/Runtime/CMakeLists.txt | 2 +- .../{shared_runtime => runtime}/CachedInterfaceDispatch.cpp | 0 .../{shared_runtime => runtime}/CachedInterfaceDispatch.h | 0 src/coreclr/{shared_runtime => runtime}/amd64/StubDispatch.S | 0 
src/coreclr/{shared_runtime => runtime}/amd64/StubDispatch.asm | 0 src/coreclr/{shared_runtime => runtime}/arm/StubDispatch.S | 0 src/coreclr/{shared_runtime => runtime}/arm64/StubDispatch.S | 0 src/coreclr/{shared_runtime => runtime}/arm64/StubDispatch.asm | 0 src/coreclr/{shared_runtime => runtime}/i386/StubDispatch.S | 0 src/coreclr/{shared_runtime => runtime}/i386/StubDispatch.asm | 0 .../{shared_runtime => runtime}/loongarch64/StubDispatch.S | 0 src/coreclr/{shared_runtime => runtime}/riscv64/StubDispatch.S | 0 src/coreclr/vm/CMakeLists.txt | 2 +- 13 files changed, 2 insertions(+), 2 deletions(-) rename src/coreclr/{shared_runtime => runtime}/CachedInterfaceDispatch.cpp (100%) rename src/coreclr/{shared_runtime => runtime}/CachedInterfaceDispatch.h (100%) rename src/coreclr/{shared_runtime => runtime}/amd64/StubDispatch.S (100%) rename src/coreclr/{shared_runtime => runtime}/amd64/StubDispatch.asm (100%) rename src/coreclr/{shared_runtime => runtime}/arm/StubDispatch.S (100%) rename src/coreclr/{shared_runtime => runtime}/arm64/StubDispatch.S (100%) rename src/coreclr/{shared_runtime => runtime}/arm64/StubDispatch.asm (100%) rename src/coreclr/{shared_runtime => runtime}/i386/StubDispatch.S (100%) rename src/coreclr/{shared_runtime => runtime}/i386/StubDispatch.asm (100%) rename src/coreclr/{shared_runtime => runtime}/loongarch64/StubDispatch.S (100%) rename src/coreclr/{shared_runtime => runtime}/riscv64/StubDispatch.S (100%) diff --git a/src/coreclr/nativeaot/Runtime/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/CMakeLists.txt index 8886adf5b28c75..66f888a5d9035c 100644 --- a/src/coreclr/nativeaot/Runtime/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/CMakeLists.txt @@ -1,5 +1,5 @@ set(GC_DIR ../../gc) -set(SHARED_RUNTIME_DIR ../../shared_runtime) +set(SHARED_RUNTIME_DIR ../../runtime) set(COMMON_RUNTIME_SOURCES allocheap.cpp diff --git a/src/coreclr/shared_runtime/CachedInterfaceDispatch.cpp b/src/coreclr/runtime/CachedInterfaceDispatch.cpp similarity 
index 100% rename from src/coreclr/shared_runtime/CachedInterfaceDispatch.cpp rename to src/coreclr/runtime/CachedInterfaceDispatch.cpp diff --git a/src/coreclr/shared_runtime/CachedInterfaceDispatch.h b/src/coreclr/runtime/CachedInterfaceDispatch.h similarity index 100% rename from src/coreclr/shared_runtime/CachedInterfaceDispatch.h rename to src/coreclr/runtime/CachedInterfaceDispatch.h diff --git a/src/coreclr/shared_runtime/amd64/StubDispatch.S b/src/coreclr/runtime/amd64/StubDispatch.S similarity index 100% rename from src/coreclr/shared_runtime/amd64/StubDispatch.S rename to src/coreclr/runtime/amd64/StubDispatch.S diff --git a/src/coreclr/shared_runtime/amd64/StubDispatch.asm b/src/coreclr/runtime/amd64/StubDispatch.asm similarity index 100% rename from src/coreclr/shared_runtime/amd64/StubDispatch.asm rename to src/coreclr/runtime/amd64/StubDispatch.asm diff --git a/src/coreclr/shared_runtime/arm/StubDispatch.S b/src/coreclr/runtime/arm/StubDispatch.S similarity index 100% rename from src/coreclr/shared_runtime/arm/StubDispatch.S rename to src/coreclr/runtime/arm/StubDispatch.S diff --git a/src/coreclr/shared_runtime/arm64/StubDispatch.S b/src/coreclr/runtime/arm64/StubDispatch.S similarity index 100% rename from src/coreclr/shared_runtime/arm64/StubDispatch.S rename to src/coreclr/runtime/arm64/StubDispatch.S diff --git a/src/coreclr/shared_runtime/arm64/StubDispatch.asm b/src/coreclr/runtime/arm64/StubDispatch.asm similarity index 100% rename from src/coreclr/shared_runtime/arm64/StubDispatch.asm rename to src/coreclr/runtime/arm64/StubDispatch.asm diff --git a/src/coreclr/shared_runtime/i386/StubDispatch.S b/src/coreclr/runtime/i386/StubDispatch.S similarity index 100% rename from src/coreclr/shared_runtime/i386/StubDispatch.S rename to src/coreclr/runtime/i386/StubDispatch.S diff --git a/src/coreclr/shared_runtime/i386/StubDispatch.asm b/src/coreclr/runtime/i386/StubDispatch.asm similarity index 100% rename from 
src/coreclr/shared_runtime/i386/StubDispatch.asm rename to src/coreclr/runtime/i386/StubDispatch.asm diff --git a/src/coreclr/shared_runtime/loongarch64/StubDispatch.S b/src/coreclr/runtime/loongarch64/StubDispatch.S similarity index 100% rename from src/coreclr/shared_runtime/loongarch64/StubDispatch.S rename to src/coreclr/runtime/loongarch64/StubDispatch.S diff --git a/src/coreclr/shared_runtime/riscv64/StubDispatch.S b/src/coreclr/runtime/riscv64/StubDispatch.S similarity index 100% rename from src/coreclr/shared_runtime/riscv64/StubDispatch.S rename to src/coreclr/runtime/riscv64/StubDispatch.S diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index fc0dfdaadd9df3..864ea84aa4eaff 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -1,6 +1,6 @@ set(CMAKE_INCLUDE_CURRENT_DIR ON) -set(SHARED_RUNTIME_DIR ../shared_runtime) +set(SHARED_RUNTIME_DIR ../runtime) # Needed due to the cmunged files being in the binary folders, the set(CMAKE_INCLUDE_CURRENT_DIR ON) is not enough include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}) From 3f44b9b797986d95f37e0262fe470aed076e503f Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Mon, 10 Feb 2025 13:56:48 -0800 Subject: [PATCH 35/41] Address more feedback --- src/coreclr/debug/daccess/dacdbiimpl.cpp | 2 +- src/coreclr/debug/daccess/request.cpp | 7 +++-- src/coreclr/nativeaot/Runtime/CMakeLists.txt | 10 +++---- .../Runtime/CachedInterfaceDispatchAot.cpp | 5 ++-- .../Runtime/CachedInterfaceDispatchPal.h | 3 +- .../nativeaot/Runtime/Full/CMakeLists.txt | 10 +++---- .../nativeaot/Runtime/Portable/CMakeLists.txt | 8 ++--- .../Runtime/eventpipe/CMakeLists.txt | 30 +++++++++---------- src/coreclr/vm/CMakeLists.txt | 16 +++++----- .../vm/CachedInterfaceDispatchCoreclr.cpp | 3 +- src/coreclr/vm/CachedInterfaceDispatchPal.h | 18 ++++------- src/coreclr/vm/amd64/asmconstants.h | 2 -- src/coreclr/vm/comdelegate.cpp | 2 +- 13 files changed, 57 insertions(+), 59 deletions(-) diff 
--git a/src/coreclr/debug/daccess/dacdbiimpl.cpp b/src/coreclr/debug/daccess/dacdbiimpl.cpp index 922925c19bbe4b..a5f5f7e7653a57 100644 --- a/src/coreclr/debug/daccess/dacdbiimpl.cpp +++ b/src/coreclr/debug/daccess/dacdbiimpl.cpp @@ -3546,7 +3546,7 @@ void DacDbiInterfaceImpl::EnumerateMemRangesForLoaderAllocator(PTR_LoaderAllocat if (pVcsMgr->indcell_heap != NULL) heapsToEnumerate.Push(pVcsMgr->indcell_heap); #ifdef FEATURE_VIRTUAL_STUB_DISPATCH if (pVcsMgr->cache_entry_heap != NULL) heapsToEnumerate.Push(pVcsMgr->cache_entry_heap); -#endif +#endif // FEATURE_VIRTUAL_STUB_DISPATCH } TADDR rangeAccumAsTaddr = TO_TADDR(rangeAcummulator); diff --git a/src/coreclr/debug/daccess/request.cpp b/src/coreclr/debug/daccess/request.cpp index c52098e810e0b3..94ea2935875a1c 100644 --- a/src/coreclr/debug/daccess/request.cpp +++ b/src/coreclr/debug/daccess/request.cpp @@ -3614,6 +3614,9 @@ ClrDataAccess::TraverseVirtCallStubHeap(CLRDATA_ADDRESS pAppDomain, VCSHeapType case CacheEntryHeap: #ifdef FEATURE_VIRTUAL_STUB_DISPATCH + // The existence of the CacheEntryHeap is part of the SOS api surface, but currently + // when FEATURE_VIRTUAL_STUB_DISPATCH is not defined, the CacheEntryHeap is not created + // so it's commented out in that situation, but is not considered to be an E_INVALIDARG. pLoaderHeap = pVcsMgr->cache_entry_heap; #endif // FEATURE_VIRTUAL_STUB_DISPATCH break; @@ -3667,7 +3670,7 @@ static const char *LoaderAllocatorLoaderHeapNames[] = "IndcellHeap", #ifdef FEATURE_VIRTUAL_STUB_DISPATCH "CacheEntryHeap", -#endif +#endif // FEATURE_VIRTUAL_STUB_DISPATCH }; @@ -3713,7 +3716,7 @@ HRESULT ClrDataAccess::GetLoaderAllocatorHeaps(CLRDATA_ADDRESS loaderAllocatorAd pLoaderHeaps[i++] = HOST_CDADDR(pVcsMgr->indcell_heap); #ifdef FEATURE_VIRTUAL_STUB_DISPATCH pLoaderHeaps[i++] = HOST_CDADDR(pVcsMgr->cache_entry_heap); -#endif +#endif // FEATURE_VIRTUAL_STUB_DISPATCH } // All of the above are "LoaderHeap" and not the ExplicitControl version.
diff --git a/src/coreclr/nativeaot/Runtime/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/CMakeLists.txt index 66f888a5d9035c..bcf2ad4030b01c 100644 --- a/src/coreclr/nativeaot/Runtime/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/CMakeLists.txt @@ -1,10 +1,10 @@ set(GC_DIR ../../gc) -set(SHARED_RUNTIME_DIR ../../runtime) +set(RUNTIME_DIR ../../runtime) set(COMMON_RUNTIME_SOURCES allocheap.cpp rhassert.cpp - ${SHARED_RUNTIME_DIR}/CachedInterfaceDispatch.cpp + ${RUNTIME_DIR}/CachedInterfaceDispatch.cpp CachedInterfaceDispatchAot.cpp Crst.cpp DebugHeader.cpp @@ -78,7 +78,7 @@ include_directories(.) include_directories(${GC_DIR}) include_directories(${GC_DIR}/env) include_directories(${CMAKE_CURRENT_BINARY_DIR}/eventpipe/inc) -include_directories(${SHARED_RUNTIME_DIR}) +include_directories(${RUNTIME_DIR}) if (WIN32) set(GC_HEADERS @@ -211,7 +211,7 @@ list(APPEND RUNTIME_SOURCES_ARCH_ASM ${ARCH_SOURCES_DIR}/MiscStubs.${ASM_SUFFIX} ${ARCH_SOURCES_DIR}/PInvoke.${ASM_SUFFIX} ${ARCH_SOURCES_DIR}/InteropThunksHelpers.${ASM_SUFFIX} - ${SHARED_RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.${ASM_SUFFIX} + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.${ASM_SUFFIX} ${ARCH_SOURCES_DIR}/UniversalTransition.${ASM_SUFFIX} ${ARCH_SOURCES_DIR}/WriteBarriers.${ASM_SUFFIX} ) @@ -298,7 +298,7 @@ if (CLR_CMAKE_TARGET_UNIX) endif(CLR_CMAKE_TARGET_UNIX) -set(RUNTIME_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(NATIVEAOT_RUNTIME_DIR ${CMAKE_CURRENT_SOURCE_DIR}) list(APPEND COMMON_RUNTIME_SOURCES ${GC_HEADERS}) diff --git a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchAot.cpp b/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchAot.cpp index 4244e3617e9f79..8eb16e8b630983 100644 --- a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchAot.cpp +++ b/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchAot.cpp @@ -26,10 +26,11 @@ void *InterfaceDispatch_AllocDoublePointerAligned(size_t size) return g_pAllocHeap->AllocAligned(size, sizeof(void*) * 2); } -// Allocate memory aligned 
at at least sizeof(void*) +// Allocate memory aligned at sizeof(void*) boundaries + void *InterfaceDispatch_AllocPointerAligned(size_t size) { - return g_pAllocHeap->Alloc(size); + return g_pAllocHeap->AllocAligned(size, sizeof(void*)); } FCIMPL4(PCODE, RhpUpdateDispatchCellCache, InterfaceDispatchCell * pCell, PCODE pTargetCode, MethodTable* pInstanceType, DispatchCellInfo *pNewCellInfo) diff --git a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchPal.h b/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchPal.h index 3b2762ad4185c5..7edc8347b28848 100644 --- a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchPal.h +++ b/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchPal.h @@ -31,7 +31,8 @@ bool InterfaceDispatch_InitializePal(); // Allocate memory aligned at sizeof(void*)*2 boundaries void *InterfaceDispatch_AllocDoublePointerAligned(size_t size); -// Allocate memory aligned at at least sizeof(void*) +// Allocate memory aligned at sizeof(void*) boundaries + void *InterfaceDispatch_AllocPointerAligned(size_t size); #endif // __CACHEDINTERFACEDISPATCHPAL_H__ \ No newline at end of file diff --git a/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt index 11618fd78edc0a..821b4fe8ca9e2e 100644 --- a/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt @@ -59,16 +59,16 @@ endif (CLR_CMAKE_TARGET_WIN32) # Get the current list of definitions get_compile_definitions(DEFINITIONS) -set(ASM_OFFSETS_CSPP ${RUNTIME_DIR}/../Runtime.Base/src/AsmOffsets.cspp) +set(ASM_OFFSETS_CSPP ${NATIVEAOT_RUNTIME_DIR}/../Runtime.Base/src/AsmOffsets.cspp) if(WIN32) set(COMPILER_LANGUAGE "") set(PREPROCESSOR_FLAGS -EP -nologo) - set(ASM_OFFSETS_CPP ${RUNTIME_DIR}/windows/AsmOffsets.cpp) + set(ASM_OFFSETS_CPP ${NATIVEAOT_RUNTIME_DIR}/windows/AsmOffsets.cpp) else() set(COMPILER_LANGUAGE -x c++) set(PREPROCESSOR_FLAGS -E -P) - set(ASM_OFFSETS_CPP 
${RUNTIME_DIR}/unix/AsmOffsets.cpp) + set(ASM_OFFSETS_CPP ${NATIVEAOT_RUNTIME_DIR}/unix/AsmOffsets.cpp) endif() add_custom_command( @@ -80,9 +80,9 @@ add_custom_command( ) add_custom_command( - COMMAND ${CMAKE_CXX_COMPILER} ${DEFINITIONS} ${PREPROCESSOR_FLAGS} -I"${RUNTIME_DIR}" -I"${ARCH_SOURCES_DIR}" "${ASM_OFFSETS_CPP}" >"${CMAKE_CURRENT_BINARY_DIR}/AsmOffsets.inc" + COMMAND ${CMAKE_CXX_COMPILER} ${DEFINITIONS} ${PREPROCESSOR_FLAGS} -I"${NATIVEAOT_RUNTIME_DIR}" -I"${ARCH_SOURCES_DIR}" "${ASM_OFFSETS_CPP}" >"${CMAKE_CURRENT_BINARY_DIR}/AsmOffsets.inc" OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/AsmOffsets.inc" - DEPENDS "${ASM_OFFSETS_CPP}" "${RUNTIME_DIR}/AsmOffsets.h" + DEPENDS "${ASM_OFFSETS_CPP}" "${NATIVEAOT_RUNTIME_DIR}/AsmOffsets.h" COMMENT "Generating AsmOffsets.inc" ) diff --git a/src/coreclr/nativeaot/Runtime/Portable/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/Portable/CMakeLists.txt index 46a85046ca1f2b..8a33f1d14056c4 100644 --- a/src/coreclr/nativeaot/Runtime/Portable/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/Portable/CMakeLists.txt @@ -10,12 +10,12 @@ target_link_libraries(PortableRuntime PRIVATE aotminipal) # Get the current list of definitions get_compile_definitions(DEFINITIONS) -set(ASM_OFFSETS_CSPP ${RUNTIME_DIR}/../Runtime.Base/src/AsmOffsets.cspp) +set(ASM_OFFSETS_CSPP ${NATIVEAOT_RUNTIME_DIR}/../Runtime.Base/src/AsmOffsets.cspp) if(WIN32) set(COMPILER_LANGUAGE "") set(PREPROCESSOR_FLAGS -EP -nologo) - set(ASM_OFFSETS_CPP ${RUNTIME_DIR}/windows/AsmOffsets.cpp) + set(ASM_OFFSETS_CPP ${NATIVEAOT_RUNTIME_DIR}/windows/AsmOffsets.cpp) set_target_properties(aotminipal PROPERTIES COMPILE_PDB_NAME "aotminipal" @@ -23,14 +23,14 @@ if(WIN32) else() set(COMPILER_LANGUAGE -x c++) set(PREPROCESSOR_FLAGS -E -P) - set(ASM_OFFSETS_CPP ${RUNTIME_DIR}/unix/AsmOffsets.cpp) + set(ASM_OFFSETS_CPP ${NATIVEAOT_RUNTIME_DIR}/unix/AsmOffsets.cpp) endif() add_custom_command( # The AsmOffsetsPortable.cs is consumed later by the managed build TARGET PortableRuntime 
COMMAND ${CMAKE_CXX_COMPILER} ${COMPILER_LANGUAGE} ${DEFINITIONS} ${PREPROCESSOR_FLAGS} -I"${ARCH_SOURCES_DIR}" "${ASM_OFFSETS_CSPP}" >"${CMAKE_CURRENT_BINARY_DIR}/AsmOffsetsPortable.cs" - DEPENDS "${RUNTIME_DIR}/AsmOffsets.cpp" "${RUNTIME_DIR}/AsmOffsets.h" + DEPENDS "${NATIVEAOT_RUNTIME_DIR}/AsmOffsets.cpp" "${NATIVEAOT_RUNTIME_DIR}/AsmOffsets.h" ) install_static_library(PortableRuntime aotsdk nativeaot) diff --git a/src/coreclr/nativeaot/Runtime/eventpipe/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/eventpipe/CMakeLists.txt index 739738e6743465..cbda686ed0d100 100644 --- a/src/coreclr/nativeaot/Runtime/eventpipe/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/eventpipe/CMakeLists.txt @@ -123,15 +123,15 @@ list(APPEND AOT_EVENTPIPE_SHIM_HEADERS list(APPEND AOT_EVENTPIPE_MANAGED_TO_NATIVE_SOURCES - ${RUNTIME_DIR}/eventpipeinternal.cpp - ${RUNTIME_DIR}/EnabledEventPipeInterface.cpp - ${RUNTIME_DIR}/runtimeeventinternal.cpp + ${NATIVEAOT_RUNTIME_DIR}/eventpipeinternal.cpp + ${NATIVEAOT_RUNTIME_DIR}/EnabledEventPipeInterface.cpp + ${NATIVEAOT_RUNTIME_DIR}/runtimeeventinternal.cpp ) if (FEATURE_EVENT_TRACE) list(APPEND AOT_EVENTTRACE_SOURCES - ${RUNTIME_DIR}/eventtrace.cpp - ${RUNTIME_DIR}/profheapwalkhelper.cpp + ${NATIVEAOT_RUNTIME_DIR}/eventtrace.cpp + ${NATIVEAOT_RUNTIME_DIR}/profheapwalkhelper.cpp ) # These are carry-overs from .NET Native and only included for ETW currently @@ -139,15 +139,15 @@ if (FEATURE_EVENT_TRACE) # gcheap : GCHeapDump, GCHeapSurvivalAndMovement - not prioritizing for nativeaot yet if (FEATURE_ETW) list(APPEND AOT_EVENTTRACE_SOURCES - ${RUNTIME_DIR}/eventtrace_bulktype.cpp - ${RUNTIME_DIR}/eventtrace_gcheap.cpp + ${NATIVEAOT_RUNTIME_DIR}/eventtrace_bulktype.cpp + ${NATIVEAOT_RUNTIME_DIR}/eventtrace_gcheap.cpp ) endif() if(CLR_CMAKE_TARGET_WIN32) - set_source_files_properties(${GEN_EVENTPIPE_PROVIDER_SOURCES} PROPERTIES COMPILE_FLAGS "/FI\"${RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h\"") - 
set_source_files_properties(${GEN_EVENTPIPE_PLAT_AGNOSTIC_SOURCES} PROPERTIES COMPILE_FLAGS "/FI\"${RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h\"") - set_source_files_properties(${AOT_EVENTTRACE_SOURCES} PROPERTIES COMPILE_FLAGS "/FI\"${RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h\"") + set_source_files_properties(${GEN_EVENTPIPE_PROVIDER_SOURCES} PROPERTIES COMPILE_FLAGS "/FI\"${NATIVEAOT_RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h\"") + set_source_files_properties(${GEN_EVENTPIPE_PLAT_AGNOSTIC_SOURCES} PROPERTIES COMPILE_FLAGS "/FI\"${NATIVEAOT_RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h\"") + set_source_files_properties(${AOT_EVENTTRACE_SOURCES} PROPERTIES COMPILE_FLAGS "/FI\"${NATIVEAOT_RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h\"") endif() endif() @@ -169,7 +169,7 @@ set_target_properties(eventpipe-shared-objects PROPERTIES ) if (CLR_CMAKE_TARGET_WIN32) target_compile_options(eventpipe-shared-objects PRIVATE - "/FI${RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h") + "/FI${NATIVEAOT_RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h") # Install the compile PDB for the eventpipe unity builds. 
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/$<CONFIG>/eventpipe-shared-objects.pdb" DESTINATION aotsdk COMPONENT nativeaot) @@ -189,10 +189,10 @@ list(APPEND EVENTPIPE_SOURCES ) list(APPEND AOT_EVENTPIPE_DISABLED_SOURCES - ${RUNTIME_DIR}/DisabledEventPipeInterface.cpp - ${RUNTIME_DIR}/disabledeventpipeinternal.cpp - ${RUNTIME_DIR}/disabledeventtrace.cpp - ${RUNTIME_DIR}/disabledruntimeeventinternal.cpp + ${NATIVEAOT_RUNTIME_DIR}/DisabledEventPipeInterface.cpp + ${NATIVEAOT_RUNTIME_DIR}/disabledeventpipeinternal.cpp + ${NATIVEAOT_RUNTIME_DIR}/disabledeventtrace.cpp + ${NATIVEAOT_RUNTIME_DIR}/disabledruntimeeventinternal.cpp ${GEN_EVENTPIPE_PLAT_AGNOSTIC_DISABLED_SOURCES} ) diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index 864ea84aa4eaff..caa297f59f0bf6 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -1,13 +1,13 @@ set(CMAKE_INCLUDE_CURRENT_DIR ON) -set(SHARED_RUNTIME_DIR ../runtime) +set(RUNTIME_DIR ../runtime) # Needed due to the cmunged files being in the binary folders, the set(CMAKE_INCLUDE_CURRENT_DIR ON) is not enough include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}) include_directories(${ARCH_SOURCES_DIR}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../interop/inc) include_directories(${CLR_SRC_NATIVE_DIR}) -include_directories(${SHARED_RUNTIME_DIR}) +include_directories(${RUNTIME_DIR}) # needed when zLib compression is used include_directories(${CLR_SRC_NATIVE_DIR}/libs/System.IO.Compression.Native) @@ -295,7 +295,7 @@ set(VM_SOURCES_WKS assemblynative.cpp assemblyspec.cpp baseassemblyspec.cpp - ${SHARED_RUNTIME_DIR}/CachedInterfaceDispatch.cpp + ${RUNTIME_DIR}/CachedInterfaceDispatch.cpp CachedInterfaceDispatchCoreclr.cpp cachelinealloc.cpp callconvbuilder.cpp @@ -378,7 +378,7 @@ set(VM_SOURCES_WKS typeparse.cpp weakreferencenative.cpp yieldprocessornormalized.cpp -${VM_SOURCES_GDBJIT} + ${VM_SOURCES_GDBJIT} ) # coreclr needs to compile codeman.cpp differently depending on flavor (i.e.
dll vs. static lib)) @@ -630,7 +630,7 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64) ${ARCH_SOURCES_DIR}/patchedcode.asm ${ARCH_SOURCES_DIR}/PInvokeStubs.asm ${ARCH_SOURCES_DIR}/RedirectedHandledJITCase.asm - ${SHARED_RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.asm + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.asm ${ARCH_SOURCES_DIR}/ThePreStubAMD64.asm ${ARCH_SOURCES_DIR}/thunktemplates.asm ${ARCH_SOURCES_DIR}/Context.asm @@ -661,7 +661,7 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) ${ARCH_SOURCES_DIR}/CallDescrWorkerARM64.asm ${ARCH_SOURCES_DIR}/patchedcode.asm ${ARCH_SOURCES_DIR}/PInvokeStubs.asm - ${SHARED_RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.asm + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.asm ${ARCH_SOURCES_DIR}/thunktemplates.asm ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.asm ) @@ -688,7 +688,7 @@ else(CLR_CMAKE_TARGET_WIN32) ${ARCH_SOURCES_DIR}/patchedcode.S ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/redirectedhandledjitcase.S - ${SHARED_RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.S + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.S ${ARCH_SOURCES_DIR}/theprestubamd64.S ${ARCH_SOURCES_DIR}/thunktemplates.S ${ARCH_SOURCES_DIR}/Context.S @@ -721,7 +721,7 @@ else(CLR_CMAKE_TARGET_WIN32) ${ARCH_SOURCES_DIR}/calldescrworkerarm64.S ${ARCH_SOURCES_DIR}/patchedcode.S ${ARCH_SOURCES_DIR}/pinvokestubs.S - ${SHARED_RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.S + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.S ${ARCH_SOURCES_DIR}/thunktemplates.S ) elseif(CLR_CMAKE_TARGET_ARCH_LOONGARCH64) diff --git a/src/coreclr/vm/CachedInterfaceDispatchCoreclr.cpp b/src/coreclr/vm/CachedInterfaceDispatchCoreclr.cpp index 2873081b02df61..66a359ffbd3b63 100644 --- a/src/coreclr/vm/CachedInterfaceDispatchCoreclr.cpp +++ b/src/coreclr/vm/CachedInterfaceDispatchCoreclr.cpp @@ -14,7 +14,8 @@ void *InterfaceDispatch_AllocDoublePointerAligned(size_t size) return (void*)SystemDomain::GetGlobalLoaderAllocator()->GetHighFrequencyHeap()->AllocAlignedMem(size, 
sizeof(TADDR) * 2); } -// Allocate memory aligned at at least sizeof(void*) +// Allocate memory aligned at sizeof(void*) boundaries + void *InterfaceDispatch_AllocPointerAligned(size_t size) { return (void*)SystemDomain::GetGlobalLoaderAllocator()->GetHighFrequencyHeap()->AllocAlignedMem(size, sizeof(TADDR)); diff --git a/src/coreclr/vm/CachedInterfaceDispatchPal.h b/src/coreclr/vm/CachedInterfaceDispatchPal.h index d62a9a55847843..49af3535540e00 100644 --- a/src/coreclr/vm/CachedInterfaceDispatchPal.h +++ b/src/coreclr/vm/CachedInterfaceDispatchPal.h @@ -13,14 +13,15 @@ bool InterfaceDispatch_InitializePal(); // Allocate memory aligned at sizeof(void*)*2 boundaries void *InterfaceDispatch_AllocDoublePointerAligned(size_t size); -// Allocate memory aligned at at least sizeof(void*) +// Allocate memory aligned at sizeof(void*) boundaries + void *InterfaceDispatch_AllocPointerAligned(size_t size); enum Flags { // The low 2 bits of the m_pCache pointer are treated specially so that we can avoid the need for // extra fields on this type. - // OR if the m_pCache value is less than 0x1000 then this it is a vtable offset and should be used as such + // OR if the m_pCache value is less than 0x1000 then this is a vtable offset and should be used as such IDC_CachePointerPointsIsVTableOffset = 0x2, IDC_CachePointerPointsAtCache = 0x0, IDC_CachePointerMask = 0x3, @@ -128,7 +129,7 @@ struct InterfaceDispatchCell // synchronization requirements of the code that updates these at runtime and the instructions generated // by the binder for interface call sites. TADDR m_pStub; // Call this code to execute the interface dispatch - volatile TADDR m_pCache; // Context used by the stub above (one or both of the low two bits are set + Volatile<TADDR> m_pCache; // Context used by the stub above (one or both of the low two bits are set // for initial dispatch, and if not set, using this as a cache pointer or // as a vtable offset.)
DispatchCellInfo GetDispatchCellInfo() @@ -155,14 +156,7 @@ struct InterfaceDispatchCell static bool IsCache(TADDR value) { - if ((value & IDC_CachePointerMask) != 0) - { - return false; - } - else - { - return true; - } + return (value & IDC_CachePointerMask) == 0; } static bool IsVTableOffset(TADDR value) @@ -181,7 +175,7 @@ struct InterfaceDispatchCell } else { - return 0; + return nullptr; } } }; diff --git a/src/coreclr/vm/amd64/asmconstants.h b/src/coreclr/vm/amd64/asmconstants.h index cc10d134101531..a63c54e6eabc7e 100644 --- a/src/coreclr/vm/amd64/asmconstants.h +++ b/src/coreclr/vm/amd64/asmconstants.h @@ -546,8 +546,6 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__InterfaceDispatchCache__m_rgEntries == offsetof( ASMCONSTANTS_C_ASSERT(OFFSETOF__InterfaceDispatchCell__m_pCache == offsetof(InterfaceDispatchCell, m_pCache)) #endif // FEATURE_CACHED_INTERFACE_DISPATCH -//ASM_SIZEOF( 8, 10, InterfaceDispatchCacheEntry) - #ifdef PROFILING_SUPPORTED #define PROFILE_ENTER 0x1 #define PROFILE_LEAVE 0x2 diff --git a/src/coreclr/vm/comdelegate.cpp b/src/coreclr/vm/comdelegate.cpp index 80de23ea56adaa..d91e3dfc895d0c 100644 --- a/src/coreclr/vm/comdelegate.cpp +++ b/src/coreclr/vm/comdelegate.cpp @@ -1877,7 +1877,7 @@ MethodDesc *COMDelegate::GetMethodDesc(OBJECTREF orDelegate) } if (fOpenVirtualDelegate) - pMethodHandle = (MethodDesc*)thisDel->GetInvocationCount(); + pMethodHandle = GetMethodDescForOpenVirtualDelegate(thisDel); else pMethodHandle = FindDelegateInvokeMethod(thisDel->GetMethodTable()); } From bea72d04cebe8ba984804a4c09c10804fabda663 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Tue, 11 Feb 2025 10:07:10 -0800 Subject: [PATCH 36/41] Update preserved registers in CLR ABI documentation --- docs/design/coreclr/botr/clr-abi.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/design/coreclr/botr/clr-abi.md b/docs/design/coreclr/botr/clr-abi.md index 76b7479e7fb559..9ada3b0b0cc8e1 100644 --- a/docs/design/coreclr/botr/clr-abi.md +++ 
b/docs/design/coreclr/botr/clr-abi.md @@ -816,7 +816,7 @@ Therefore it will expand all indirect calls via the validation helper and a manu ## CFG details for x64 On x64, `CORINFO_HELP_VALIDATE_INDIRECT_CALL` takes the call address in `rcx`. -In addition to the usual registers it also preserves all float registers and `r10`; furthermore, shadow stack space is not required to be allocated. +In addition to the usual registers it also preserves all float registers, `rcx`, and `r10`; furthermore, shadow stack space is not required to be allocated. `CORINFO_HELP_DISPATCH_INDIRECT_CALL` takes the call address in `rax` and it reserves the right to use and trash `r10` and `r11`. The JIT uses the dispatch helper on x64 whenever possible as it is expected that the code size benefits outweighs the less accurate branch prediction. From 2b744f86f8ecbd4efbc9f2e574fa42486c85a3fc Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Tue, 11 Feb 2025 13:48:52 -0800 Subject: [PATCH 37/41] Adjust to changes upstream --- src/coreclr/vm/virtualcallstub.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index 4eb6f774321417..c86eca1cc56e05 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -1528,7 +1528,7 @@ extern "C" PCODE CID_VirtualOpenDelegateDispatchWorker(TransitionBlock * pTransi Thread::ObjectRefFlush(CURRENT_THREAD); #endif - FrameWithCookie<StubDispatchFrame> frame(pTransitionBlock); + StubDispatchFrame frame(pTransitionBlock); StubDispatchFrame * pSDFrame = &frame; OBJECTREF *protectedObj = pSDFrame->GetThisPtr(); @@ -1604,7 +1604,7 @@ extern "C" PCODE CID_ResolveWorker(TransitionBlock * pTransitionBlock, Thread::ObjectRefFlush(CURRENT_THREAD); #endif - FrameWithCookie<StubDispatchFrame> frame(pTransitionBlock); + StubDispatchFrame frame(pTransitionBlock); StubDispatchFrame * pSDFrame = &frame; PCODE returnAddress = pSDFrame->GetUnadjustedReturnAddress(); From
becc4ce584e3ad8168a54661034446c046aabc6e Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Mon, 3 Mar 2025 13:36:24 -0800 Subject: [PATCH 38/41] First set of PR feedback from Katelyn --- .../vm/arm64/CachedInterfaceDispatchCoreCLR.S | 12 ++++++------ .../vm/arm64/CachedInterfaceDispatchCoreCLR.asm | 12 ++++++------ src/coreclr/vm/genericdict.cpp | 17 +++++++++++++++++ src/coreclr/vm/prestub.cpp | 2 +- src/coreclr/vm/virtualcallstub.cpp | 16 ---------------- src/coreclr/vm/virtualcallstub.h | 2 ++ 6 files changed, 32 insertions(+), 29 deletions(-) diff --git a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S index eefc07a5bbdce5..f0d7f3bf433017 100644 --- a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S +++ b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S @@ -11,27 +11,27 @@ // LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT - // r11 currently contains the indirection cell address. - // load r11 to point to the vtable offset (which is stored in the m_pCache field). + // x11 currently contains the indirection cell address. + // load x11 to point to the vtable offset (which is stored in the m_pCache field). 
ldr x11, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] - // r11 now contains the VTableOffset where the upper 32 bits are the offset to adjust + // x11 now contains the VTableOffset where the upper 32 bits are the offset to adjust // to get to the VTable chunk lsr x10, x11, #32 - // Load the MethodTable from the object instance in rcx, and add it to the vtable offset + // Load the MethodTable from the object instance in x0, and add it to the vtable offset // to get the address in the vtable chunk list of what we want to dereference ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation ldr x9, [x0] add x9, x10, x9 - // Load the target address of the vtable chunk into rax + // Load the target address of the vtable chunk into x9 ldr x9, [x9] // Compute the chunk offset ubfx x10, x11, #16, #16 - // Load the target address of the virtual function into rax + // Load the target address of the virtual function into x9 ldr x9, [x9, x10] EPILOG_BRANCH_REG x9 diff --git a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm index 82bbc0f96f7389..4b117a0336e6b2 100644 --- a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm +++ b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm @@ -17,27 +17,27 @@ ;; LEAF_ENTRY RhpVTableOffsetDispatch - ;; r11 currently contains the indirection cell address. - ;; load r11 to point to the vtable offset (which is stored in the m_pCache field). + ;; x11 currently contains the indirection cell address. + ;; load x11 to point to the vtable offset (which is stored in the m_pCache field). 
ldr x11, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] - ;; r11 now contains the VTableOffset where the upper 32 bits are the offset to adjust + ;; x11 now contains the VTableOffset where the upper 32 bits are the offset to adjust ;; to get to the VTable chunk lsr x10, x11, #32 - ;; Load the MethodTable from the object instance in rcx, and add it to the vtable offset + ;; Load the MethodTable from the object instance in x0, and add it to the vtable offset ;; to get the address in the vtable chunk list of what we want to dereference ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation ldr x9, [x0] add x9, x10, x9 - ;; Load the target address of the vtable chunk into rax + ;; Load the target address of the vtable chunk into x9 ldr x9, [x9] ;; Compute the chunk offset ubfx x10, x11, #16, #16 - ;; Load the target address of the virtual function into rax + ;; Load the target address of the virtual function into x9 ldr x9, [x9, x10] EPILOG_BRANCH_REG x9 diff --git a/src/coreclr/vm/genericdict.cpp b/src/coreclr/vm/genericdict.cpp index 29414983ab306b..eea8def4f4ed9c 100644 --- a/src/coreclr/vm/genericdict.cpp +++ b/src/coreclr/vm/genericdict.cpp @@ -1042,6 +1042,23 @@ Dictionary::PopulateEntry( if (fRequiresDispatchStub) { LoaderAllocator * pDictLoaderAllocator = (pMT != NULL) ? pMT->GetLoaderAllocator() : pMD->GetLoaderAllocator(); + // Generate a dispatch stub and gather a slot. + // + // We generate an indirection so we don't have to write to the dictionary + // when we do updates, and to simplify stub indirect callsites. Stubs stored in + // dictionaries use "RegisterIndirect" stub calling, e.g. "call [eax]", + // i.e. here the register "eax" would contain the value fetched from the dictionary, + // which in turn points to the stub indirection which holds the value the current stub + // address itself. If we just used "call eax" then we wouldn't know which stub indirection + // to update. 
If we really wanted to avoid the extra indirection we could return the _address_ of the + // dictionary entry to the caller, still using "call [eax]", and then the + // stub dispatch mechanism can update the dictionary itself and we don't + // need an indirection. + // + // We indirect through a cell so that updates can take place atomically. + // The call stub and the indirection cell have the same lifetime as the dictionary itself, i.e. + // are allocated in the domain of the dictionary. + result = (CORINFO_GENERIC_HANDLE)GenerateDispatchStubCellEntrySlot(pDictLoaderAllocator, ownerType, methodSlot, NULL); break; } diff --git a/src/coreclr/vm/prestub.cpp b/src/coreclr/vm/prestub.cpp index f6d356e25a3782..a6f32b8d52128e 100644 --- a/src/coreclr/vm/prestub.cpp +++ b/src/coreclr/vm/prestub.cpp @@ -3374,7 +3374,7 @@ EXTERN_C PCODE STDCALL ExternalMethodFixupWorker(TransitionBlock * pTransitionBl // But we don't have the address of the initial R2R stub, as that is part of the R2R image // However, we can rely on the detail that the cache value will never be 0 once it is updated // So we read the indirection cell data, and if the cache portion is 0, we attempt to update the complete cell - if (rgComparand[1] == 0 && PalInterlockedCompareExchange128((int64_t*)pIndirection, rgComparand[1], rgComparand[0], rgComparand) && rgComparand[1] == 0) + if (rgComparand[1] == 0 && PalInterlockedCompareExchange128((int64_t*)pIndirection, rgComparand[1], rgComparand[0], rgComparand) && rgComparand[1] == 0) { PalInterlockedCompareExchange128((int64_t*)pIndirection, pCache, addr, rgComparand); } diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index c86eca1cc56e05..fa9f6097289a43 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -116,24 +116,8 @@ BYTE* GenerateDispatchStubCellEntryMethodDesc(LoaderAllocator *pLoaderAllocator, BYTE* GenerateDispatchStubCellEntrySlot(LoaderAllocator *pLoaderAllocator, TypeHandle ownerType, int
methodSlot, LCGMethodResolver *pResolver) { - // Generate a dispatch stub and gather a slot. - // - // We generate an indirection so we don't have to write to the dictionary - // when we do updates, and to simplify stub indirect callsites. Stubs stored in - // dictionaries use "RegisterIndirect" stub calling, e.g. "call [eax]", - // i.e. here the register "eax" would contain the value fetched from the dictionary, - // which in turn points to the stub indirection which holds the value the current stub - // address itself. If we just used "call eax" then we wouldn't know which stub indirection - // to update. If we really wanted to avoid the extra indirection we could return the _address_ of the - // dictionary entry to the caller, still using "call [eax]", and then the - // stub dispatch mechanism can update the dictitonary itself and we don't - // need an indirection. - VirtualCallStubManager * pMgr = pLoaderAllocator->GetVirtualCallStubManager(); - // We indirect through a cell so that updates can take place atomically. - // The call stub and the indirection cell have the same lifetime as the dictionary itself, i.e. - // are allocated in the domain of the dicitonary. DispatchToken token = VirtualCallStubManager::GetTokenFromOwnerAndSlot(ownerType, methodSlot); PCODE addr; diff --git a/src/coreclr/vm/virtualcallstub.h b/src/coreclr/vm/virtualcallstub.h index 73edf67c7b0e51..ae0fa12851a1e0 100644 --- a/src/coreclr/vm/virtualcallstub.h +++ b/src/coreclr/vm/virtualcallstub.h @@ -1655,6 +1655,8 @@ BYTE* GenerateDispatchStubCellEntrySlot(LoaderAllocator *pLoaderAllocator, TypeH #if defined(FEATURE_CACHED_INTERFACE_DISPATCH) && defined(FEATURE_VIRTUAL_STUB_DISPATCH) inline bool UseCachedInterfaceDispatch() { return g_pConfig->UseCachedInterfaceDispatch(); } + +// INTERFACE_DISPATCH_CACHED_OR_VSD is a macro used to swap between cached interface dispatch and virtual stub dispatch. 
#define INTERFACE_DISPATCH_CACHED_OR_VSD(cachedDispatch, vsdDispath) if (UseCachedInterfaceDispatch()) { cachedDispatch; } else { vsdDispath; } #elif defined(FEATURE_CACHED_INTERFACE_DISPATCH) inline bool UseCachedInterfaceDispatch() { return true; } From 5bd704019f8a48a994bd8660aeb416dd70f92fca Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Mon, 3 Mar 2025 15:27:36 -0800 Subject: [PATCH 39/41] Next set of feedback --- src/coreclr/vm/CachedInterfaceDispatchPal.h | 10 ++++++++++ src/coreclr/vm/arm64/asmconstants.h | 2 -- src/coreclr/vm/comdelegate.cpp | 1 - src/coreclr/vm/prestub.cpp | 8 ++------ src/coreclr/vm/riscv64/asmconstants.h | 2 -- src/coreclr/vm/virtualcallstub.cpp | 7 +++++-- 6 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/coreclr/vm/CachedInterfaceDispatchPal.h b/src/coreclr/vm/CachedInterfaceDispatchPal.h index 49af3535540e00..b5fe783229271b 100644 --- a/src/coreclr/vm/CachedInterfaceDispatchPal.h +++ b/src/coreclr/vm/CachedInterfaceDispatchPal.h @@ -61,6 +61,15 @@ struct DispatchCellInfo { if (CellType == DispatchCellType::VTableOffset) { + // The vtable offset is stored in a pointer sized field, but actually represents 2 values. + // 1. The offset of the first indirection to use, which is stored in the upper half of the + // pointer sized field (bits 16-31 of a 32 bit pointer, or bits 32-63 of a 64 bit pointer). + // + // 2. The offset of the second indirection, which is stored in the upper half of the lower + // half of the pointer sized field (bits 8-15 of a 32 bit pointer, or bits 16-31 of a 64 + // bit pointer). This second offset is always less than 255, so we only really need a single + // byte, and the assembly code on some architectures may take a dependency on that + // so the VTableOffsetToSlot function has a mask to ensure that it is only ever a single byte.
uint32_t slot = Token.GetSlotNumber(); unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE; unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE; @@ -78,6 +87,7 @@ struct DispatchCellInfo static unsigned VTableOffsetToSlot(uintptr_t vtableOffset) { + // See comment in GetVTableOffset() for what we're doing here. unsigned offsetOfIndirection = (unsigned)(vtableOffset >> ((TARGET_POINTER_SIZE * 8) / 2)); unsigned offsetAfterIndirection = (unsigned)(vtableOffset >> ((TARGET_POINTER_SIZE * 8) / 4)) & 0xFF; unsigned slotGroupPerChunk = (offsetOfIndirection - MethodTable::GetVtableOffset()) / TARGET_POINTER_SIZE; diff --git a/src/coreclr/vm/arm64/asmconstants.h b/src/coreclr/vm/arm64/asmconstants.h index 9eab1162cc9aac..fbe1626d435848 100644 --- a/src/coreclr/vm/arm64/asmconstants.h +++ b/src/coreclr/vm/arm64/asmconstants.h @@ -174,8 +174,6 @@ ASMCONSTANTS_C_ASSERT(MethodDesc_ALIGNMENT_SHIFT == MethodDesc::ALIGNMENT_SHIFT) //ASMCONSTANTS_C_ASSERT((1<GetLoaderAllocator()->GetVirtualCallStubManager(); - _ASSERTE(!UseCachedInterfaceDispatch()); // This code path is not yet ready PCODE pTargetCall = pVirtualStubManager->GetCallStub(scopeType, method); _ASSERTE(pTargetCall); return pTargetCall; diff --git a/src/coreclr/vm/prestub.cpp b/src/coreclr/vm/prestub.cpp index a6f32b8d52128e..685ff0839fd350 100644 --- a/src/coreclr/vm/prestub.cpp +++ b/src/coreclr/vm/prestub.cpp @@ -3390,8 +3390,7 @@ EXTERN_C PCODE STDCALL ExternalMethodFixupWorker(TransitionBlock * pTransitionBl #endif } - // We don't yet have a proper implementation for cached interface stubs in R2R code, so instead of finding stubs, simply do the resolution in pure C++ - // and skip updating the indirection cell + // We lost the race or the R2R image was generated without cached interface dispatch support, simply do the resolution in pure C++ DispatchToken token; if 
(pMT->IsInterface()) { @@ -3413,10 +3412,7 @@ EXTERN_C PCODE STDCALL ExternalMethodFixupWorker(TransitionBlock * pTransitionBl DispatchToken token; if (pMT->IsInterface()) { - if (pMT->IsInterface()) - token = pMT->GetLoaderAllocator()->GetDispatchToken(pMT->GetTypeID(), slot); - else - token = DispatchToken::CreateDispatchToken(slot); + token = pMT->GetLoaderAllocator()->GetDispatchToken(pMT->GetTypeID(), slot); StubCallSite callSite(pIndirection, pEMFrame->GetReturnAddress()); pCode = pMgr->ResolveWorker(&callSite, protectedObj, token, STUB_CODE_BLOCK_VSD_LOOKUP_STUB); diff --git a/src/coreclr/vm/riscv64/asmconstants.h b/src/coreclr/vm/riscv64/asmconstants.h index 47f02c8a75cdfc..c9fc2886141ed6 100644 --- a/src/coreclr/vm/riscv64/asmconstants.h +++ b/src/coreclr/vm/riscv64/asmconstants.h @@ -152,8 +152,6 @@ ASMCONSTANTS_C_ASSERT(FaultingExceptionFrame__m_fFilterExecuted == offsetof(Faul ASMCONSTANTS_C_ASSERT(SIZEOF__FixupPrecode == sizeof(FixupPrecode)); ASMCONSTANTS_C_ASSERT(MethodDesc_ALIGNMENT_SHIFT == MethodDesc::ALIGNMENT_SHIFT); -#define ResolveCacheElem__pMT 0x00 -#define ResolveCacheElem__token 0x08 #define ResolveCacheElem__target 0x10 #define ResolveCacheElem__pNext 0x18 ASMCONSTANTS_C_ASSERT(ResolveCacheElem__target == offsetof(ResolveCacheElem, target)); diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index fa9f6097289a43..2e8d36ae43ce54 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -121,7 +121,10 @@ BYTE* GenerateDispatchStubCellEntrySlot(LoaderAllocator *pLoaderAllocator, TypeH DispatchToken token = VirtualCallStubManager::GetTokenFromOwnerAndSlot(ownerType, methodSlot); PCODE addr; - INTERFACE_DISPATCH_CACHED_OR_VSD(addr = (PCODE)RhpInitialInterfaceDispatch, addr = pMgr->GetCallStub(token)) + INTERFACE_DISPATCH_CACHED_OR_VSD( + addr = (PCODE)RhpInitialInterfaceDispatch // Always use the initial dispatch stub for cached interface dispatch + , + addr = 
pMgr->GetCallStub(token)) // Acquire a stub which is token specific in the VSD case BYTE* indcell = pMgr->GenerateStubIndirection(addr, token, pResolver != NULL); @@ -1265,7 +1268,7 @@ BYTE *VirtualCallStubManager::GenerateStubIndirection(PCODE target, DispatchToke allocationSize += sizeof(CachedIndirectionCellBlockListNode); } #endif // FEATURE_CACHED_INTERFACE_DISPATCH - BYTE ** pBlock = (BYTE **) (void *) indcell_heap->AllocAlignedMem(cellsPerBlock * sizeOfIndCell, alignment); + BYTE ** pBlock = (BYTE **) (void *) indcell_heap->AllocAlignedMem(allocationSize, alignment); #ifdef FEATURE_CACHED_INTERFACE_DISPATCH if (m_loaderAllocator->IsCollectible() && UseCachedInterfaceDispatch()) From 0ac5629b1f9fbe061c42f71acf6c3927b9f693a5 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Mon, 3 Mar 2025 16:44:50 -0800 Subject: [PATCH 40/41] Code review add CONSISTENCY_CHECK back. --- src/coreclr/vm/virtualcallstub.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index 2e8d36ae43ce54..abe4de458e6608 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -627,6 +627,8 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) indcell_heap_reserve_size += cPagesPerHeap * GetOsPageSize(); #ifdef FEATURE_VIRTUAL_STUB_DISPATCH cache_entry_heap_reserve_size += (cPagesPerHeap + cPagesRemainder) * GetOsPageSize(); +#else + indcell_heap_reserve_size += (cPagesPerHeap + cPagesRemainder) * GetOsPageSize(); #endif // FEATURE_VIRTUAL_STUB_DISPATCH } @@ -634,6 +636,8 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) CONSISTENCY_CHECK((indcell_heap_reserve_size + cache_entry_heap_reserve_size)== dwTotalReserveMemSize); +#else + CONSISTENCY_CHECK(indcell_heap_reserve_size == dwTotalReserveMemSize); #endif // FEATURE_VIRTUAL_STUB_DISPATCH } From 18fd107a1aa0f35b31fb6a7bd7a4805237854a35 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: 
Tue, 4 Mar 2025 10:33:12 -0800 Subject: [PATCH 41/41] Refactoring feedback. Found a bug where the linked list was incomplete --- src/coreclr/vm/virtualcallstub.cpp | 17 ++++++++++------- src/coreclr/vm/virtualcallstub.h | 16 +++++++++++++--- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index abe4de458e6608..4b84dd4b5d6576 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -1227,6 +1227,13 @@ VTableCallHolder* VirtualCallStubManager::GenerateVTableCallStub(DWORD slot) // m_RecycledIndCellList when it is finalized. // //+---------------------------------------------------------------------------- +BYTE* GetStubIndirectionCell(BYTE** pBlocksStart, UINT32 index, UINT32 sizeOfIndCell) +{ + LIMITED_METHOD_CONTRACT; + + return ((BYTE*)pBlocksStart) + (index * sizeOfIndCell); +} + BYTE *VirtualCallStubManager::GenerateStubIndirection(PCODE target, DispatchToken token, BOOL fUseRecycledCell /* = FALSE*/ ) { CONTRACT (BYTE*) { @@ -1289,23 +1296,19 @@ BYTE *VirtualCallStubManager::GenerateStubIndirection(PCODE target, DispatchToke // link all the cells together // we don't need to null terminate the linked list, InsertIntoFreeIndCellList will do it. - BYTE** pBlockCur = pBlock; - for (UINT32 i = 1; i < cellsPerBlock - 1; ++i) + for (UINT32 i = 1; i < cellsPerBlock - 1; ++i) // Setup linked list between entries 1 to n { - BYTE** pBlockNext = (BYTE**)(((BYTE*)pBlockCur) + sizeOfIndCell); - *pBlockCur = (BYTE *)pBlockNext; - pBlockCur = (BYTE**)pBlockNext; + *(BYTE**)GetStubIndirectionCell(pBlock, i, sizeOfIndCell) = GetStubIndirectionCell(pBlock, i + 1, sizeOfIndCell); } // insert the list into the free indcell list.
- InsertIntoFreeIndCellList((((BYTE*)pBlock) + sizeOfIndCell), (((BYTE*)pBlock) + ((cellsPerBlock - 1) * sizeOfIndCell))); + InsertIntoFreeIndCellList(GetStubIndirectionCell(pBlock, 1, sizeOfIndCell), GetStubIndirectionCell(pBlock, cellsPerBlock - 1, sizeOfIndCell)); } INTERFACE_DISPATCH_CACHED_OR_VSD( InterfaceDispatchCell * pCell = (InterfaceDispatchCell *)ret; pCell->m_pStub = target; pCell->m_pCache = DispatchToken::ToCachedInterfaceDispatchToken(token); - ret = (BYTE *)pCell; , *((PCODE *)ret) = target; ) diff --git a/src/coreclr/vm/virtualcallstub.h b/src/coreclr/vm/virtualcallstub.h index ae0fa12851a1e0..b714683302c9ca 100644 --- a/src/coreclr/vm/virtualcallstub.h +++ b/src/coreclr/vm/virtualcallstub.h @@ -620,6 +620,16 @@ class VirtualCallStubManager : public StubManager PRECONDITION(m_indCellLock.OwnedByCurrentThread()); } CONTRACTL_END; +#ifdef DEBUG + // Assert that head and tail are actually linked together + BYTE **p = (BYTE**)head; + while (p != (BYTE**)tail) + { + p = (BYTE **)*p; + _ASSERTE(p != NULL); + } +#endif // DEBUG + BYTE * temphead = *ppList; *((BYTE**)tail) = temphead; *ppList = head; @@ -1657,13 +1667,13 @@ BYTE* GenerateDispatchStubCellEntrySlot(LoaderAllocator *pLoaderAllocator, TypeH inline bool UseCachedInterfaceDispatch() { return g_pConfig->UseCachedInterfaceDispatch(); } // INTERFACE_DISPATCH_CACHED_OR_VSD is a macro used to swap between cached interface dispatch and virtual stub dispatch. 
-#define INTERFACE_DISPATCH_CACHED_OR_VSD(cachedDispatch, vsdDispath) if (UseCachedInterfaceDispatch()) { cachedDispatch; } else { vsdDispath; } +#define INTERFACE_DISPATCH_CACHED_OR_VSD(cachedDispatch, vsdDispatch) if (UseCachedInterfaceDispatch()) { cachedDispatch; } else { vsdDispatch; } #elif defined(FEATURE_CACHED_INTERFACE_DISPATCH) inline bool UseCachedInterfaceDispatch() { return true; } -#define INTERFACE_DISPATCH_CACHED_OR_VSD(cachedDispatch, vsdDispath) { cachedDispatch; } +#define INTERFACE_DISPATCH_CACHED_OR_VSD(cachedDispatch, vsdDispatch) { cachedDispatch; } #elif defined(FEATURE_VIRTUAL_STUB_DISPATCH) inline bool UseCachedInterfaceDispatch() { return false; } -#define INTERFACE_DISPATCH_CACHED_OR_VSD(cachedDispatch, vsdDispath) { vsdDispath; } +#define INTERFACE_DISPATCH_CACHED_OR_VSD(cachedDispatch, vsdDispatch) { vsdDispatch; } #else #error "No dispatch mechanism defined" #endif