From 86718e16d91a485a3809f8263dea5573b6ed59ae Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 29 Mar 2018 14:51:25 -0700 Subject: [PATCH 1/2] GCStress: try to reduce and tolerate races This change addresses races that cause spurious failures when running GC stress on multithreaded applications. * Instruction update race Threads that hit a gc cover interrupt where gc is not safe can race to overwrite the interrupt instruction and change it back to the original instruction. This can cause confusion when handling stress exceptions as the exception code raised by the kernel may be determined by disassembling the instruction that caused the fault, and this instruction may now change between the time the fault is raised and the instruction is disassembled. When this happens the kernel may report an ACCESS_VIOLATION where there was actually an attempt to execute a privileged instruction. x86 already had a tolerance mechanism here where when gc stress was active and the exception status was ACCESS_VIOLATION the faulting instruction would be retried to see if it faults the same way again. In this change we extend this tolerance to cover x64 and also enable it regardless of the gc mode. We use the exception information to further screen as these spurious AVs look like reads from address 0xFF..FF. * Instrumentation vs execution race The second race happens when one thread is jitting a method and another is about to call the method. The first thread finishes jitting and publishes the method code, then starts instrumenting the method for gc coverage. While this instrumentation is ongoing, the second thread then calls the method and hits a gc interrupt instruction. The code that recognizes the fault as a gc coverage interrupt gets confused as the instrumentation is not yet complete -- in particular the m_GcCover member of the MethodDesc is not yet set. So the second thread triggers an assert. 
The fix for this is to instrument for GcCoverage before publishing the code. Since multiple threads can be jitting a method concurrently the instrument and public steps are done under a lock to ensure that the instrumentation and code are consistent (come from the same thread). With this lock in place we have removed the secondary locking done in SetupGcCoverage as it is no longer needed; only one thread can be instrumenting a given jitted method for GcCoverage. * Instruction cache flushes In some cases when replacing the interrupt instruction with the original the instruction cache was either not flushed or not flushed with sufficient length. This possibly leads to an increased frequency of the above races. No impact for non-gc stress scenarios. Addresses the spurious GC stress failures seen in #17027 and #17610. --- src/inc/CrstTypes.def | 4 + src/inc/crsttypes.h | 213 ++++++++++++++++++----------------- src/inc/switches.h | 6 + src/vm/ceemain.cpp | 4 + src/vm/dynamicmethod.cpp | 5 + src/vm/excep.cpp | 37 +++--- src/vm/excep.h | 2 +- src/vm/exceptionhandling.cpp | 4 +- src/vm/gccover.cpp | 73 ++---------- src/vm/i386/excepx86.cpp | 9 +- src/vm/method.hpp | 8 ++ src/vm/prestub.cpp | 59 +++++++--- src/vm/threads.cpp | 4 +- src/vm/threads.h | 8 +- 14 files changed, 221 insertions(+), 215 deletions(-) diff --git a/src/inc/CrstTypes.def b/src/inc/CrstTypes.def index a04839472d0e..f8632e047648 100644 --- a/src/inc/CrstTypes.def +++ b/src/inc/CrstTypes.def @@ -352,6 +352,10 @@ Crst NativeImageCache Unordered End +Crst GCCover + AcquiredBefore LoaderHeap +End + Crst GCMemoryPressure End diff --git a/src/inc/crsttypes.h b/src/inc/crsttypes.h index f2997216f9e7..4e2e5b19ad37 100644 --- a/src/inc/crsttypes.h +++ b/src/inc/crsttypes.h @@ -83,111 +83,112 @@ enum CrstType CrstFusionPolicyConfigPool = 64, CrstFusionSingleUse = 65, CrstFusionWarningLog = 66, - CrstGCMemoryPressure = 67, - CrstGlobalStrLiteralMap = 68, - CrstHandleTable = 69, - CrstHostAssemblyMap = 70, - 
CrstHostAssemblyMapAdd = 71, - CrstIbcProfile = 72, - CrstIJWFixupData = 73, - CrstIJWHash = 74, - CrstILFingerprintCache = 75, - CrstILStubGen = 76, - CrstInlineTrackingMap = 77, - CrstInstMethodHashTable = 78, - CrstInterfaceVTableMap = 79, - CrstInterop = 80, - CrstInteropData = 81, - CrstIOThreadpoolWorker = 82, - CrstIsJMCMethod = 83, - CrstISymUnmanagedReader = 84, - CrstJit = 85, - CrstJitGenericHandleCache = 86, - CrstJitPerf = 87, - CrstJumpStubCache = 88, - CrstLeafLock = 89, - CrstListLock = 90, - CrstLoaderAllocator = 91, - CrstLoaderAllocatorReferences = 92, - CrstLoaderHeap = 93, - CrstMda = 94, - CrstMetadataTracker = 95, - CrstModIntPairList = 96, - CrstModule = 97, - CrstModuleFixup = 98, - CrstModuleLookupTable = 99, - CrstMulticoreJitHash = 100, - CrstMulticoreJitManager = 101, - CrstMUThunkHash = 102, - CrstNativeBinderInit = 103, - CrstNativeImageCache = 104, - CrstNls = 105, - CrstNotifyGdb = 106, - CrstObjectList = 107, - CrstOnEventManager = 108, - CrstPatchEntryPoint = 109, - CrstPEFileSecurityManager = 110, - CrstPEImage = 111, - CrstPEImagePDBStream = 112, - CrstPendingTypeLoadEntry = 113, - CrstPinHandle = 114, - CrstPinnedByrefValidation = 115, - CrstProfilerGCRefDataFreeList = 116, - CrstProfilingAPIStatus = 117, - CrstPublisherCertificate = 118, - CrstRCWCache = 119, - CrstRCWCleanupList = 120, - CrstRCWRefCache = 121, - CrstReadyToRunEntryPointToMethodDescMap = 122, - CrstReDacl = 123, - CrstReflection = 124, - CrstReJITDomainTable = 125, - CrstReJITGlobalRequest = 126, - CrstReJITSharedDomainTable = 127, - CrstRemoting = 128, - CrstRetThunkCache = 129, - CrstRWLock = 130, - CrstSavedExceptionInfo = 131, - CrstSaveModuleProfileData = 132, - CrstSecurityPolicyCache = 133, - CrstSecurityPolicyInit = 134, - CrstSecurityStackwalkCache = 135, - CrstSharedAssemblyCreate = 136, - CrstSharedBaseDomain = 137, - CrstSigConvert = 138, - CrstSingleUseLock = 139, - CrstSpecialStatics = 140, - CrstSqmManager = 141, - CrstStackSampler = 142, - 
CrstStressLog = 143, - CrstStrongName = 144, - CrstStubCache = 145, - CrstStubDispatchCache = 146, - CrstStubUnwindInfoHeapSegments = 147, - CrstSyncBlockCache = 148, - CrstSyncHashLock = 149, - CrstSystemBaseDomain = 150, - CrstSystemDomain = 151, - CrstSystemDomainDelayedUnloadList = 152, - CrstThreadIdDispenser = 153, - CrstThreadpoolEventCache = 154, - CrstThreadpoolTimerQueue = 155, - CrstThreadpoolWaitThreads = 156, - CrstThreadpoolWorker = 157, - CrstThreadStaticDataHashTable = 158, - CrstThreadStore = 159, - CrstTPMethodTable = 160, - CrstTypeEquivalenceMap = 161, - CrstTypeIDMap = 162, - CrstUMEntryThunkCache = 163, - CrstUMThunkHash = 164, - CrstUniqueStack = 165, - CrstUnresolvedClassLock = 166, - CrstUnwindInfoTableLock = 167, - CrstVSDIndirectionCellLock = 168, - CrstWinRTFactoryCache = 169, - CrstWrapperTemplate = 170, - kNumberOfCrstTypes = 171 + CrstGCCover = 67, + CrstGCMemoryPressure = 68, + CrstGlobalStrLiteralMap = 69, + CrstHandleTable = 70, + CrstHostAssemblyMap = 71, + CrstHostAssemblyMapAdd = 72, + CrstIbcProfile = 73, + CrstIJWFixupData = 74, + CrstIJWHash = 75, + CrstILFingerprintCache = 76, + CrstILStubGen = 77, + CrstInlineTrackingMap = 78, + CrstInstMethodHashTable = 79, + CrstInterfaceVTableMap = 80, + CrstInterop = 81, + CrstInteropData = 82, + CrstIOThreadpoolWorker = 83, + CrstIsJMCMethod = 84, + CrstISymUnmanagedReader = 85, + CrstJit = 86, + CrstJitGenericHandleCache = 87, + CrstJitPerf = 88, + CrstJumpStubCache = 89, + CrstLeafLock = 90, + CrstListLock = 91, + CrstLoaderAllocator = 92, + CrstLoaderAllocatorReferences = 93, + CrstLoaderHeap = 94, + CrstMda = 95, + CrstMetadataTracker = 96, + CrstModIntPairList = 97, + CrstModule = 98, + CrstModuleFixup = 99, + CrstModuleLookupTable = 100, + CrstMulticoreJitHash = 101, + CrstMulticoreJitManager = 102, + CrstMUThunkHash = 103, + CrstNativeBinderInit = 104, + CrstNativeImageCache = 105, + CrstNls = 106, + CrstNotifyGdb = 107, + CrstObjectList = 108, + CrstOnEventManager = 109, + 
CrstPatchEntryPoint = 110, + CrstPEFileSecurityManager = 111, + CrstPEImage = 112, + CrstPEImagePDBStream = 113, + CrstPendingTypeLoadEntry = 114, + CrstPinHandle = 115, + CrstPinnedByrefValidation = 116, + CrstProfilerGCRefDataFreeList = 117, + CrstProfilingAPIStatus = 118, + CrstPublisherCertificate = 119, + CrstRCWCache = 120, + CrstRCWCleanupList = 121, + CrstRCWRefCache = 122, + CrstReadyToRunEntryPointToMethodDescMap = 123, + CrstReDacl = 124, + CrstReflection = 125, + CrstReJITDomainTable = 126, + CrstReJITGlobalRequest = 127, + CrstReJITSharedDomainTable = 128, + CrstRemoting = 129, + CrstRetThunkCache = 130, + CrstRWLock = 131, + CrstSavedExceptionInfo = 132, + CrstSaveModuleProfileData = 133, + CrstSecurityPolicyCache = 134, + CrstSecurityPolicyInit = 135, + CrstSecurityStackwalkCache = 136, + CrstSharedAssemblyCreate = 137, + CrstSharedBaseDomain = 138, + CrstSigConvert = 139, + CrstSingleUseLock = 140, + CrstSpecialStatics = 141, + CrstSqmManager = 142, + CrstStackSampler = 143, + CrstStressLog = 144, + CrstStrongName = 145, + CrstStubCache = 146, + CrstStubDispatchCache = 147, + CrstStubUnwindInfoHeapSegments = 148, + CrstSyncBlockCache = 149, + CrstSyncHashLock = 150, + CrstSystemBaseDomain = 151, + CrstSystemDomain = 152, + CrstSystemDomainDelayedUnloadList = 153, + CrstThreadIdDispenser = 154, + CrstThreadpoolEventCache = 155, + CrstThreadpoolTimerQueue = 156, + CrstThreadpoolWaitThreads = 157, + CrstThreadpoolWorker = 158, + CrstThreadStaticDataHashTable = 159, + CrstThreadStore = 160, + CrstTPMethodTable = 161, + CrstTypeEquivalenceMap = 162, + CrstTypeIDMap = 163, + CrstUMEntryThunkCache = 164, + CrstUMThunkHash = 165, + CrstUniqueStack = 166, + CrstUnresolvedClassLock = 167, + CrstUnwindInfoTableLock = 168, + CrstVSDIndirectionCellLock = 169, + CrstWinRTFactoryCache = 170, + CrstWrapperTemplate = 171, + kNumberOfCrstTypes = 172 }; #endif // __CRST_TYPES_INCLUDED @@ -265,6 +266,7 @@ int g_rgCrstLevelMap[] = 4, // CrstFusionPolicyConfigPool 5, // 
CrstFusionSingleUse 6, // CrstFusionWarningLog + 3, // CrstGCCover 0, // CrstGCMemoryPressure 11, // CrstGlobalStrLiteralMap 1, // CrstHandleTable @@ -441,6 +443,7 @@ LPCSTR g_rgCrstNameMap[] = "CrstFusionPolicyConfigPool", "CrstFusionSingleUse", "CrstFusionWarningLog", + "CrstGCCover", "CrstGCMemoryPressure", "CrstGlobalStrLiteralMap", "CrstHandleTable", diff --git a/src/inc/switches.h b/src/inc/switches.h index 8141cf203036..cd404d5c5347 100644 --- a/src/inc/switches.h +++ b/src/inc/switches.h @@ -114,6 +114,12 @@ #define HAVE_GCCOVER #endif +// Some platforms may see spurious AVs when GcCoverage is enabled because of races. +// Enable further processing to see if they recur. +#if defined(HAVE_GCCOVER) && (defined(_TARGET_X86_) || defined(_TARGET_AMD64_)) && !defined(FEATURE_PAL) +#define GCCOVER_TOLERATE_SPURIOUS_AV +#endif + //Turns on a startup delay to allow simulation of slower and faster startup times. #define ENABLE_STARTUP_DELAY diff --git a/src/vm/ceemain.cpp b/src/vm/ceemain.cpp index 0f2efd64c0fc..1d142934415f 100644 --- a/src/vm/ceemain.cpp +++ b/src/vm/ceemain.cpp @@ -1124,6 +1124,10 @@ void EEStartupHelper(COINITIEE fFlags) #endif // _DEBUG +#ifdef HAVE_GCCOVER + MethodDesc::Init(); +#endif + #endif // !CROSSGEN_COMPILE ErrExit: ; diff --git a/src/vm/dynamicmethod.cpp b/src/vm/dynamicmethod.cpp index 806ab578050c..7778924efbda 100644 --- a/src/vm/dynamicmethod.cpp +++ b/src/vm/dynamicmethod.cpp @@ -280,6 +280,11 @@ DynamicMethodDesc* DynamicMethodTable::GetDynamicMethod(BYTE *psig, DWORD sigSiz pNewMD->m_pszDebugClassName = (LPUTF8)"dynamicclass"; pNewMD->m_pszDebugMethodSignature = "DynamicMethod Signature not available"; #endif // _DEBUG + +#ifdef HAVE_GCCOVER + pNewMD->m_GcCover = NULL; +#endif + pNewMD->SetNotInline(TRUE); pNewMD->GetLCGMethodResolver()->Reset(); diff --git a/src/vm/excep.cpp b/src/vm/excep.cpp index e5c024e89c97..d4e00d635dc8 100644 --- a/src/vm/excep.cpp +++ b/src/vm/excep.cpp @@ -6811,34 +6811,39 @@ DWORD 
GetGcMarkerExceptionCode(LPVOID ip) } // Did we hit an DO_A_GC_HERE marker in JITted code? -bool IsGcMarker(DWORD exceptionCode, CONTEXT *pContext) +bool IsGcMarker(CONTEXT* pContext, EXCEPTION_RECORD *pExceptionRecord) { + DWORD exceptionCode = pExceptionRecord->ExceptionCode; #ifdef HAVE_GCCOVER WRAPPER_NO_CONTRACT; if (GCStress::IsEnabled()) { -#ifdef _TARGET_X86_ - // on x86 we can't suspend EE to update the GC marker instruction so - // we update it directly without suspending. this can sometimes yield - // a STATUS_ACCESS_VIOLATION instead of STATUS_CLR_GCCOVER_CODE. in - // this case we let the AV through and retry the instruction. we'll - // track the IP of the instruction that generated an AV so we don't - // mix up a real AV with a "fake" AV. - // see comments in function DoGcStress for more details on this race. - // also make sure that the thread is actually in managed code since AVs - // outside of of JIT code will never be potential GC markers +#if defined(GCCOVER_TOLERATE_SPURIOUS_AV) + + // We sometimes can't suspend the EE to update the GC marker instruction so + // we update it directly without suspending. This can sometimes yield + // a STATUS_ACCESS_VIOLATION instead of STATUS_CLR_GCCOVER_CODE. In + // this case we let the AV through and retry the instruction as hopefully + // the race will have resolved. We'll track the IP of the instruction + // that generated an AV so we don't mix up a real AV with a "fake" AV. + // + // See comments in function DoGcStress for more details on this race. + // + // Note these "fake" AVs will be reported by the kernel as reads from + // address 0xF...F so we also use that as a screen. 
Thread* pThread = GetThread(); if (exceptionCode == STATUS_ACCESS_VIOLATION && GCStress::IsEnabled() && + pExceptionRecord->ExceptionInformation[0] == 0 && + pExceptionRecord->ExceptionInformation[1] == ~0 && pThread->GetLastAVAddress() != (LPVOID)GetIP(pContext) && - pThread->PreemptiveGCDisabled() && !IsIPInEE((LPVOID)GetIP(pContext))) { pThread->SetLastAVAddress((LPVOID)GetIP(pContext)); return true; } -#endif // _TARGET_X86_ +#endif // defined(GCCOVER_TOLERATE_SPURIOUS_AV) if (exceptionCode == STATUS_CLR_GCCOVER_CODE) { @@ -7737,7 +7742,7 @@ VEH_ACTION WINAPI CLRVectoredExceptionHandlerPhase3(PEXCEPTION_POINTERS pExcepti // NOTE: this is effectively ifdef (_TARGET_AMD64_ || _TARGET_ARM_), and does not actually trigger // a GC. This will redirect the exception context to a stub which will // push a frame and cause GC. - if (IsGcMarker(exceptionCode, pContext)) + if (IsGcMarker(pContext, pExceptionRecord)) { return VEH_CONTINUE_EXECUTION;; } @@ -8161,11 +8166,11 @@ LONG WINAPI CLRVectoredExceptionHandlerShim(PEXCEPTION_POINTERS pExceptionInfo) #ifdef USE_REDIRECT_FOR_GCSTRESS // This is AMD64 & ARM specific as the macro above is defined for AMD64 & ARM only - bIsGCMarker = IsGcMarker(dwCode, pExceptionInfo->ContextRecord); + bIsGCMarker = IsGcMarker(pExceptionInfo->ContextRecord, pExceptionInfo->ExceptionRecord); #elif defined(_TARGET_X86_) && defined(HAVE_GCCOVER) // This is the equivalent of the check done in COMPlusFrameHandler, incase the exception is // seen by VEH first on x86. 
- bIsGCMarker = IsGcMarker(dwCode, pExceptionInfo->ContextRecord); + bIsGCMarker = IsGcMarker(pExceptionInfo->ContextRecord, pExceptionInfo->ExceptionRecord); #endif // USE_REDIRECT_FOR_GCSTRESS // Do not update the TLS with exception details for exceptions pertaining to GCStress diff --git a/src/vm/excep.h b/src/vm/excep.h index 6df9a98452ab..8c49071a811b 100644 --- a/src/vm/excep.h +++ b/src/vm/excep.h @@ -766,7 +766,7 @@ void CPFH_AdjustContextForThreadSuspensionRace(T_CONTEXT *pContext, Thread *pThr #endif // _TARGET_X86_ DWORD GetGcMarkerExceptionCode(LPVOID ip); -bool IsGcMarker(DWORD exceptionCode, T_CONTEXT *pContext); +bool IsGcMarker(T_CONTEXT *pContext, EXCEPTION_RECORD *pExceptionRecord); void InitSavedExceptionInfo(); diff --git a/src/vm/exceptionhandling.cpp b/src/vm/exceptionhandling.cpp index 38a2a4341943..f78f9c3e1ed7 100644 --- a/src/vm/exceptionhandling.cpp +++ b/src/vm/exceptionhandling.cpp @@ -936,7 +936,7 @@ ProcessCLRException(IN PEXCEPTION_RECORD pExceptionRecord // { #ifndef USE_REDIRECT_FOR_GCSTRESS - if (IsGcMarker(pExceptionRecord->ExceptionCode, pContextRecord)) + if (IsGcMarker(pContextRecord, pExceptionRecord)) { returnDisposition = ExceptionContinueExecution; goto lExit; @@ -5227,7 +5227,7 @@ BOOL HandleHardwareException(PAL_SEHException* ex) // A hardware exception is handled only if it happened in a jitted code or // in one of the JIT helper functions (JIT_MemSet, ...) PCODE controlPc = GetIP(ex->GetContextRecord()); - if (ExecutionManager::IsManagedCode(controlPc) && IsGcMarker(ex->GetExceptionRecord()->ExceptionCode, ex->GetContextRecord())) + if (ExecutionManager::IsManagedCode(controlPc) && IsGcMarker(ex->GetContextRecord(), ex->GetExceptionRecord())) { // Exception was handled, let the signal handler return to the exception context. Some registers in the context can // have been modified by the GC. 
diff --git a/src/vm/gccover.cpp b/src/vm/gccover.cpp index d61a168f4712..66d45cf6ee90 100644 --- a/src/vm/gccover.cpp +++ b/src/vm/gccover.cpp @@ -144,69 +144,11 @@ void SetupGcCoverage(MethodDesc* pMD, BYTE* methodStartPtr) { } #endif - if (pMD->m_GcCover) - return; - - // - // In the gcstress=4 case, we can easily piggy-back onto the JITLock because we - // have a JIT operation that needs to take that lock already. But in the case of - // gcstress=8, we cannot do this because the code already exists, and if gccoverage - // were not in the picture, we're happy to race to do the prestub work because all - // threads end up with the same answer and don't leak any resources in the process. - // - // However, with gccoverage, we need to exclude all other threads from mucking with - // the code while we fill in the breakpoints and make our shadow copy of the code. - // - { - BaseDomain* pDomain = pMD->GetDomain(); - // Enter the global lock which protects the list of all functions being JITd - JitListLock::LockHolder pJitLock(pDomain->GetJitLock()); - - - // It is possible that another thread stepped in before we entered the global lock for the first time. - if (pMD->m_GcCover) - { - // We came in to jit but someone beat us so return the jitted method! - return; - } - else - { - const char *description = "jit lock (gc cover)"; -#ifdef _DEBUG - description = pMD->m_pszDebugMethodName; -#endif - ReleaseHolder pEntry(JitListLockEntry::Find(pJitLock, pMD->GetInitialCodeVersion(), description)); - - // We have an entry now, we can release the global lock - pJitLock.Release(); - - // Take the entry lock - { - JitListLockEntry::LockHolder pEntryLock(pEntry, FALSE); - - if (pEntryLock.DeadlockAwareAcquire()) - { - // we have the lock... - } - else - { - // Note that at this point we don't have the lock, but that's OK because the - // thread which does have the lock is blocked waiting for us. 
- } - - if (pMD->m_GcCover) - { - return; - } - - PCODE codeStart = (PCODE) methodStartPtr; - - SetupAndSprinkleBreakpointsForJittedMethod(pMD, - codeStart - ); - } - } - } + // We should be the first and only thread to instrument this + // method for gc coverage. + _ASSERTE(!pMD->m_GcCover); + PCODE codeStart = (PCODE) methodStartPtr; + SetupAndSprinkleBreakpointsForJittedMethod(pMD, codeStart); } #ifdef FEATURE_PREJIT @@ -1305,6 +1247,8 @@ void RemoveGcCoverageInterrupt(TADDR instrPtr, BYTE * savedInstrPtr) #else *(BYTE *)instrPtr = *savedInstrPtr; #endif + + FlushInstructionCache(GetCurrentProcess(), (LPCVOID)instrPtr, 4); } BOOL OnGcCoverageInterrupt(PCONTEXT regs) @@ -1677,7 +1621,8 @@ void DoGcStress (PCONTEXT regs, MethodDesc *pMD) } // Must flush instruction cache before returning as instruction has been modified. - FlushInstructionCache(GetCurrentProcess(), (LPCVOID)instrPtr, 6); + // Note this needs to reach beyond the call by up to 4 bytes. + FlushInstructionCache(GetCurrentProcess(), (LPCVOID)instrPtr, 10); // It's not GC safe point, the GC Stress instruction is // already commited and interrupt is already put at next instruction so we just return. diff --git a/src/vm/i386/excepx86.cpp b/src/vm/i386/excepx86.cpp index 9f19d47440a9..9f558c457e08 100644 --- a/src/vm/i386/excepx86.cpp +++ b/src/vm/i386/excepx86.cpp @@ -1347,7 +1347,7 @@ CPFH_FirstPassHandler(EXCEPTION_RECORD *pExceptionRecord, // Check to see if this exception is due to GCStress. Since the GCStress mechanism only injects these faults // into managed code, we only need to check for them in CPFH_FirstPassHandler. // - if (IsGcMarker(exceptionCode, pContext)) + if (IsGcMarker(pContext, pExceptionRecord)) { return ExceptionContinueExecution; } @@ -1675,7 +1675,7 @@ EXCEPTION_HANDLER_IMPL(COMPlusFrameHandler) // it is very easy to trash the last error. For example, a p/invoke called a native method // which sets last error. 
Before we getting the last error in the IL stub, it is trashed here DWORD dwLastError = GetLastError(); - fIsGCMarker = IsGcMarker(pExceptionRecord->ExceptionCode, pContext); + fIsGCMarker = IsGcMarker(pContext, pExceptionRecord); if (!fIsGCMarker) { SaveCurrentExceptionInfo(pExceptionRecord, pContext); @@ -3693,12 +3693,11 @@ AdjustContextForVirtualStub( pExceptionRecord->ExceptionAddress = (PVOID)callsite; SetIP(pContext, callsite); -#ifdef HAVE_GCCOVER +#if defined(GCCOVER_TOLERATE_SPURIOUS_AV) // Modify LastAVAddress saved in thread to distinguish between fake & real AV // See comments in IsGcMarker in file excep.cpp for more details pThread->SetLastAVAddress((LPVOID)GetIP(pContext)); -#endif - +#endif // defined(GCCOVER_TOLERATE_SPURIOUS_AV) // put ESP back to what it was before the call. SetSP(pContext, dac_cast(dac_cast(GetSP(pContext)) + sizeof(void*))); diff --git a/src/vm/method.hpp b/src/vm/method.hpp index c1316d06c3b7..fd91631214b1 100644 --- a/src/vm/method.hpp +++ b/src/vm/method.hpp @@ -1876,6 +1876,14 @@ class MethodDesc PCODE JitCompileCodeLockedEventWrapper(PrepareCodeConfig* pConfig, JitListLockEntry* pEntry); PCODE JitCompileCodeLocked(PrepareCodeConfig* pConfig, JitListLockEntry* pLockEntry, ULONG* pSizeOfCode, CORJIT_FLAGS* pFlags); #endif // DACCESS_COMPILE + +#ifdef HAVE_GCCOVER +private: + static CrstStatic m_GCCoverCrst; + +public: + static void Init(); +#endif }; #ifndef DACCESS_COMPILE diff --git a/src/vm/prestub.cpp b/src/vm/prestub.cpp index 507f8d3d0061..532f04ffc145 100644 --- a/src/vm/prestub.cpp +++ b/src/vm/prestub.cpp @@ -65,6 +65,16 @@ EXTERN_C void MarkMethodNotPitchingCandidate(MethodDesc* pMD); EXTERN_C void STDCALL ThePreStubPatch(); +#if defined(HAVE_GCCOVER) +CrstStatic MethodDesc::m_GCCoverCrst; + +void MethodDesc::Init() +{ + m_GCCoverCrst.Init(CrstGCCover); +} + +#endif + //========================================================================== PCODE MethodDesc::DoBackpatch(MethodTable * pMT, MethodTable 
*pDispatchingMT, BOOL fFullBackPatch) @@ -874,32 +884,49 @@ PCODE MethodDesc::JitCompileCodeLocked(PrepareCodeConfig* pConfig, JitListLockEn } _ASSERTE(pCode != NULL); - - // Aside from rejit, performing a SetNativeCodeInterlocked at this point - // generally ensures that there is only one winning version of the native - // code. This also avoid races with profiler overriding ngened code (see - // matching SetNativeCodeInterlocked done after - // JITCachedFunctionSearchStarted) + +#ifdef HAVE_GCCOVER + // Instrument for coverage before trying to publish this version + // of the code as the native code, to avoid other threads seeing + // partially instrumented methods. + if (GCStress::IsEnabled()) { - if (!pConfig->SetNativeCode(pCode, &pOtherCode)) + // Do the instrumentation and publish atomically, so that the + // instrumentation data always matches the published code. + CrstHolder gcCoverLock(&m_GCCoverCrst); + + // Make sure no other thread has stepped in before us. + if ((pOtherCode = pConfig->IsJitCancellationRequested())) { - // Another thread beat us to publishing its copy of the JITted code. return pOtherCode; } -#if defined(FEATURE_JIT_PITCHING) - else + + SetupGcCoverage(this, (BYTE*)pCode); + + // This thread should always win the publishing race + // since we're under a lock. + if (!pConfig->SetNativeCode(pCode, &pOtherCode)) { - SavePitchingCandidate(this, *pSizeOfCode); + _ASSERTE(!"GC Cover native code publish failed"); } -#endif } + else +#endif // HAVE_GCCOVER -#ifdef HAVE_GCCOVER - if (GCStress::IsEnabled()) + // Aside from rejit, performing a SetNativeCodeInterlocked at this point + // generally ensures that there is only one winning version of the native + // code. 
This also avoid races with profiler overriding ngened code (see + // matching SetNativeCodeInterlocked done after + // JITCachedFunctionSearchStarted) + if (!pConfig->SetNativeCode(pCode, &pOtherCode)) { - SetupGcCoverage(this, (BYTE*)pCode); + // Another thread beat us to publishing its copy of the JITted code. + return pOtherCode; } -#endif // HAVE_GCCOVER + +#if defined(FEATURE_JIT_PITCHING) + SavePitchingCandidate(this, *pSizeOfCode); +#endif // We succeeded in jitting the code, and our jitted code is the one that's going to run now. pEntry->m_hrResultCode = S_OK; diff --git a/src/vm/threads.cpp b/src/vm/threads.cpp index 283c6299cad1..71ddb6560be5 100644 --- a/src/vm/threads.cpp +++ b/src/vm/threads.cpp @@ -1615,9 +1615,9 @@ Thread::Thread() #ifdef HAVE_GCCOVER m_pbDestCode = NULL; m_pbSrcCode = NULL; -#ifdef _TARGET_X86_ +#if defined(GCCOVER_TOLERATE_SPURIOUS_AV) m_pLastAVAddress = NULL; -#endif // _TARGET_X86_ +#endif // defined(GCCOVER_TOLERATE_SPURIOUS_AV) #endif // HAVE_GCCOVER m_fCompletionPortDrained = FALSE; diff --git a/src/vm/threads.h b/src/vm/threads.h index be7ce6afd55c..7dd98b15f06a 100644 --- a/src/vm/threads.h +++ b/src/vm/threads.h @@ -4813,9 +4813,9 @@ class Thread: public IUnknown private: BYTE* m_pbDestCode; BYTE* m_pbSrcCode; -#ifdef _TARGET_X86_ +#if defined(GCCOVER_TOLERATE_SPURIOUS_AV) LPVOID m_pLastAVAddress; -#endif // _TARGET_X86_ +#endif // defined(GCCOVER_TOLERATE_SPURIOUS_AV) public: void CommitGCStressInstructionUpdate(); @@ -4841,7 +4841,7 @@ class Thread: public IUnknown m_pbDestCode = NULL; m_pbSrcCode = NULL; } -#ifdef _TARGET_X86_ +#if defined(GCCOVER_TOLERATE_SPURIOUS_AV) void SetLastAVAddress(LPVOID address) { LIMITED_METHOD_CONTRACT; @@ -4852,7 +4852,7 @@ class Thread: public IUnknown LIMITED_METHOD_CONTRACT; return m_pLastAVAddress; } -#endif // _TARGET_X86_ +#endif // defined(GCCOVER_TOLERATE_SPURIOUS_AV) #endif // HAVE_GCCOVER #if defined(_DEBUG) && defined(FEATURE_STACK_PROBE) From 
03d8d9aaef516ac77c222de2326a58baddb2eb09 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 18 Apr 2018 01:42:35 -0700 Subject: [PATCH 2/2] restore bailout clause in SetupGcCoverage --- src/vm/gccover.cpp | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/src/vm/gccover.cpp b/src/vm/gccover.cpp index 66d45cf6ee90..ca9168788723 100644 --- a/src/vm/gccover.cpp +++ b/src/vm/gccover.cpp @@ -144,9 +144,29 @@ void SetupGcCoverage(MethodDesc* pMD, BYTE* methodStartPtr) { } #endif - // We should be the first and only thread to instrument this - // method for gc coverage. - _ASSERTE(!pMD->m_GcCover); + // Ideally we would assert here that m_GcCover is NULL. + // + // However, we can't do that (at least not yet), because we may + // invoke this method more than once on a given + // MethodDesc. Examples include prejitted methods and rejitted + // methods. + // + // In the prejit case, we can't safely re-instrument an already + // instrumented method. By bailing out here, we will use the + // original instrumentation, which should still be valid as + // the method code has not changed. + // + // In the rejit case, the old method code may still be active and + // instrumented, so we need to preserve that gc cover info. By + // bailing out here we will skip instrumenting the rejitted native + // code, and since the rejitted method does not get instrumented + // we should be able to tolerate that the gc cover info does not + // match. + if (pMD->m_GcCover) + { + return; + } + PCODE codeStart = (PCODE) methodStartPtr; SetupAndSprinkleBreakpointsForJittedMethod(pMD, codeStart); }