From 3c2f24ab5c9c360d14f6caa7efb6acf667da9bdf Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 25 Sep 2020 18:38:18 -0700 Subject: [PATCH 01/10] Initial version of class profiling for PGO Add support to the jit and runtime so that PGO can determine the distribution of classes at virtual and indirect call sites. Use this information when jitting to enable guarded devirtualization, if there is a suitably likely class to guess for. Enable by setting: ``` COMPlus_TieredCompilation=1 COMPlus_TieredPGO=1 COMPlus_JitClassProfiling=1 COMPlus_JitEnableGuardedDevirtualization=1 ``` impact can be enhanced by also setting ``` COMPlus_TC_QuickJitForLoops=1 ``` to allow more methods to pass through Tier0. --- .../templates/runtimes/run-test-job.yml | 1 + .../superpmi-shared/icorjitinfoimpl.h | 9 + .../superpmi/superpmi-shared/lwmlist.h | 1 + .../superpmi-shared/methodcontext.cpp | 42 +++ .../superpmi/superpmi-shared/methodcontext.h | 20 + .../superpmi-shim-collector/icorjitinfo.cpp | 15 + .../superpmi-shim-counter/icorjitinfo.cpp | 13 + .../superpmi-shim-simple/icorjitinfo.cpp | 12 + .../ToolBox/superpmi/superpmi/icorjitinfo.cpp | 13 + src/coreclr/src/inc/corinfo.h | 1 + src/coreclr/src/inc/corjit.h | 19 + src/coreclr/src/inc/jithelpers.h | 1 + src/coreclr/src/jit/block.h | 3 +- src/coreclr/src/jit/compiler.cpp | 1 + src/coreclr/src/jit/compiler.h | 11 +- src/coreclr/src/jit/flowgraph.cpp | 353 +++++++++++++++--- src/coreclr/src/jit/gentree.h | 2 + src/coreclr/src/jit/importer.cpp | 234 ++++++++++-- .../src/jit/indirectcalltransformer.cpp | 30 +- src/coreclr/src/jit/inline.h | 20 +- src/coreclr/src/jit/jitconfigvalues.h | 6 + src/coreclr/src/jit/patchpoint.cpp | 7 +- src/coreclr/src/jit/utils.h | 5 + src/coreclr/src/vm/jithelpers.cpp | 68 ++++ src/coreclr/src/vm/jitinterface.cpp | 67 +++- src/coreclr/src/vm/jitinterface.h | 16 + src/coreclr/src/vm/pgo.cpp | 328 +++++++++++++++- src/coreclr/src/vm/pgo.h | 18 +- src/coreclr/src/zap/zapinfo.cpp | 10 + src/coreclr/src/zap/zapinfo.h | 7 + src/tests/Common/testenvironment.proj | 2 + 31 files changed, 1195 insertions(+), 140 deletions(-) diff --git a/eng/pipelines/common/templates/runtimes/run-test-job.yml b/eng/pipelines/common/templates/runtimes/run-test-job.yml index 5fd480b3a72504..66f3b40d7e843e 100644 --- a/eng/pipelines/common/templates/runtimes/run-test-job.yml +++ b/eng/pipelines/common/templates/runtimes/run-test-job.yml @@ -472,6 +472,7 @@ jobs: - jitobjectstackallocation - jitpgo - jitpgo_inline + - jitpgo_classes ${{ if in(parameters.testGroup, 'ilasm') }}: scenarios: - ilasmroundtrip diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/icorjitinfoimpl.h b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/icorjitinfoimpl.h index 0a2ea7b63f6495..9f62dd62460852 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/icorjitinfoimpl.h +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/icorjitinfoimpl.h @@ -993,6 +993,15 @@ HRESULT getMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd, BlockCounts** pBlockCounts, UINT32 * pNumRuns); +// Get the likely implementing class for a virtual call or interface call made by ftnHnd +// at the indicated IL offset. baseHnd is the interface class or base class for the method +// being called. +CORINFO_CLASS_HANDLE getLikelyClass(CORINFO_METHOD_HANDLE ftnHnd, + CORINFO_CLASS_HANDLE baseHnd, + UINT32 ilOffset, + UINT32* pLikelihood, + UINT32* pNumberOfCases); + // Associates a native call site, identified by its offset in the native code stream, with // the signature information and method handle the JIT used to lay out the call site. If // the call site has no signature information (e.g. a helper call) or has no method handle diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/lwmlist.h b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/lwmlist.h index 3355f0149ec71e..803eae030a4528 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/lwmlist.h +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/lwmlist.h @@ -100,6 +100,7 @@ LWM(GetJitFlags, DWORD, DD) LWM(GetJitTimeLogFilename, DWORD, DWORD) LWM(GetJustMyCodeHandle, DWORDLONG, DLDL) LWM(GetLazyStringLiteralHelper, DWORDLONG, DWORD) +LWM(GetLikelyClass, Agnostic_GetLikelyClass, Agnostic_GetLikelyClassResult) LWM(GetLocationOfThisType, DWORDLONG, Agnostic_CORINFO_LOOKUP_KIND) LWM(GetMethodAttribs, DWORDLONG, DWORD) LWM(GetMethodClass, DWORDLONG, DWORDLONG) diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.cpp index 054a60e09860ca..43131ddf4c1291 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.cpp @@ -5265,6 +5265,48 @@ HRESULT MethodContext::repGetMethodBlockCounts(CORINFO_METHOD_HANDLE ftnH return result; } +void MethodContext::recGetLikelyClass(CORINFO_METHOD_HANDLE ftnHnd, CORINFO_CLASS_HANDLE baseHnd, UINT32 ilOffset, CORINFO_CLASS_HANDLE result, UINT32* pLikelihood, UINT32* pNumberOfClasses) +{ + if (GetLikelyClass == nullptr) + GetLikelyClass = new LightWeightMap(); + + Agnostic_GetLikelyClass key; + ZeroMemory(&key, sizeof(Agnostic_GetLikelyClass)); + + key.ftnHnd = (DWORDLONG) ftnHnd; + key.baseHnd = (DWORDLONG) baseHnd; + key.ilOffset = (DWORD) ilOffset; + + Agnostic_GetLikelyClassResult value; + ZeroMemory(&value, sizeof(Agnostic_GetLikelyClassResult)); + value.classHnd = (DWORDLONG) result; + value.likelihood = *pLikelihood; + value.numberOfClasses = *pNumberOfClasses; + + GetLikelyClass->Add(key, value); + DEBUG_REC(dmpGetLikelyClass(key, value)); +} +void MethodContext::dmpGetLikelyClass(const Agnostic_GetLikelyClass& key, const Agnostic_GetLikelyClassResult& value) +{ + printf("GetLikelyClass key ftn-%016llX base-%016llX il-%u, class-%016llX likelihood-%u numberOfClasses-%u", + key.ftnHnd, key.baseHnd, key.ilOffset, value.classHnd, value.likelihood, value.numberOfClasses); +} +CORINFO_CLASS_HANDLE MethodContext::repGetLikelyClass(CORINFO_METHOD_HANDLE ftnHnd, CORINFO_CLASS_HANDLE baseHnd, UINT32 ilOffset, UINT32* pLikelihood, UINT32* pNumberOfClasses) +{ + Agnostic_GetLikelyClass key; + ZeroMemory(&key, sizeof(Agnostic_GetLikelyClass)); + key.ftnHnd = (DWORDLONG) ftnHnd; + key.baseHnd = (DWORDLONG) baseHnd; + key.ilOffset = (DWORD) ilOffset; + + Agnostic_GetLikelyClassResult value = GetLikelyClass->Get(key); + DEBUG_REP(dmpGetLikelyClass(key, value)); + + *pLikelihood = value.likelihood; + *pNumberOfClasses = value.numberOfClasses; + return (CORINFO_CLASS_HANDLE) value.classHnd; +} + void MethodContext::recMergeClasses(CORINFO_CLASS_HANDLE cls1, CORINFO_CLASS_HANDLE cls2, CORINFO_CLASS_HANDLE result) { if (MergeClasses == nullptr) diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.h b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.h index 5dafd7c79456ab..158184555abdd0 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.h +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.h @@ -435,6 +435,21 @@ class MethodContext DWORD numRuns; DWORD result; }; + + struct Agnostic_GetLikelyClass + { + DWORDLONG ftnHnd; + DWORDLONG baseHnd; + DWORD ilOffset; + }; + + struct Agnostic_GetLikelyClassResult + { + DWORDLONG classHnd; + DWORD likelihood; + DWORD numberOfClasses; + }; + struct Agnostic_GetProfilingHandle { DWORD bHookFunction; @@ -1193,6 +1208,10 @@ class MethodContext ICorJitInfo::BlockCounts** pBlockCounts, UINT32 * pNumRuns); + void recGetLikelyClass(CORINFO_METHOD_HANDLE ftnHnd, CORINFO_CLASS_HANDLE baseHnd, UINT32 ilOffset, CORINFO_CLASS_HANDLE classHnd, UINT32* pLikelihood, UINT32* pNumberOfClasses); + void dmpGetLikelyClass(const Agnostic_GetLikelyClass& key, const Agnostic_GetLikelyClassResult& value); + CORINFO_CLASS_HANDLE repGetLikelyClass(CORINFO_METHOD_HANDLE ftnHnd, CORINFO_CLASS_HANDLE baseHnd, UINT32 ilOffset, UINT32* pLikelihood, UINT32* pNumberOfClasses); + void recMergeClasses(CORINFO_CLASS_HANDLE cls1, CORINFO_CLASS_HANDLE cls2, CORINFO_CLASS_HANDLE result); void dmpMergeClasses(DLDL key, DWORDLONG value); CORINFO_CLASS_HANDLE repMergeClasses(CORINFO_CLASS_HANDLE cls1, CORINFO_CLASS_HANDLE cls2); @@ -1458,6 +1477,7 @@ enum mcPackets Packet_GetJitFlags = 154, // Added 2/3/2016 Packet_GetJitTimeLogFilename = 67, Packet_GetJustMyCodeHandle = 68, + Packet_GetLikelyClass = 181, // Added 9/27/2020 Packet_GetLocationOfThisType = 69, Packet_GetMethodAttribs = 70, Packet_GetMethodClass = 71, diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp b/src/coreclr/src/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp index f56e01989b7f75..116e0756c7e9ba 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp @@ -2049,6 +2049,21 @@ HRESULT interceptor_ICJI::getMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd, return temp; } +// Get the likely implementing class for a virtual call or interface call made by ftnHnd +// at the indicated IL offset. baseHnd is the interface class or base class for the method +// being called. +CORINFO_CLASS_HANDLE interceptor_ICJI::getLikelyClass(CORINFO_METHOD_HANDLE ftnHnd, + CORINFO_CLASS_HANDLE baseHnd, + UINT32 ilOffset, + UINT32* pLikelihood, + UINT32* pNumberOfClasses) +{ + mc->cr->AddCall("getLikelyClass"); + CORINFO_CLASS_HANDLE result = original_ICorJitInfo->getLikelyClass(ftnHnd, baseHnd, ilOffset, pLikelihood, pNumberOfClasses); + mc->recGetLikelyClass(ftnHnd, baseHnd, ilOffset, result, pLikelihood, pNumberOfClasses); + return result; +} + // Associates a native call site, identified by its offset in the native code stream, with // the signature information and method handle the JIT used to lay out the call site. If // the call site has no signature information (e.g. a helper call) or has no method handle diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shim-counter/icorjitinfo.cpp b/src/coreclr/src/ToolBox/superpmi/superpmi-shim-counter/icorjitinfo.cpp index 03a5643d6e6cf6..496dd3d04dbc64 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi-shim-counter/icorjitinfo.cpp +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shim-counter/icorjitinfo.cpp @@ -1623,6 +1623,19 @@ HRESULT interceptor_ICJI::getMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd, return original_ICorJitInfo->getMethodBlockCounts(ftnHnd, pCount, pBlockCounts, pNumRuns); } +// Get the likely implementing class for a virtual call or interface call made by ftnHnd +// at the indicated IL offset. baseHnd is the interface class or base class for the method +// being called. +CORINFO_CLASS_HANDLE interceptor_ICJI::getLikelyClass(CORINFO_METHOD_HANDLE ftnHnd, + CORINFO_CLASS_HANDLE baseHnd, + UINT32 ilOffset, + UINT32* pLikelihood, + UINT32* pNumberOfClasses) +{ + mcs->AddCall("getLikelyClass"); + return original_ICorJitInfo->getLikelyClass(ftnHnd, baseHnd, ilOffset, pLikelihood, pNumberOfClasses); +} + // Associates a native call site, identified by its offset in the native code stream, with // the signature information and method handle the JIT used to lay out the call site. If // the call site has no signature information (e.g. a helper call) or has no method handle diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shim-simple/icorjitinfo.cpp b/src/coreclr/src/ToolBox/superpmi/superpmi-shim-simple/icorjitinfo.cpp index 139937e4c03fb7..613a299c1fe403 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi-shim-simple/icorjitinfo.cpp +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shim-simple/icorjitinfo.cpp @@ -1441,6 +1441,18 @@ HRESULT interceptor_ICJI::getMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd, return original_ICorJitInfo->getMethodBlockCounts(ftnHnd, pCount, pBlockCounts, pNumRuns); } +// Get the likely implementing class for a virtual call or interface call made by ftnHnd +// at the indicated IL offset. baseHnd is the interface class or base class for the method +// being called. +CORINFO_CLASS_HANDLE interceptor_ICJI::getLikelyClass(CORINFO_METHOD_HANDLE ftnHnd, + CORINFO_CLASS_HANDLE baseHnd, + UINT32 ilOffset, + UINT32* pLikelihood, + UINT32* pNumberOfClasses) +{ + return original_ICorJitInfo->getLikelyClass(ftnHnd, baseHnd, ilOffset, pLikelihood, pNumberOfClasses); +} + // Associates a native call site, identified by its offset in the native code stream, with // the signature information and method handle the JIT used to lay out the call site. If // the call site has no signature information (e.g. a helper call) or has no method handle diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi/icorjitinfo.cpp b/src/coreclr/src/ToolBox/superpmi/superpmi/icorjitinfo.cpp index c8b42b49aab7cf..39aa3661c2f2c3 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi/icorjitinfo.cpp +++ b/src/coreclr/src/ToolBox/superpmi/superpmi/icorjitinfo.cpp @@ -1805,6 +1805,19 @@ HRESULT MyICJI::getMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd, return jitInstance->mc->repGetMethodBlockCounts(ftnHnd, pCount, pBlockCounts, pNumRuns); } +// Get the likely implementing class for a virtual call or interface call made by ftnHnd +// at the indicated IL offset. baseHnd is the interface class or base class for the method +// being called. +CORINFO_CLASS_HANDLE MyICJI::getLikelyClass(CORINFO_METHOD_HANDLE ftnHnd, + CORINFO_CLASS_HANDLE baseHnd, + UINT32 ilOffset, + UINT32* pLikelihood, + UINT32* pNumberOfClasses) +{ + jitInstance->mc->cr->AddCall("getLikelyClass"); + return jitInstance->mc->repGetLikelyClass(ftnHnd, baseHnd, ilOffset, pLikelihood, pNumberOfClasses); +} + // Associates a native call site, identified by its offset in the native code stream, with // the signature information and method handle the JIT used to lay out the call site. If // the call site has no signature information (e.g. a helper call) or has no method handle diff --git a/src/coreclr/src/inc/corinfo.h b/src/coreclr/src/inc/corinfo.h index 5a4102331d7a04..8e472f3ef17b74 100644 --- a/src/coreclr/src/inc/corinfo.h +++ b/src/coreclr/src/inc/corinfo.h @@ -623,6 +623,7 @@ enum CorInfoHelpFunc CORINFO_HELP_STACK_PROBE, // Probes each page of the allocated stack frame CORINFO_HELP_PATCHPOINT, // Notify runtime that code has reached a patchpoint + CORINFO_HELP_CLASSPROFILE, // Update class profile for a call site CORINFO_HELP_COUNT, }; diff --git a/src/coreclr/src/inc/corjit.h b/src/coreclr/src/inc/corjit.h index 609bfc64b893ed..d58fb9a0fb25d8 100644 --- a/src/coreclr/src/inc/corjit.h +++ b/src/coreclr/src/inc/corjit.h @@ -251,6 +251,14 @@ class ICorJitInfo : public ICorDynamicInfo UINT32 ExecutionCount; }; + struct ClassProfile + { + enum { SIZE = 8, SAMPLE_INTERVAL = 32 }; + UINT32 ILOffset; + UINT32 Count; + CORINFO_CLASS_HANDLE ClassTable[SIZE]; + }; + // allocate a basic block profile buffer where execution counts will be stored // for jitted basic blocks. virtual HRESULT allocMethodBlockCounts ( @@ -267,6 +275,17 @@ class ICorJitInfo : public ICorDynamicInfo UINT32 * pNumRuns // pointer to the total number of profile scenarios run ) = 0; + // Get the likely implementing class for a virtual call or interface call made by ftnHnd + // at the indicated IL offset. baseHnd is the interface class or base class for the method + // being called. + virtual CORINFO_CLASS_HANDLE getLikelyClass( + CORINFO_METHOD_HANDLE ftnHnd, + CORINFO_CLASS_HANDLE baseHnd, + UINT32 ilOffset, + UINT32 * pLikelihood, // estimated likelihood of the class (0...100) + UINT32 * pNumberOfClasses // estimated number of possible classes + ) = 0; + // Associates a native call site, identified by its offset in the native code stream, with // the signature information and method handle the JIT used to lay out the call site. If // the call site has no signature information (e.g. a helper call) or has no method handle diff --git a/src/coreclr/src/inc/jithelpers.h b/src/coreclr/src/inc/jithelpers.h index a1f6878930188a..38f518251cf5de 100644 --- a/src/coreclr/src/inc/jithelpers.h +++ b/src/coreclr/src/inc/jithelpers.h @@ -355,6 +355,7 @@ #endif JITHELPER(CORINFO_HELP_PATCHPOINT, JIT_Patchpoint, CORINFO_HELP_SIG_REG_ONLY) + JITHELPER(CORINFO_HELP_CLASSPROFILE, JIT_ClassProfile, CORINFO_HELP_SIG_REG_ONLY) #undef JITHELPER #undef DYNAMICJITHELPER diff --git a/src/coreclr/src/jit/block.h b/src/coreclr/src/jit/block.h index 949b2460c6b1ae..b1e4e67fd28ee1 100644 --- a/src/coreclr/src/jit/block.h +++ b/src/coreclr/src/jit/block.h @@ -448,6 +448,7 @@ struct BasicBlock : private LIR::Range #define BBF_BACKWARD_JUMP_TARGET MAKE_BBFLAG(36) // Block is a target of a backward jump #define BBF_PATCHPOINT MAKE_BBFLAG(37) // Block is a patchpoint +#define BBF_HAS_VIRTUAL_CALL MAKE_BBFLAG(38) // BB contains a call needing a class profile // clang-format on @@ -492,7 +493,7 @@ struct BasicBlock : private LIR::Range #define BBF_SPLIT_GAINED \ (BBF_DONT_REMOVE | BBF_HAS_LABEL | BBF_HAS_JMP | BBF_BACKWARD_JUMP | BBF_HAS_IDX_LEN | BBF_HAS_NEWARRAY | \ BBF_PROF_WEIGHT | BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_HAS_NULLCHECK | \ - BBF_HAS_VTABREF) + BBF_HAS_VTABREF | BBF_HAS_VIRTUAL_CALL) #ifndef __GNUC__ // GCC doesn't like C_ASSERT at global scope static_assert_no_msg((BBF_SPLIT_NONEXIST & BBF_SPLIT_LOST) == 0); diff --git a/src/coreclr/src/jit/compiler.cpp b/src/coreclr/src/jit/compiler.cpp index bc3be906321a3f..3a6ea5006b268b 100644 --- a/src/coreclr/src/jit/compiler.cpp +++ b/src/coreclr/src/jit/compiler.cpp @@ -5982,6 +5982,7 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr, info.compNativeCodeSize = 0; info.compTotalHotCodeSize = 0; info.compTotalColdCodeSize = 0; + info.compClassProbeCount = 0; compHasBackwardJump = false; diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index d8de6c9afa075b..eb0c2a7cee7bfd 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -3779,7 +3779,8 @@ class Compiler CORINFO_CONTEXT_HANDLE* contextHandle, CORINFO_CONTEXT_HANDLE* exactContextHandle, bool isLateDevirtualization, - bool isExplicitTailCall); + bool isExplicitTailCall, + IL_OFFSETX ilOffset = BAD_IL_OFFSET); //========================================================================= // PROTECTED @@ -6697,6 +6698,7 @@ class Compiler #define OMF_HAS_EXPRUNTIMELOOKUP 0x00000100 // Method contains a runtime lookup to an expandable dictionary. #define OMF_HAS_PATCHPOINT 0x00000200 // Method contains patchpoints #define OMF_NEEDS_GCPOLLS 0x00000400 // Method needs GC polls +#define OMF_HAS_VIRTUAL_CALL 0x00000800 // Method contains call that needs a class profile bool doesMethodHaveFatPointer() { @@ -6734,7 +6736,8 @@ class Compiler CORINFO_METHOD_HANDLE methodHandle, CORINFO_CLASS_HANDLE classHandle, unsigned methodAttr, - unsigned classAttr); + unsigned classAttr, + unsigned likelihood); bool doesMethodHaveExpRuntimeLookup() { @@ -9299,6 +9302,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #define CPU_ARM64 0x0400 // The generic ARM64 CPU unsigned genCPU; // What CPU are we running on + + // Number of class profile probes in this method + unsigned compClassProbeCount; + } info; // Returns true if the method being compiled returns a non-void and non-struct value. diff --git a/src/coreclr/src/jit/flowgraph.cpp b/src/coreclr/src/jit/flowgraph.cpp index 5e413987707017..82d1cc7ecfb1b4 100644 --- a/src/coreclr/src/jit/flowgraph.cpp +++ b/src/coreclr/src/jit/flowgraph.cpp @@ -381,12 +381,34 @@ bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, unsigned* weigh return true; } +//------------------------------------------------------------------------ +// fgInstrumentMethod: add instrumentation probes to the method +// +// Note: +// +// By default this instruments each non-internal block with +// a counter probe. +// +// Probes data is held in a runtime-allocated slab of Entries, with +// each Entry an (IL offset, count) pair. This method determines +// the number of Entrys needed and initializes each entry's IL offset. +// +// Options (many not yet implemented): +// * suppress count instrumentation for methods with +// a single block, or +// * instrument internal blocks (requires same internal expansions +// for BBOPT and BBINSTR, not yet guaranteed) +// * use spanning tree for minimal count probing +// * add class profile probes for virtual and interface call sites +// * record indirection cells for VSD calls +// void Compiler::fgInstrumentMethod() { noway_assert(!compIsForInlining()); // Count the number of basic blocks in the method - + // that will get block count probes. + // int countOfBlocks = 0; BasicBlock* block; for (block = fgFirstBB; (block != nullptr); block = block->bbNext) @@ -398,45 +420,254 @@ void Compiler::fgInstrumentMethod() countOfBlocks++; } + // We've alread counted the number of class probes + // when importing. + // + int countOfCalls = info.compClassProbeCount; + + // Optionally bail out, if there are less than three blocks an no call sites to profile + // One block is common. We don't expect to see zero or two blcoks here. + // + // Note we have to at least visit all the profile call sites to properly restore their + // stub addresses. So we can't bail out early if there are any of these. + // + if ((JitConfig.JitMinimalProfiling() > 0) && (countOfBlocks < 3) && (countOfCalls == 0)) + { + JITDUMP("Not instrumenting method: %d blocks and %d calls\n", countOfBlocks, countOfCalls); + assert(countOfBlocks == 1); + return; + } + + JITDUMP("Instrumenting method, %d blocks and %d calls\n", countOfBlocks, countOfCalls); + // Allocate the profile buffer + // + // Allocation is in multiples of ICorJitInfo::BlockCounts. For each profile table we need + // some multiple of these. + // + const unsigned entriesPerCall = sizeof(ICorJitInfo::ClassProfile) / sizeof(ICorJitInfo::BlockCounts); + assert(entriesPerCall * sizeof(ICorJitInfo::BlockCounts) == sizeof(ICorJitInfo::ClassProfile)); + + const unsigned totalEntries = countOfBlocks + entriesPerCall * countOfCalls; + ICorJitInfo::BlockCounts* profileBlockCountsStart = nullptr; - ICorJitInfo::BlockCounts* profileBlockCountsStart; + HRESULT res = info.compCompHnd->allocMethodBlockCounts(totalEntries, &profileBlockCountsStart); - HRESULT res = info.compCompHnd->allocMethodBlockCounts(countOfBlocks, &profileBlockCountsStart); + // We may not be able to instrument, if so we'll set this false. + // We can't just early exit, because we have to clean up calls that we might have profiled. + // + bool instrument = true; if (!SUCCEEDED(res)) { + JITDUMP("Unable to instrument -- block counter allocation failed: 0x%x\n", res); + instrument = false; // The E_NOTIMPL status is returned when we are profiling a generic method from a different assembly - if (res == E_NOTIMPL) - { - // expected failure... - } - else + if (res != E_NOTIMPL) { noway_assert(!"Error: failed to allocate profileBlockCounts"); return; } } - else - { - // For each BasicBlock (non-Internal) - // 1. Assign the blocks bbCodeOffs to the ILOffset field of this blocks profile data. - // 2. Add an operation that increments the ExecutionCount field at the beginning of the block. - // Each (non-Internal) block has it own BlockCounts tuple [ILOffset, ExecutionCount] - // To start we initialize our current one with the first one that we allocated + ICorJitInfo::BlockCounts* profileBlockCountsEnd = &profileBlockCountsStart[countOfBlocks]; + ICorJitInfo::BlockCounts* profileEnd = &profileBlockCountsStart[totalEntries]; + + // For each BasicBlock (non-Internal) + // 1. Assign the blocks bbCodeOffs to the ILOffset field of this blocks profile data. + // 2. Add an operation that increments the ExecutionCount field at the beginning of the block. + // + // Each (non-Internal) block has it own BlockCounts tuple [ILOffset, ExecutionCount] + // To start we initialize our current one with the first one that we allocated + // + ICorJitInfo::BlockCounts* currentBlockCounts = profileBlockCountsStart; + + for (block = fgFirstBB; (block != nullptr); block = block->bbNext) + { + // We don't want to profile any un-imported blocks // - ICorJitInfo::BlockCounts* currentBlockCounts = profileBlockCountsStart; + if ((block->bbFlags & BBF_IMPORTED) == 0) + { + continue; + } - for (block = fgFirstBB; (block != nullptr); block = block->bbNext) + // We may see class probes in internal blcoks, thanks to the + // block splitting done by the indirect call transformer. + // + if (JitConfig.JitClassProfiling() > 0) { - if (!(block->bbFlags & BBF_IMPORTED) || (block->bbFlags & BBF_INTERNAL)) + // Only works when jitting. + assert(!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)); + + if ((block->bbFlags & BBF_HAS_VIRTUAL_CALL) != 0) { - continue; + // Would be nice to avoid having to search here by tracking + // candidates more directly. + // + JITDUMP("Scanning for calls to profile in " FMT_BB "\n", block->bbNum); + + class ClassProbeVisitor final : public GenTreeVisitor + { + public: + enum + { + DoPreOrder = true + }; + + int m_count; + ICorJitInfo::ClassProfile* m_tableBase; + bool m_instrument; + + ClassProbeVisitor(Compiler* compiler, ICorJitInfo::ClassProfile* tableBase, bool instrument) + : GenTreeVisitor(compiler) + , m_count(0) + , m_tableBase(tableBase) + , m_instrument(instrument) + { + } + Compiler::fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) + { + GenTree* const node = *use; + if (node->IsCall()) + { + GenTreeCall* const call = node->AsCall(); + if (call->IsVirtual() && (call->gtCallType != CT_INDIRECT)) + { + JITDUMP("Found call [%06u] with probe index %d and ilOffset 0x%X\n", + m_compiler->dspTreeID(call), call->gtClassProfileCandidateInfo->probeIndex, + call->gtClassProfileCandidateInfo->ilOffset); + + m_count++; + + if (m_instrument) + { + // We transform the call from (CALLVIRT obj, ... args ...) to + // to + // (CALLVIRT + // (COMMA + // (ASG tmp, obj) + // (COMMA + // (CALL probe_fn tmp, &probeEntry) + // tmp))) + // ... args ...) + // + + assert(call->gtCallThisArg->GetNode()->TypeGet() == TYP_REF); + + // Figure out where the table is located. + // + ICorJitInfo::ClassProfile* classProfile = + &m_tableBase[call->gtClassProfileCandidateInfo->probeIndex]; + + // Grab a temp to hold the 'this' object as it will be used three times + // + unsigned const tmpNum = m_compiler->lvaGrabTemp(true DEBUGARG("class profile tmp")); + m_compiler->lvaTable[tmpNum].lvType = TYP_REF; + + // Generate the IR... + // + GenTree* const classProfileNode = + m_compiler->gtNewIconNode((ssize_t)classProfile, TYP_I_IMPL); + GenTree* const tmpNode = m_compiler->gtNewLclvNode(tmpNum, TYP_REF); + GenTreeCall::Use* const args = m_compiler->gtNewCallArgs(tmpNode, classProfileNode); + GenTree* const helperCallNode = + m_compiler->gtNewHelperCallNode(CORINFO_HELP_CLASSPROFILE, TYP_VOID, args); + GenTree* const tmpNode2 = m_compiler->gtNewLclvNode(tmpNum, TYP_REF); + GenTree* const callCommaNode = + m_compiler->gtNewOperNode(GT_COMMA, TYP_REF, helperCallNode, tmpNode2); + GenTree* const tmpNode3 = m_compiler->gtNewLclvNode(tmpNum, TYP_REF); + GenTree* const asgNode = m_compiler->gtNewOperNode(GT_ASG, TYP_REF, tmpNode3, + call->gtCallThisArg->GetNode()); + GenTree* const asgCommaNode = + m_compiler->gtNewOperNode(GT_COMMA, TYP_REF, asgNode, callCommaNode); + + // Update the call + // + call->gtCallThisArg->SetNode(asgCommaNode); + + JITDUMP("Modified call is now\n"); + DISPTREE(call); + + // Initialize the class table + // + // Hack: we use the high bits of the offset to indicate that this record + // is the start of a class profile, and what kind of call is being profiled. + // + IL_OFFSET offset = jitGetILoffs(call->gtClassProfileCandidateInfo->ilOffset); + assert((int)offset >= 0); + + offset |= 0x80000000; + + if (call->IsVirtualStub()) + { + offset |= 0x40000000; + } + else + { + assert(call->IsVirtualVtable()); + } + + classProfile->ILOffset = offset; + classProfile->Count = 0; + + for (int i = 0; i < ICorJitInfo::ClassProfile::SIZE; i++) + { + classProfile->ClassTable[i] = NO_CLASS_HANDLE; + } + } + + // Restore the stub address on call, whether instrumenting or not. + // + call->gtStubCallStubAddr = call->gtClassProfileCandidateInfo->stubAddr; + } + } + + return Compiler::WALK_CONTINUE; + } + }; + + // Scan the statements and add class probes + // + ClassProbeVisitor visitor(this, (ICorJitInfo::ClassProfile*)profileBlockCountsEnd, instrument); + for (Statement* stmt : block->Statements()) + { + visitor.WalkTree(stmt->GetRootNodePointer(), nullptr); + } + + // Bookkeeping + // + assert(visitor.m_count <= countOfCalls); + countOfCalls -= visitor.m_count; + JITDUMP("\n%d calls remain to be visited\n", countOfCalls); } + else + { + JITDUMP("No calls to profile in " FMT_BB "\n", block->bbNum); + } + } + + // We won't need count probes in internal blocks. + // + // TODO, perhaps: profile the flow early expansion ... we would need + // some non-il based keying scheme. + // + if ((block->bbFlags & BBF_INTERNAL) != 0) + { + continue; + } + + // One less block + countOfBlocks--; + if (instrument) + { // Assign the current block's IL offset into the profile data - currentBlockCounts->ILOffset = block->bbCodeOffs; + // (make sure IL offset is sane) + // + IL_OFFSET offset = block->bbCodeOffs; + assert((int)offset >= 0); + + currentBlockCounts->ILOffset = offset; currentBlockCounts->ExecutionCount = 0; size_t addrOfCurrentExecutionCount = (size_t)¤tBlockCounts->ExecutionCount; @@ -456,57 +687,63 @@ void Compiler::fgInstrumentMethod() // Advance to the next BlockCounts tuple [ILOffset, ExecutionCount] currentBlockCounts++; - - // One less block - countOfBlocks--; } - // Check that we allocated and initialized the same number of BlockCounts tuples - noway_assert(countOfBlocks == 0); + } - // When prejitting, add the method entry callback node - if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) - { - GenTree* arg; + if (!instrument) + { + return; + } + + // Check that we allocated and initialized the same number of BlockCounts tuples + // + noway_assert(countOfBlocks == 0); + noway_assert(countOfCalls == 0); + assert(currentBlockCounts == profileBlockCountsEnd); + + // When prejitting, add the method entry callback node + if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) + { + GenTree* arg; #ifdef FEATURE_READYTORUN_COMPILER - if (opts.IsReadyToRun()) - { - mdMethodDef currentMethodToken = info.compCompHnd->getMethodDefFromMethod(info.compMethodHnd); + if (opts.IsReadyToRun()) + { + mdMethodDef currentMethodToken = info.compCompHnd->getMethodDefFromMethod(info.compMethodHnd); - CORINFO_RESOLVED_TOKEN resolvedToken; - resolvedToken.tokenContext = MAKE_METHODCONTEXT(info.compMethodHnd); - resolvedToken.tokenScope = info.compScopeHnd; - resolvedToken.token = currentMethodToken; - resolvedToken.tokenType = CORINFO_TOKENKIND_Method; + CORINFO_RESOLVED_TOKEN resolvedToken; + resolvedToken.tokenContext = MAKE_METHODCONTEXT(info.compMethodHnd); + resolvedToken.tokenScope = info.compScopeHnd; + resolvedToken.token = currentMethodToken; + resolvedToken.tokenType = CORINFO_TOKENKIND_Method; - info.compCompHnd->resolveToken(&resolvedToken); + info.compCompHnd->resolveToken(&resolvedToken); - arg = impTokenToHandle(&resolvedToken); - } - else + arg = impTokenToHandle(&resolvedToken); + } + else #endif - { - arg = gtNewIconEmbMethHndNode(info.compMethodHnd); - } + { + arg = gtNewIconEmbMethHndNode(info.compMethodHnd); + } - GenTreeCall::Use* args = gtNewCallArgs(arg); - GenTree* call = gtNewHelperCallNode(CORINFO_HELP_BBT_FCN_ENTER, TYP_VOID, args); + GenTreeCall::Use* args = gtNewCallArgs(arg); + GenTree* call = gtNewHelperCallNode(CORINFO_HELP_BBT_FCN_ENTER, TYP_VOID, args); - // Get the address of the first blocks ExecutionCount - size_t addrOfFirstExecutionCount = (size_t)&profileBlockCountsStart->ExecutionCount; + // Get the address of the first blocks ExecutionCount + size_t addrOfFirstExecutionCount = (size_t)&profileBlockCountsStart->ExecutionCount; - // Read Basic-Block count value - GenTree* valueNode = gtNewIndOfIconHandleNode(TYP_INT, addrOfFirstExecutionCount, GTF_ICON_BBC_PTR, false); + // Read Basic-Block count value + GenTree* valueNode = gtNewIndOfIconHandleNode(TYP_INT, addrOfFirstExecutionCount, GTF_ICON_BBC_PTR, false); - // Compare Basic-Block count value against zero - GenTree* relop = gtNewOperNode(GT_NE, TYP_INT, valueNode, gtNewIconNode(0, TYP_INT)); - GenTree* colon = new (this, GT_COLON) GenTreeColon(TYP_VOID, gtNewNothingNode(), call); - GenTree* cond = gtNewQmarkNode(TYP_VOID, relop, colon); - Statement* stmt = gtNewStmt(cond); + // Compare Basic-Block count value against zero + GenTree* relop = gtNewOperNode(GT_NE, TYP_INT, valueNode, gtNewIconNode(0, TYP_INT)); + GenTree* colon = new (this, GT_COLON) GenTreeColon(TYP_VOID, gtNewNothingNode(), call); + GenTree* cond = gtNewQmarkNode(TYP_VOID, relop, colon); + Statement* stmt = gtNewStmt(cond); - fgEnsureFirstBBisScratch(); - fgInsertStmtAtEnd(fgFirstBB, stmt); - } + fgEnsureFirstBBisScratch(); + fgInsertStmtAtEnd(fgFirstBB, stmt); } } diff --git a/src/coreclr/src/jit/gentree.h b/src/coreclr/src/jit/gentree.h index 6e74955d61473f..bea488d04a297e 100644 --- a/src/coreclr/src/jit/gentree.h +++ b/src/coreclr/src/jit/gentree.h @@ -155,6 +155,7 @@ enum TargetHandleType : BYTE struct BasicBlock; struct InlineCandidateInfo; struct GuardedDevirtualizationCandidateInfo; +struct ClassProfileCandidateInfo; typedef unsigned short AssertionIndex; @@ -4514,6 +4515,7 @@ struct GenTreeCall final : public GenTree // gtInlineCandidateInfo is only used when inlining methods InlineCandidateInfo* gtInlineCandidateInfo; GuardedDevirtualizationCandidateInfo* gtGuardedDevirtualizationCandidateInfo; + ClassProfileCandidateInfo* gtClassProfileCandidateInfo; void* gtStubCallStubAddr; // GTF_CALL_VIRT_STUB - these are never inlined CORINFO_GENERIC_HANDLE compileTimeHelperArgumentHandle; // Used to track type handle argument of dynamic helpers void* gtDirectCallAddress; // Used to pass direct call address between lower and codegen diff --git a/src/coreclr/src/jit/importer.cpp b/src/coreclr/src/jit/importer.cpp index 5d57941ba41077..477d0ef0c8de51 100644 --- a/src/coreclr/src/jit/importer.cpp +++ b/src/coreclr/src/jit/importer.cpp @@ -8725,7 +8725,7 @@ var_types Compiler::impImportCall(OPCODE opcode, const bool isExplicitTailCall = (tailCallFlags & PREFIX_TAILCALL_EXPLICIT) != 0; const bool isLateDevirtualization = false; impDevirtualizeCall(call->AsCall(), &callInfo->hMethod, &callInfo->methodFlags, &callInfo->contextHandle, - &exactContextHnd, isLateDevirtualization, isExplicitTailCall); + &exactContextHnd, isLateDevirtualization, isExplicitTailCall, rawILOffset); } if (impIsThis(obj)) @@ -20523,6 +20523,7 @@ bool Compiler::IsMathIntrinsic(GenTree* tree) // exactContextHnd -- [OUT] updated context handle iff call devirtualized // isLateDevirtualization -- if devirtualization is happening after importation // isExplicitTailCalll -- [IN] true if we plan on using an explicit tail call +// ilOffset -- IL offset of the call // // Notes: // Virtual calls in IL will always "invoke" the base class method. @@ -20557,7 +20558,8 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, CORINFO_CONTEXT_HANDLE* contextHandle, CORINFO_CONTEXT_HANDLE* exactContextHandle, bool isLateDevirtualization, - bool isExplicitTailCall) + bool isExplicitTailCall, + IL_OFFSETX ilOffset) { assert(call != nullptr); assert(method != nullptr); @@ -20567,9 +20569,39 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, // This should be a virtual vtable or virtual stub call. assert(call->IsVirtual()); - // Bail if not optimizing - if (opts.OptimizationDisabled()) + // Possibly instrument, if not optimizing. + // + if (opts.OptimizationDisabled() && (call->gtCallType != CT_INDIRECT)) { + // During importation, optionally flag this block as one that + // contains calls requiring class profiling. Ideally perhaps + // we'd just keep track of the calls themselves, so we don't + // have to search for them later. + // + if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR) && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) && + (JitConfig.JitClassProfiling() > 0) && !isLateDevirtualization) + { + JITDUMP("\n ... marking [%06u] in " FMT_BB " for class profile instrumentation\n", dspTreeID(call), + compCurBB->bbNum); + ClassProfileCandidateInfo* pInfo = new (this, CMK_Inlining) ClassProfileCandidateInfo; + + // Record some info needed for the class profiling probe. + // + pInfo->ilOffset = ilOffset; + pInfo->probeIndex = info.compClassProbeCount++; + pInfo->stubAddr = call->gtStubCallStubAddr; + + // note this overwrites gtCallStubAddr, so it needs to be undone + // during the instrumentation phase, or we won't generate proper + // code for vsd calls. + // + call->gtClassProfileCandidateInfo = pInfo; + + // Flag block as needing scrutiny + // + compCurBB->bbFlags |= BBF_HAS_VIRTUAL_CALL; + } + return; } @@ -20727,46 +20759,60 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, return; } - CORINFO_CLASS_HANDLE uniqueImplementingClass = NO_CLASS_HANDLE; - - // info.compCompHnd->getUniqueImplementingClass(objClass); - - if (uniqueImplementingClass == NO_CLASS_HANDLE) + // Don't try guarded devirtualiztion when we're doing late devirtualization. + // + if (isLateDevirtualization) { - JITDUMP("No unique implementor of interface %p (%s), sorry\n", dspPtr(objClass), objClassName); + JITDUMP("No guarded devirt during late devirtualization\n"); return; } - JITDUMP("Only known implementor of interface %p (%s) is %p (%s)!\n", dspPtr(objClass), objClassName, - uniqueImplementingClass, eeGetClassName(uniqueImplementingClass)); + JITDUMP("Consdering guarded devirt...\n"); - bool guessUniqueInterface = true; + // See if the runtime can provide a class to guess for. + // + const unsigned interfaceLikelihoodThreshold = 25; + unsigned likelihood = 0; + unsigned numberOfClasses = 0; + CORINFO_CLASS_HANDLE likelyClass = + info.compCompHnd->getLikelyClass(info.compMethodHnd, baseClass, ilOffset, &likelihood, &numberOfClasses); - INDEBUG(guessUniqueInterface = (JitConfig.JitGuardedDevirtualizationGuessUniqueInterface() > 0);); + if (likelyClass == NO_CLASS_HANDLE) + { + JITDUMP("No likely implementor of interface %p (%s), sorry\n", dspPtr(objClass), objClassName); + return; + } + else + { + JITDUMP("Likely implementor of interface %p (%s) is %p (%s) [likelihood:%u classes seen:%u]\n", + dspPtr(objClass), objClassName, likelyClass, eeGetClassName(likelyClass), likelihood, + numberOfClasses); + } - if (!guessUniqueInterface) + // Todo: a more advanced heuristic using likelihood, number of + // classes, and the profile count for this block. + // + // For now we will guess if the likelihood is 25% or more, as studies + // have shown this should pay off for interface calls. + // + if (likelihood < interfaceLikelihoodThreshold) { - JITDUMP("Guarded devirt for unique interface implementor is not enabled, sorry\n"); + JITDUMP("Not guessing for class; likelihood is below interface call threshold %u\n", + interfaceLikelihoodThreshold); return; } - // Ask the runtime to determine the method that would be called based on the guessed-for type. - CORINFO_CONTEXT_HANDLE ownerType = *contextHandle; - CORINFO_METHOD_HANDLE uniqueImplementingMethod = - info.compCompHnd->resolveVirtualMethod(baseMethod, uniqueImplementingClass, ownerType); + // Ask the runtime to determine the method that would be called based on the likely type. + // + CORINFO_CONTEXT_HANDLE ownerType = *contextHandle; + CORINFO_METHOD_HANDLE likelyMethod = info.compCompHnd->resolveVirtualMethod(baseMethod, likelyClass, ownerType); - if (uniqueImplementingMethod == nullptr) + if (likelyMethod == nullptr) { JITDUMP("Can't figure out which method would be invoked, sorry\n"); return; } - JITDUMP("Interface call would invoke method %s\n", eeGetMethodName(uniqueImplementingMethod, nullptr)); - DWORD uniqueMethodAttribs = info.compCompHnd->getMethodAttribs(uniqueImplementingMethod); - DWORD uniqueClassAttribs = info.compCompHnd->getClassAttribs(uniqueImplementingClass); - - addGuardedDevirtualizationCandidate(call, uniqueImplementingMethod, uniqueImplementingClass, - uniqueMethodAttribs, uniqueClassAttribs); return; } @@ -20833,29 +20879,91 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, { JITDUMP(" Class not final or exact%s\n", isInterface ? "" : ", and method not final"); - // Have we enabled guarded devirtualization by guessing the jit's best class? - bool guessJitBestClass = true; - INDEBUG(guessJitBestClass = (JitConfig.JitGuardedDevirtualizationGuessBestClass() > 0);); + // Don't try guarded devirtualiztion when we're doing late devirtualization. + // + if (isLateDevirtualization) + { + JITDUMP("No guarded devirt during late devirtualization\n"); + return; + } + + JITDUMP("Consdering guarded devirt...\n"); + + // See if there's a likely guess for the class. + // + // pass objClass here, or baseClass? + // always query, or do so under condition? + // + const unsigned virtualLikelihoodThreshold = 30; + unsigned likelihood = 0; + unsigned numberOfClasses = 0; + CORINFO_CLASS_HANDLE likelyClass = + info.compCompHnd->getLikelyClass(info.compMethodHnd, baseClass, ilOffset, &likelihood, &numberOfClasses); + + if (likelyClass != NO_CLASS_HANDLE) + { + JITDUMP("Likely class for %p (%s) is %p (%s) [likelihood:%u classes seen:%u]\n", dspPtr(objClass), + objClassName, likelyClass, eeGetClassName(likelyClass), likelihood, numberOfClasses); + } + else + { + // Have we enabled guarded devirtualization by guessing the jit's best class? + bool guessJitBestClass = true; + INDEBUG(guessJitBestClass = (JitConfig.JitGuardedDevirtualizationGuessBestClass() > 0);); + + if (!guessJitBestClass) + { + JITDUMP("No guarded devirt: no likely class and guessing for jit best class disabled\n"); + return; + } + + // We will use the class that introduced the method as our guess + // for the runtime class of othe object. + // + // We don't know now likely this is; just choose a value that gets + // past the threshold. + likelyClass = info.compCompHnd->getMethodClass(derivedMethod); + likelihood = virtualLikelihoodThreshold; + + JITDUMP("Will guess implementing class for class %p (%s) is %p (%s)!\n", dspPtr(objClass), objClassName, + likelyClass, eeGetClassName(likelyClass)); + } - if (!guessJitBestClass) + // Todo: a more advanced heuristic using likelihood, number of + // classes, and the profile count for this block. + // + // For now we will guess if the likelihood is 30% or more, as studies + // have shown this should pay off for interface calls. + // + if (likelihood < virtualLikelihoodThreshold) { - JITDUMP("No guarded devirt: guessing for jit best class disabled\n"); + JITDUMP("Not guessing for class; likelihood is below virtual call threshold %u\n", + virtualLikelihoodThreshold); return; } - // Don't try guarded devirtualiztion when we're doing late devirtualization. - if (isLateDevirtualization) + // Figure out which method will be called. + // + CORINFO_CONTEXT_HANDLE ownerType = *contextHandle; + CORINFO_METHOD_HANDLE likelyMethod = info.compCompHnd->resolveVirtualMethod(baseMethod, likelyClass, ownerType); + + if (likelyMethod == nullptr) { - JITDUMP("No guarded devirt during late devirtualization\n"); + JITDUMP("Can't figure out which method would be invoked, sorry\n"); return; } - // We will use the class that introduced the method as our guess - // for the runtime class of othe object. - CORINFO_CLASS_HANDLE derivedClass = info.compCompHnd->getMethodClass(derivedMethod); + JITDUMP("Virtual call would invoke method %s\n", eeGetMethodName(likelyMethod, nullptr)); + + // Some of these may be redundant + // + DWORD likelyMethodAttribs = info.compCompHnd->getMethodAttribs(likelyMethod); + DWORD likelyClassAttribs = info.compCompHnd->getClassAttribs(likelyClass); // Try guarded devirtualization. - addGuardedDevirtualizationCandidate(call, derivedMethod, derivedClass, derivedMethodAttribs, objClassAttribs); + // + addGuardedDevirtualizationCandidate(call, likelyMethod, likelyClass, likelyMethodAttribs, likelyClassAttribs, + likelihood); return; } @@ -20864,6 +20972,14 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, JITDUMP(" %s; can devirtualize\n", note); + // See if the method we're devirtualizing to is an intrinsic. + // + if (derivedMethodAttribs & (CORINFO_FLG_JIT_INTRINSIC | CORINFO_FLG_INTRINSIC)) + { + JITDUMP("!!! Devirt to intrinsic in %s, calling %s::%s\n", impInlineRoot()->info.compFullName, derivedClassName, + derivedMethodName); + } + // Make the updates. call->gtFlags &= ~GTF_CALL_VIRT_VTABLE; call->gtFlags &= ~GTF_CALL_VIRT_STUB; @@ -21256,12 +21372,14 @@ void Compiler::addFatPointerCandidate(GenTreeCall* call) // classHandle - class that will be tested for at runtime // methodAttr - attributes of the method // classAttr - attributes of the class +// likelihood - odds that this class is the class seen at runtime // void Compiler::addGuardedDevirtualizationCandidate(GenTreeCall* call, CORINFO_METHOD_HANDLE methodHandle, CORINFO_CLASS_HANDLE classHandle, unsigned methodAttr, - unsigned classAttr) + unsigned classAttr, + unsigned likelihood) { // This transformation only makes sense for virtual calls assert(call->IsVirtual()); @@ -21301,24 +21419,60 @@ void Compiler::addGuardedDevirtualizationCandidate(GenTreeCall* call, return; } +#ifdef DEBUG + + // See if disabled by range + // + static ConfigMethodRange JitGuardedDevirtualizationRange; + JitGuardedDevirtualizationRange.EnsureInit(JitConfig.JitGuardedDevirtualizationRange()); + assert(!JitGuardedDevirtualizationRange.Error()); + + static bool first = true; + + if (first) + { + first = false; + + if (!JitGuardedDevirtualizationRange.IsEmpty()) + { + printf("**** Range-based GDV enabled for ****\n"); + JitGuardedDevirtualizationRange.Dump(); + } + } + + if (!JitGuardedDevirtualizationRange.Contains(impInlineRoot()->info.compMethodHash())) + { + JITDUMP("NOT Marking call [%06u] as guarded devirtualization candidate -- excluded by " + "JitGuardedDevirtualizationRange", + dspTreeID(call)); + return; + } + +#endif + // We're all set, proceed with candidate creation. + // JITDUMP("Marking call [%06u] as guarded devirtualization candidate; will guess for class %s\n", dspTreeID(call), eeGetClassName(classHandle)); setMethodHasGuardedDevirtualization(); call->SetGuardedDevirtualizationCandidate(); // Spill off any GT_RET_EXPR subtrees so we can clone the call. + // SpillRetExprHelper helper(this); helper.StoreRetExprResultsInArgs(call); // Gather some information for later. Note we actually allocate InlineCandidateInfo // here, as the devirtualized half of this call will likely become an inline candidate. + // GuardedDevirtualizationCandidateInfo* pInfo = new (this, CMK_Inlining) InlineCandidateInfo; pInfo->guardedMethodHandle = methodHandle; pInfo->guardedClassHandle = classHandle; + pInfo->likelihood = likelihood; // Save off the stub address since it shares a union with the candidate info. + // if (call->IsVirtualStub()) { JITDUMP("Saving stub addr %p in candidate info\n", dspPtr(call->gtStubCallStubAddr)); diff --git a/src/coreclr/src/jit/indirectcalltransformer.cpp b/src/coreclr/src/jit/indirectcalltransformer.cpp index ebbce239d168e7..156d7c9c3a4094 100644 --- a/src/coreclr/src/jit/indirectcalltransformer.cpp +++ b/src/coreclr/src/jit/indirectcalltransformer.cpp @@ -192,6 +192,7 @@ class IndirectCallTransformer thenBlock = nullptr; elseBlock = nullptr; origCall = nullptr; + likelihood = HIGH_PROBABILITY; } //------------------------------------------------------------------------ @@ -204,7 +205,7 @@ class IndirectCallTransformer void Transform() { - JITDUMP("*** %s: transforming" FMT_STMT "\n", Name(), stmt->GetID()); + JITDUMP("*** %s: transforming " FMT_STMT "\n", Name(), stmt->GetID()); FixupRetExpr(); ClearFlag(); CreateRemainder(); @@ -228,9 +229,8 @@ class IndirectCallTransformer // void CreateRemainder() { - remainderBlock = compiler->fgSplitBlockAfterStatement(currBlock, stmt); - unsigned propagateFlags = currBlock->bbFlags & BBF_GC_SAFE_POINT; - remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags; + remainderBlock = compiler->fgSplitBlockAfterStatement(currBlock, stmt); + remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | BBF_INTERNAL; } virtual void CreateCheck() = 0; @@ -248,11 +248,7 @@ class IndirectCallTransformer BasicBlock* CreateAndInsertBasicBlock(BBjumpKinds jumpKind, BasicBlock* insertAfter) { BasicBlock* block = compiler->fgNewBBafter(jumpKind, insertAfter, true); - if ((insertAfter->bbFlags & BBF_INTERNAL) == 0) - { - block->bbFlags &= ~BBF_INTERNAL; - block->bbFlags |= BBF_IMPORTED; - } + block->bbFlags |= BBF_IMPORTED; return block; } @@ -274,8 +270,8 @@ class IndirectCallTransformer { remainderBlock->inheritWeight(currBlock); checkBlock->inheritWeight(currBlock); - thenBlock->inheritWeightPercentage(currBlock, HIGH_PROBABILITY); - elseBlock->inheritWeightPercentage(currBlock, 100 - HIGH_PROBABILITY); + thenBlock->inheritWeightPercentage(currBlock, likelihood); + elseBlock->inheritWeightPercentage(currBlock, 100 - likelihood); } //------------------------------------------------------------------------ @@ -296,6 +292,7 @@ class IndirectCallTransformer BasicBlock* elseBlock; Statement* stmt; GenTreeCall* origCall; + unsigned likelihood; const int HIGH_PROBABILITY = 80; }; @@ -546,6 +543,10 @@ class IndirectCallTransformer return; } + likelihood = origCall->gtGuardedDevirtualizationCandidateInfo->likelihood; + assert((likelihood >= 0) && (likelihood <= 100)); + JITDUMP("Likelihood of correct guess is %u\n", likelihood); + Transform(); } @@ -688,7 +689,7 @@ class IndirectCallTransformer thenBlock->bbFlags |= currBlock->bbFlags & BBF_SPLIT_GAINED; InlineCandidateInfo* inlineInfo = origCall->gtInlineCandidateInfo; - CORINFO_CLASS_HANDLE clsHnd = inlineInfo->clsHandle; + CORINFO_CLASS_HANDLE clsHnd = inlineInfo->guardedClassHandle; // copy 'this' to temp with exact type. const unsigned thisTemp = compiler->lvaGrabTemp(false DEBUGARG("guarded devirt this exact temp")); @@ -721,8 +722,9 @@ class IndirectCallTransformer assert(!call->IsVirtual()); // Re-establish this call as an inline candidate. - GenTree* oldRetExpr = inlineInfo->retExpr; - inlineInfo->clsHandle = clsHnd; + GenTree* oldRetExpr = inlineInfo->retExpr; + // Todo -- pass this back from impdevirt...? + inlineInfo->clsHandle = compiler->info.compCompHnd->getMethodClass(methodHnd); inlineInfo->exactContextHnd = context; call->gtInlineCandidateInfo = inlineInfo; diff --git a/src/coreclr/src/jit/inline.h b/src/coreclr/src/jit/inline.h index 4918e6ca7bef58..b8aba5dd5fa6ee 100644 --- a/src/coreclr/src/jit/inline.h +++ b/src/coreclr/src/jit/inline.h @@ -512,14 +512,24 @@ class InlineResult bool m_Reported; }; -// GuardedDevirtualizationCandidateInfo provides information about -// a potential target of a virtual call. +// ClassProfileCandidateInfo provides information about +// profiling an indirect or virtual call. +// +struct ClassProfileCandidateInfo +{ + IL_OFFSET ilOffset; + unsigned probeIndex; + void* stubAddr; +}; -struct GuardedDevirtualizationCandidateInfo +// GuardedDevirtualizationCandidateInfo provides information about +// a potential target of a virtual or interface call. +// +struct GuardedDevirtualizationCandidateInfo : ClassProfileCandidateInfo { CORINFO_CLASS_HANDLE guardedClassHandle; CORINFO_METHOD_HANDLE guardedMethodHandle; - void* stubAddr; + unsigned likelihood; }; // InlineCandidateInfo provides basic information about a particular @@ -527,7 +537,7 @@ struct GuardedDevirtualizationCandidateInfo // // It is a superset of GuardedDevirtualizationCandidateInfo: calls // can start out as GDv candidates and turn into inline candidates - +// struct InlineCandidateInfo : public GuardedDevirtualizationCandidateInfo { CORINFO_METHOD_INFO methInfo; diff --git a/src/coreclr/src/jit/jitconfigvalues.h b/src/coreclr/src/jit/jitconfigvalues.h index 4e4b2981ee18d0..7b14eb30bfdc4e 100644 --- a/src/coreclr/src/jit/jitconfigvalues.h +++ b/src/coreclr/src/jit/jitconfigvalues.h @@ -416,8 +416,10 @@ CONFIG_INTEGER(JitEnableGuardedDevirtualization, W("JitEnableGuardedDevirtualiza #if defined(DEBUG) // Various policies for GuardedDevirtualization +CONFIG_STRING(JitGuardedDevirtualizationRange, W("JitGuardedDevirtualizationRange")) CONFIG_INTEGER(JitGuardedDevirtualizationGuessUniqueInterface, W("JitGuardedDevirtualizationGuessUniqueInterface"), 1) CONFIG_INTEGER(JitGuardedDevirtualizationGuessBestClass, W("JitGuardedDevirtualizationGuessBestClass"), 1) +CONFIG_INTEGER(JitGuardedDeivrtualizationUseProfile, W("JitGuardedDevirtualizationUseProfile"), 0) #endif // DEBUG // Enable insertion of patchpoints into Tier0 methods with loops. @@ -425,6 +427,10 @@ CONFIG_INTEGER(TC_OnStackReplacement, W("TC_OnStackReplacement"), 0) // Initial patchpoint counter value used by jitted code CONFIG_INTEGER(TC_OnStackReplacement_InitialCounter, W("TC_OnStackReplacement_InitialCounter"), 1000) +// Profile instrumentation options +CONFIG_INTEGER(JitMinimalProfiling, W("JitMinimalProfiling"), 0) +CONFIG_INTEGER(JitClassProfiling, W("JitClassProfiling"), 0) + #if defined(DEBUG) // JitFunctionFile: Name of a file that contains a list of functions. If the currently compiled function is in the // file, certain other JIT config variables will be active. If the currently compiled function is not in the file, diff --git a/src/coreclr/src/jit/patchpoint.cpp b/src/coreclr/src/jit/patchpoint.cpp index 790ec3e56a008c..8ce03ea9590620 100644 --- a/src/coreclr/src/jit/patchpoint.cpp +++ b/src/coreclr/src/jit/patchpoint.cpp @@ -93,11 +93,7 @@ class PatchpointTransformer BasicBlock* CreateAndInsertBasicBlock(BBjumpKinds jumpKind, BasicBlock* insertAfter) { BasicBlock* block = compiler->fgNewBBafter(jumpKind, insertAfter, true); - if ((insertAfter->bbFlags & BBF_INTERNAL) == 0) - { - block->bbFlags &= ~BBF_INTERNAL; - block->bbFlags |= BBF_IMPORTED; - } + block->bbFlags |= BBF_IMPORTED; return block; } @@ -138,6 +134,7 @@ class PatchpointTransformer block->bbJumpKind = BBJ_COND; block->bbJumpDest = remainderBlock; helperBlock->bbFlags |= BBF_BACKWARD_JUMP; + block->bbFlags |= BBF_INTERNAL; // Update weights remainderBlock->inheritWeight(block); diff --git a/src/coreclr/src/jit/utils.h b/src/coreclr/src/jit/utils.h index 149ef88b002749..52c976a14f630e 100644 --- a/src/coreclr/src/jit/utils.h +++ b/src/coreclr/src/jit/utils.h @@ -169,6 +169,11 @@ class ConfigMethodRange } } + bool IsEmpty() const + { + return m_lastRange == 0; + } + // Error checks bool Error() const { diff --git a/src/coreclr/src/vm/jithelpers.cpp b/src/coreclr/src/vm/jithelpers.cpp index c101e43575a72e..419260ecec2698 100644 --- a/src/coreclr/src/vm/jithelpers.cpp +++ b/src/coreclr/src/vm/jithelpers.cpp @@ -55,6 +55,7 @@ #include "runtimehandles.h" #include "castcache.h" #include "onstackreplacement.h" +#include "pgo.h" //======================================================================== // @@ -5233,6 +5234,73 @@ void JIT_Patchpoint(int* counter, int ilOffset) #endif // FEATURE_ON_STACK_REPLACEMENT +HCIMPL2(void, JIT_ClassProfile, Object *obj, void* tableAddress) +{ + FCALL_CONTRACT; + FC_GC_POLL_NOT_NEEDED(); + + OBJECTREF objRef = ObjectToOBJECTREF(obj); + VALIDATEOBJECTREF(objRef); + + ICorJitInfo::ClassProfile* const classProfile = (ICorJitInfo::ClassProfile*) tableAddress; + const int count = classProfile->Count++; + const int S = ICorJitInfo::ClassProfile::SIZE; + const int N = ICorJitInfo::ClassProfile::SAMPLE_INTERVAL; + + if (objRef == NULL) + { + return; + } + + CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)objRef->GetMethodTable(); + +#ifdef _DEBUG + PgoManager::VerifyAddress(classProfile); + PgoManager::VerifyAddress(classProfile + 1); +#endif + + // If table is not yet full, just add entries in. + // + if (count < S) + { + classProfile->ClassTable[count] = clsHnd; + } + else + { + // generate a random number (xorshift32) + // + // intentionally simple so we can have multithreaded + // access w/o tearing state. + // + static unsigned s_rng = 100; + + unsigned x = s_rng; + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + s_rng = x; + + // N is the sampling window size, + // it should be larger than the table size. + // + // If we let N == count then we are building an entire + // run sample -- probability of update decreases over time. + // Would be a good strategy for an AOT profiler. + // + // But for TieredPGO we would prefer something that is more + // weighted to recent observations. + // + // For S=4, N=128, we'll sample (on average) every 32nd call. + // + if ((x % N) < S) + { + unsigned i = x % S; + classProfile->ClassTable[i] = clsHnd; + } + } +} +HCIMPLEND + //======================================================================== // // INTEROP HELPERS diff --git a/src/coreclr/src/vm/jitinterface.cpp b/src/coreclr/src/vm/jitinterface.cpp index 3824a1f87773d7..3da08e35a82724 100644 --- a/src/coreclr/src/vm/jitinterface.cpp +++ b/src/coreclr/src/vm/jitinterface.cpp @@ -11924,6 +11924,54 @@ HRESULT CEEJitInfo::getMethodBlockCounts ( return hr; } +CORINFO_CLASS_HANDLE CEEJitInfo::getLikelyClass( + CORINFO_METHOD_HANDLE ftnHnd, + CORINFO_CLASS_HANDLE baseHnd, + UINT32 ilOffset, + UINT32 * pLikelihood, + UINT32 * pNumberOfClasses +) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + MODE_PREEMPTIVE; + } CONTRACTL_END; + + CORINFO_CLASS_HANDLE result = NULL; + *pLikelihood = 0; + *pNumberOfClasses = 0; + + JIT_TO_EE_TRANSITION(); + +#ifdef FEATURE_PGO + + // If TieredPGO is enabled, query the per call site class profile. + // + MethodDesc* pMD = (MethodDesc*)ftnHnd; + unsigned codeSize = 0; + if (pMD->IsDynamicMethod()) + { + unsigned stackSize, ehSize; + CorInfoOptions options; + DynamicResolver * pResolver = m_pMethodBeingCompiled->AsDynamicMethodDesc()->GetResolver(); + pResolver->GetCodeInfo(&codeSize, &stackSize, &options, &ehSize); + } + else if (pMD->HasILHeader()) + { + COR_ILMETHOD_DECODER decoder(pMD->GetILHeader()); + codeSize = decoder.GetCodeSize(); + } + + result = PgoManager::getLikelyClass(pMD, codeSize, ilOffset, pLikelihood, pNumberOfClasses); + +#endif + + EE_TO_JIT_TRANSITION(); + + return result; +} + void CEEJitInfo::allocMem ( ULONG hotCodeSize, /* IN */ ULONG coldCodeSize, /* IN */ @@ -12660,7 +12708,13 @@ CORJIT_FLAGS GetCompileFlags(MethodDesc * ftn, CORJIT_FLAGS flags, CORINFO_METHO #ifdef FEATURE_PGO - if (CLRConfig::GetConfigValue(CLRConfig::INTERNAL_WritePGOData) > 0) + // Instrument, if + // + // * We're writing pgo data and we're jitting at Tier0. + // * Tiered PGO is enabled and we're jitting at Tier0. + // + if ((CLRConfig::GetConfigValue(CLRConfig::INTERNAL_WritePGOData) > 0) + && flags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_TIER0)) { flags.Set(CORJIT_FLAGS::CORJIT_FLAG_BBINSTR); } @@ -14159,6 +14213,17 @@ HRESULT CEEInfo::getMethodBlockCounts( UNREACHABLE_RET(); // only called on derived class. } +CORINFO_CLASS_HANDLE CEEInfo::getLikelyClass( + CORINFO_METHOD_HANDLE ftnHnd, + CORINFO_CLASS_HANDLE baseHnd, + UINT32 ilOffset, + UINT32* pLikelihood, + UINT32* pNumberOfCases +) +{ + LIMITED_METHOD_CONTRACT; + UNREACHABLE_RET(); // only called on derived class. +} void CEEInfo::recordCallSite( ULONG instrOffset, /* IN */ diff --git a/src/coreclr/src/vm/jitinterface.h b/src/coreclr/src/vm/jitinterface.h index ccdafd03e87261..7518004464e7d7 100644 --- a/src/coreclr/src/vm/jitinterface.h +++ b/src/coreclr/src/vm/jitinterface.h @@ -1045,6 +1045,14 @@ class CEEInfo : public ICorJitInfo UINT32 * pNumRuns ); + CORINFO_CLASS_HANDLE getLikelyClass( + CORINFO_METHOD_HANDLE ftnHnd, + CORINFO_CLASS_HANDLE baseHnd, + UINT32 ilOffset, + UINT32 * pLikelihood, + UINT32 * pNumberOfClasses + ); + void recordCallSite( ULONG instrOffset, /* IN */ CORINFO_SIG_INFO * callSig, /* IN */ @@ -1247,6 +1255,14 @@ class CEEJitInfo : public CEEInfo UINT32 * pNumRuns ); + CORINFO_CLASS_HANDLE getLikelyClass( + CORINFO_METHOD_HANDLE ftnHnd, + CORINFO_CLASS_HANDLE baseHnd, + UINT32 ilOffset, + UINT32 * pLikelihood, + UINT32 * pNumberOfClasses + ); + void recordCallSite( ULONG instrOffset, /* IN */ CORINFO_SIG_INFO * callSig, /* IN */ diff --git a/src/coreclr/src/vm/pgo.cpp b/src/coreclr/src/vm/pgo.cpp index 16d57f29d8cc8d..a4e5255203a407 100644 --- a/src/coreclr/src/vm/pgo.cpp +++ b/src/coreclr/src/vm/pgo.cpp @@ -8,11 +8,74 @@ #ifdef FEATURE_PGO ICorJitInfo::BlockCounts* PgoManager::s_PgoData; -unsigned PgoManager::s_PgoIndex; +unsigned volatile PgoManager::s_PgoIndex; const char* const PgoManager::s_FileHeaderString = "*** START PGO Data, max index = %u ***\n"; const char* const PgoManager::s_FileTrailerString = "*** END PGO Data ***\n"; const char* const PgoManager::s_MethodHeaderString = "@@@ token 0x%08X hash 0x%08X ilSize 0x%08X records 0x%08X index %u\n"; const char* const PgoManager::s_RecordString = "ilOffs %u count %u\n"; +const char* const PgoManager::s_ClassProfileHeader = "classProfile iloffs %u samples %u entries %u totalCount %u %s\n"; +const char* const PgoManager::s_ClassProfileEntry = "class %p (%s) count %u\n"; + +struct HistogramEntry +{ + CORINFO_CLASS_HANDLE m_mt; + unsigned m_count; +}; + +struct Histogram +{ + Histogram(const ICorJitInfo::ClassProfile* classProfile); + + // Number of nonzero entries in the table + unsigned m_count; + // Total count from all entries in the table + unsigned m_totalCount; + HistogramEntry m_histogram[ICorJitInfo::ClassProfile::SIZE]; +}; + +Histogram::Histogram(const ICorJitInfo::ClassProfile* classProfile) +{ + m_count = 0; + m_totalCount = 0; + + for (unsigned k = 0; k < ICorJitInfo::ClassProfile::SIZE; k++) + { + CORINFO_CLASS_HANDLE currentEntry = classProfile->ClassTable[k]; + + if (currentEntry == NULL) + { + continue; + } + + m_totalCount++; + + bool found = false; + unsigned h = 0; + for(; h < m_count; h++) + { + if (m_histogram[h].m_mt == currentEntry) + { + m_histogram[h].m_count++; + found = true; + break; + } + } + + if (!found) + { + m_histogram[h].m_mt = currentEntry; + m_histogram[h].m_count = 1; + m_count++; + } + } + + // Zero the remainder + for (unsigned k = m_count; k < ICorJitInfo::ClassProfile::SIZE; k++) + { + m_histogram[k].m_mt = 0; + m_histogram[k].m_count = 0; + } +} void PgoManager::Initialize() { @@ -36,6 +99,12 @@ void PgoManager::Shutdown() WritePgoData(); } +void PgoManager::VerifyAddress(void* address) +{ + _ASSERTE(address > s_PgoData); + _ASSERTE(address <= s_PgoData + BUFFER_SIZE); +} + void PgoManager::WritePgoData() { if (CLRConfig::GetConfigValue(CLRConfig::INTERNAL_WritePGOData) == 0) @@ -47,7 +116,6 @@ void PgoManager::WritePgoData() { return; } - CLRConfigStringHolder fileName(CLRConfig::GetConfigValue(CLRConfig::INTERNAL_PGODataPath)); if (fileName == NULL) @@ -80,15 +148,83 @@ void PgoManager::WritePgoData() index += 2; - ICorJitInfo::BlockCounts* records = &s_PgoData[index]; - unsigned recordCount = header->recordCount - 2; - unsigned lastOffset = 0; - for (unsigned i = 0; i < recordCount; i++) + ICorJitInfo::BlockCounts* records = &s_PgoData[index]; + unsigned recordCount = header->recordCount - 2; + unsigned lastOffset = 0; + bool hasClassProfile = false; + unsigned i = 0; + + while (i < recordCount) { const unsigned thisOffset = records[i].ILOffset; - assert((thisOffset > lastOffset) || (lastOffset == 0)); + + + if (((int) thisOffset) < 0) + { + // remainder must be class probe data + hasClassProfile = true; + break; + } + lastOffset = thisOffset; fprintf(pgoDataFile, s_RecordString, records[i].ILOffset, records[i].ExecutionCount); + i++; + } + + if (hasClassProfile) + { + fflush(pgoDataFile); + + // Write out histogram of each probe's data. + // We currently don't expect to be able to read this back in. + // + while (i < recordCount) + { + // Should be enough room left for a class profile. + _ASSERTE(i + sizeof(ICorJitInfo::ClassProfile) / sizeof(ICorJitInfo::BlockCounts) <= recordCount); + + const ICorJitInfo::ClassProfile* classProfile = (ICorJitInfo::ClassProfile*)&s_PgoData[i + index]; + + // Form a histogram... + // + Histogram h(classProfile); + + // And display... + // + // Figure out if this is a virtual or interface probe. + // + const char* profileType = "virtual"; + + if (classProfile->ILOffset & 0x40000000) + { + profileType = "interface"; + } + + // "classProfile iloffs %u samples %u entries %u totalCount %u %s\n"; + // + fprintf(pgoDataFile, s_ClassProfileHeader, (classProfile->ILOffset & 0x0FFFFFFF), + classProfile->Count, h.m_count, h.m_totalCount, profileType); + + fflush(pgoDataFile); + + for (unsigned j = 0; j < h.m_count; j++) + { + CORINFO_CLASS_HANDLE clsHnd = h.m_histogram[j].m_mt; + const char* className = "n/a"; +#ifdef _DEBUG + TypeHandle typeHnd(clsHnd); + MethodTable* pMT = typeHnd.AsMethodTable(); + className = pMT->GetDebugClassName(); +#endif + fprintf(pgoDataFile, s_ClassProfileEntry, clsHnd, className, h.m_histogram[j].m_count); + } + + // Advance to next entry. + // + i += sizeof(ICorJitInfo::ClassProfile) / sizeof(ICorJitInfo::BlockCounts); + + fflush(pgoDataFile); + } } index += recordCount; @@ -179,7 +315,7 @@ void PgoManager::ReadPgoData() continue; } - assert(index == rIndex); + _ASSERTE(index == rIndex); methods++; // If there's not enough room left, bail @@ -218,8 +354,13 @@ void PgoManager::ReadPgoData() if (sscanf_s(buffer, s_RecordString, &s_PgoData[index].ILOffset, &s_PgoData[index].ExecutionCount) != 2) { - failed = true; - break; + // This might be class profile data; if so just skip it. + // + if (strstr(buffer, "class") != buffer) + { + failed = true; + break; + } } index++; @@ -342,6 +483,166 @@ HRESULT PgoManager::getMethodBlockCounts(MethodDesc* pMD, unsigned ilSize, UINT3 return E_NOTIMPL; } +CORINFO_CLASS_HANDLE PgoManager::getLikelyClass(MethodDesc* pMD, unsigned ilSize, unsigned ilOffset, UINT32* pLikelihood, UINT32* pNumberOfClasses) +{ + *pLikelihood = 0; + *pNumberOfClasses = 0; + + // Bail if there's no profile data. + // + if (s_PgoData == NULL) + { + return NULL; + } + + // See if we can find profile data for this method in the profile buffer. + // + const unsigned maxIndex = s_PgoIndex; + const unsigned token = pMD->IsDynamicMethod() ? 0 : pMD->GetMemberDef(); + const unsigned hash = pMD->GetStableHash(); + + unsigned index = 0; + unsigned methodsChecked = 0; + + while (index < maxIndex) + { + // The first two "records" of each entry are actually header data + // to identify the method. + // + Header* const header = (Header*)&s_PgoData[index]; + + // Sanity check that header data looks reasonable. If not, just + // fail the lookup. + // + if ((header->recordCount < MIN_RECORD_COUNT) || (header->recordCount > MAX_RECORD_COUNT)) + { + break; + } + + // See if the header info matches the current method. + // + if ((header->token == token) && (header->hash == hash) && (header->ilSize == ilSize)) + { + // Yep, found data. See if there is a suitable class profile. + // + // This bit is currently somewhat hacky ... we scan the records, the count records come + // first and are in increasing IL offset order. Class profiles have inverted IL offsets + // so when we find an offset with high bit set, it's going to be an class profile. + // + unsigned countILOffset = 0; + unsigned j = 2; + + // Skip past all the count entries + // + while (j < header->recordCount) + { + if (((int)s_PgoData[index + j].ILOffset) < 0) + { + break; + } + + countILOffset = s_PgoData[index + j].ILOffset; + j++; + } + + // Now we're in the "class profile" portion of the slab for this method. + // Look for the one that has the right IL offset. + // + while (j < header->recordCount) + { + const ICorJitInfo::ClassProfile* const classProfile = (ICorJitInfo::ClassProfile*)&s_PgoData[index + j]; + + if ((classProfile->ILOffset & 0x0FFFFFFF) != ilOffset) + { + // Need to make sure this is even divisor + // + j += sizeof(ICorJitInfo::ClassProfile) / sizeof(ICorJitInfo::BlockCounts); + continue; + } + + // Form a histogram + // + Histogram h(classProfile); + + // Use histogram count as number of classes estimate + // + *pNumberOfClasses = h.m_count; + + // Report back what we've learned + // (perhaps, use count to augment likelihood?) + // + switch (h.m_count) + { + case 0: + { + return NULL; + } + break; + + case 1: + { + *pLikelihood = 100; + return h.m_histogram[0].m_mt; + } + break; + + case 2: + { + if (h.m_histogram[0].m_count >= h.m_histogram[1].m_count) + { + *pLikelihood = (100 * h.m_histogram[0].m_count) / h.m_totalCount; + return h.m_histogram[0].m_mt; + } + else + { + *pLikelihood = (100 * h.m_histogram[1].m_count) / h.m_totalCount; + return h.m_histogram[1].m_mt; + } + } + break; + + default: + { + // Find maximum entry and return it + // + unsigned maxIndex = 0; + unsigned maxCount = 0; + + for (unsigned m = 0; m < h.m_count; m++) + { + if (h.m_histogram[m].m_count > maxCount) + { + maxIndex = m; + maxCount = h.m_histogram[m].m_count; + } + } + + if (maxCount > 0) + { + *pLikelihood = (100 * maxCount) / h.m_totalCount; + return h.m_histogram[maxIndex].m_mt; + } + + return NULL; + } + break; + } + } + + // Failed to find a class profile entry + // + return NULL; + } + + index += header->recordCount; + methodsChecked++; + } + + // Failed to find any sort of profile data for this method + // + return NULL; +} + #else // Stub version for !FEATURE_PGO builds @@ -364,4 +665,11 @@ HRESULT PgoManager::getMethodBlockCounts(MethodDesc* pMD, unsigned ilSize, UINT3 return E_NOTIMPL; } +// Stub version for !FEATURE_PGO builds +// +CORINFO_CLASS_HANDLE PgoManager::getLikelyClass(MethodDesc* pMD, unsigned ilSize, unsigned ilOffset) +{ + return NULL; +} + #endif // FEATURE_PGO diff --git a/src/coreclr/src/vm/pgo.h b/src/coreclr/src/vm/pgo.h index c5fc5273236f12..3ba4ab6ddb871d 100644 --- a/src/coreclr/src/vm/pgo.h +++ b/src/coreclr/src/vm/pgo.h @@ -22,18 +22,26 @@ class PgoManager static HRESULT allocMethodBlockCounts(MethodDesc* pMD, UINT32 count, ICorJitInfo::BlockCounts** pBlockCounts, unsigned ilSize); - // Retreive the profile block count buffer for a method + // Retrieve the profile block count buffer for a method static HRESULT getMethodBlockCounts(MethodDesc* pMD, unsigned ilSize, UINT32* pCount, ICorJitInfo::BlockCounts** pBlockCounts, UINT32* pNumRuns); + // Retrieve the most likely class for a particular call + static CORINFO_CLASS_HANDLE getLikelyClass(MethodDesc* pMD, unsigned ilSize, unsigned ilOffset, UINT32* pLikelihood, UINT32* pNumberOfClasses); + + // Verify address in bounds + static void VerifyAddress(void* address); + #ifdef FEATURE_PGO private: enum { - // Number of ICorJitInfo::BlockCount records in the global slab - BUFFER_SIZE = 64 * 1024, + // Number of ICorJitInfo::BlockCount records in the global slab. + // Currently 4MB for a 64 bit system. + // + BUFFER_SIZE = 8 * 64 * 1024, MIN_RECORD_COUNT = 3, MAX_RECORD_COUNT = BUFFER_SIZE }; @@ -57,13 +65,15 @@ class PgoManager static ICorJitInfo::BlockCounts* s_PgoData; // Index of next free entry in the global slab - static unsigned s_PgoIndex; + static unsigned volatile s_PgoIndex; // Formatting strings for file input/output static const char* const s_FileHeaderString; static const char* const s_FileTrailerString; static const char* const s_MethodHeaderString; static const char* const s_RecordString; + static const char* const s_ClassProfileHeader; + static const char* const s_ClassProfileEntry; #endif // FEATURE_PGO }; diff --git a/src/coreclr/src/zap/zapinfo.cpp b/src/coreclr/src/zap/zapinfo.cpp index a72c3792c2297b..036457ab673d8e 100644 --- a/src/coreclr/src/zap/zapinfo.cpp +++ b/src/coreclr/src/zap/zapinfo.cpp @@ -1073,6 +1073,16 @@ HRESULT ZapInfo::getMethodBlockCounts ( return S_OK; } +CORINFO_CLASS_HANDLE ZapInfo::getLikelyClass( + CORINFO_METHOD_HANDLE ftnHnd, + CORINFO_CLASS_HANDLE baseHnd, + UINT32 ilOffset, + UINT32* pLikelihood, + UINT32* pNumberOfClasses) +{ + return NULL; +} + void ZapInfo::allocMem( ULONG hotCodeSize, /* IN */ ULONG coldCodeSize, /* IN */ diff --git a/src/coreclr/src/zap/zapinfo.h b/src/coreclr/src/zap/zapinfo.h index 3a44d2e7c0b548..74a8c0eebf2c98 100644 --- a/src/coreclr/src/zap/zapinfo.h +++ b/src/coreclr/src/zap/zapinfo.h @@ -313,6 +313,13 @@ class ZapInfo ICorJitInfo::BlockCounts ** pBlockCounts, UINT32 * pNumRuns); + CORINFO_CLASS_HANDLE getLikelyClass( + CORINFO_METHOD_HANDLE ftnHnd, + CORINFO_CLASS_HANDLE baseHnd, + UINT32 ilOffset, + UINT32 * pLikelihood, + UINT32 * pNumberOfClasses); + DWORD getJitFlags(CORJIT_FLAGS* jitFlags, DWORD sizeInBytes); bool runWithErrorTrap(void (*function)(void*), void* param); diff --git a/src/tests/Common/testenvironment.proj b/src/tests/Common/testenvironment.proj index 98ca7ab6e76ece..4371f64be3ee6a 100644 --- a/src/tests/Common/testenvironment.proj +++ b/src/tests/Common/testenvironment.proj @@ -55,6 +55,7 @@ COMPlus_EnableEHWriteThru; COMPlus_JitObjectStackAllocation; COMPlus_JitInlinePolicyProfile; + COMPlus_JitClassProfiling; RunningIlasmRoundTrip @@ -147,6 +148,7 @@ + From 2b0b1cacd2e7f4415344574e9e018944bf9e0114 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Mon, 23 Nov 2020 15:22:32 -0800 Subject: [PATCH 02/10] fix SPMI packet numbers --- .../src/ToolBox/superpmi/superpmi-shared/methodcontext.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.h b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.h index 158184555abdd0..75466c13a480e5 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.h +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.h @@ -1378,7 +1378,7 @@ class MethodContext }; // ********************* Please keep this up-to-date to ease adding more *************** -// Highest packet number: 181 +// Highest packet number: 182 // ************************************************************************************* enum mcPackets { @@ -1477,7 +1477,7 @@ enum mcPackets Packet_GetJitFlags = 154, // Added 2/3/2016 Packet_GetJitTimeLogFilename = 67, Packet_GetJustMyCodeHandle = 68, - Packet_GetLikelyClass = 181, // Added 9/27/2020 + Packet_GetLikelyClass = 182, // Added 9/27/2020 Packet_GetLocationOfThisType = 69, Packet_GetMethodAttribs = 70, Packet_GetMethodClass = 71, From 1533a1d3e27f7593d9af17f52574e6da83b882ab Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Mon, 23 Nov 2020 18:28:10 -0800 Subject: [PATCH 03/10] stub out crossgen2 support --- .../tools/Common/JitInterface/CorInfoBase.cs | 28 +++++++++++++++---- .../ThunkGenerator/ThunkInput.txt | 1 + .../JitInterface/CorInfoImpl.ReadyToRun.cs | 3 ++ .../src/tools/aot/jitinterface/jitinterface.h | 10 +++++++ 4 files changed, 36 insertions(+), 6 deletions(-) diff --git a/src/coreclr/src/tools/Common/JitInterface/CorInfoBase.cs b/src/coreclr/src/tools/Common/JitInterface/CorInfoBase.cs index 1c8c7ba76ae018..511ea87ab15c4d 100644 --- a/src/coreclr/src/tools/Common/JitInterface/CorInfoBase.cs +++ b/src/coreclr/src/tools/Common/JitInterface/CorInfoBase.cs @@ -2440,6 +2440,21 @@ static HRESULT _getMethodBlockCounts(IntPtr thisHandle, IntPtr* ppException, COR } } + [UnmanagedCallersOnly] + static CORINFO_CLASS_STRUCT_* _getLikelyClass(IntPtr thisHandle, IntPtr* ppException, CORINFO_METHOD_STRUCT_* ftnHnd, CORINFO_CLASS_STRUCT_* baseHnd, uint ilOffset, uint* pLikelihood, uint* pNumberOfClasses) + { + var _this = GetThis(thisHandle); + try + { + return _this.getLikelyClass(ftnHnd, baseHnd, ilOffset, ref *pLikelihood, ref *pNumberOfClasses); + } + catch (Exception ex) + { + *ppException = _this.AllocException(ex); + return default; + } + } + [UnmanagedCallersOnly] static void _recordCallSite(IntPtr thisHandle, IntPtr* ppException, uint instrOffset, CORINFO_SIG_INFO* callSig, CORINFO_METHOD_STRUCT_* methodHandle) { @@ -2516,7 +2531,7 @@ static uint _getJitFlags(IntPtr thisHandle, IntPtr* ppException, CORJIT_FLAGS* f static IntPtr GetUnmanagedCallbacks() { - void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 170); + void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 171); callbacks[0] = (delegate* unmanaged)&_getMethodAttribs; callbacks[1] = (delegate* unmanaged)&_setMethodAttribs; @@ -2683,11 +2698,12 @@ static IntPtr GetUnmanagedCallbacks() callbacks[162] = (delegate* unmanaged)&_reportFatalError; callbacks[163] = (delegate* unmanaged)&_allocMethodBlockCounts; callbacks[164] = (delegate* unmanaged)&_getMethodBlockCounts; - callbacks[165] = (delegate* unmanaged)&_recordCallSite; - callbacks[166] = (delegate* unmanaged)&_recordRelocation; - callbacks[167] = (delegate* unmanaged)&_getRelocTypeHint; - callbacks[168] = (delegate* unmanaged)&_getExpectedTargetArchitecture; - callbacks[169] = (delegate* unmanaged)&_getJitFlags; + callbacks[165] = (delegate* unmanaged)&_getLikelyClass; + callbacks[166] = (delegate* unmanaged)&_recordCallSite; + callbacks[167] = (delegate* unmanaged)&_recordRelocation; + callbacks[168] = (delegate* unmanaged)&_getRelocTypeHint; + callbacks[169] = (delegate* unmanaged)&_getExpectedTargetArchitecture; + callbacks[170] = (delegate* unmanaged)&_getJitFlags; return (IntPtr)callbacks; } diff --git a/src/coreclr/src/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt b/src/coreclr/src/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt index 348c59ec9f0bf8..757635757ffd95 100644 --- a/src/coreclr/src/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt +++ b/src/coreclr/src/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt @@ -321,6 +321,7 @@ FUNCTIONS void reportFatalError(CorJitResult result) HRESULT allocMethodBlockCounts(UINT32 count, ICorJitInfo::BlockCounts** pBlockCounts) HRESULT getMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd, UINT32* pCount, ICorJitInfo::BlockCounts** pBlockCounts, UINT32* pNumRuns) + CORINFO_CLASS_HANDLE getLikelyClass(CORINFO_METHOD_HANDLE ftnHnd, CORINFO_CLASS_HANDLE baseHnd, UINT32 ilOffset, UINT32* pLikelihood, UINT32* pNumberOfClasses) void recordCallSite(ULONG instrOffset, CORINFO_SIG_INFO* callSig, CORINFO_METHOD_HANDLE methodHandle) void recordRelocation(void* location, void* target, WORD fRelocType, WORD slotNum, INT32 addlDelta) WORD getRelocTypeHint(void* target) diff --git a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs index 90f76588691e10..07b3beffc38c90 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs @@ -2341,6 +2341,9 @@ private HRESULT allocMethodBlockCounts(uint count, ref BlockCounts* pBlockCounts private HRESULT getMethodBlockCounts(CORINFO_METHOD_STRUCT_* ftnHnd, ref uint pCount, ref BlockCounts* pBlockCounts, ref uint pNumRuns) { throw new NotImplementedException("getBBProfileData"); } + private CORINFO_CLASS_STRUCT_* getLikelyClass(CORINFO_METHOD_STRUCT_* ftnHnd, CORINFO_CLASS_STRUCT_* baseHnd, uint IlOffset, ref uint pLikelihood, ref uint pNumberOfClasses) + { throw new NotImplementedException("getLikelyClass"); } + private void getAddressOfPInvokeTarget(CORINFO_METHOD_STRUCT_* method, ref CORINFO_CONST_LOOKUP pLookup) { MethodDesc methodDesc = HandleToObject(method); diff --git a/src/coreclr/src/tools/aot/jitinterface/jitinterface.h b/src/coreclr/src/tools/aot/jitinterface/jitinterface.h index ecb7e88915b70d..37828567f2b6c7 100644 --- a/src/coreclr/src/tools/aot/jitinterface/jitinterface.h +++ b/src/coreclr/src/tools/aot/jitinterface/jitinterface.h @@ -171,6 +171,7 @@ struct JitInterfaceCallbacks void (* reportFatalError)(void * thisHandle, CorInfoException** ppException, int result); int (* allocMethodBlockCounts)(void * thisHandle, CorInfoException** ppException, unsigned int count, void** pBlockCounts); int (* getMethodBlockCounts)(void * thisHandle, CorInfoException** ppException, void* ftnHnd, unsigned int* pCount, void** pBlockCounts, unsigned int* pNumRuns); + void* (* getLikelyClass)(void * thisHandle, CorInfoException** ppException, void* ftnHnd, void* baseHnd, unsigned int ilOffset, unsigned int* pLikelihood, unsigned int* pNumberOfClasses); void (* recordCallSite)(void * thisHandle, CorInfoException** ppException, unsigned int instrOffset, void* callSig, void* methodHandle); void (* recordRelocation)(void * thisHandle, CorInfoException** ppException, void* location, void* target, unsigned short fRelocType, unsigned short slotNum, int addlDelta); unsigned short (* getRelocTypeHint)(void * thisHandle, CorInfoException** ppException, void* target); @@ -1608,6 +1609,15 @@ class JitInterfaceWrapper return _ret; } + virtual void* getLikelyClass(void* ftnHnd, void* baseHnd, unsigned int ilOffset, unsigned int* pLikelihood, unsigned int* pNumberOfClasses) + { + CorInfoException* pException = nullptr; + void* _ret = _callbacks->getLikelyClass(_thisHandle, &pException, ftnHnd, baseHnd, ilOffset, pLikelihood, pNumberOfClasses); + if (pException != nullptr) + throw pException; + return _ret; + } + virtual void recordCallSite(unsigned int instrOffset, void* callSig, void* methodHandle) { CorInfoException* pException = nullptr; From b357320baaf253df09e2ab3763e2f48a2389b554 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 24 Nov 2020 01:17:49 -0800 Subject: [PATCH 04/10] review feedback --- .../superpmi-shared/icorjitinfoimpl.h | 28 +++++++++++++++ src/coreclr/src/inc/corinfo.h | 10 +++--- src/coreclr/src/inc/corjit.h | 35 ++++++++++++++++--- src/coreclr/src/jit/block.h | 4 +-- src/coreclr/src/jit/compiler.h | 1 - src/coreclr/src/jit/flowgraph.cpp | 19 +++++----- src/coreclr/src/jit/importer.cpp | 2 +- .../src/tools/aot/jitinterface/jitwrapper.cpp | 10 +++--- src/coreclr/src/vm/jithelpers.cpp | 9 ++--- src/coreclr/src/vm/jitinterface.cpp | 2 +- src/coreclr/src/vm/pgo.cpp | 14 +++----- 11 files changed, 93 insertions(+), 41 deletions(-) diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/icorjitinfoimpl.h b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/icorjitinfoimpl.h index 9f62dd62460852..0a7e086551ee2f 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/icorjitinfoimpl.h +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/icorjitinfoimpl.h @@ -979,6 +979,34 @@ struct BlockCounts // Also defined here: code:CORBBTPROF_BLOCK_DATA UINT32 ILOffset; UINT32 ExecutionCount; }; + +// Data structure for a single class probe. +// +// ILOffset is the IL offset in the method for the call site being probed. +// Currently it must be ORed with CLASS_FLAG and (for interface calls) +// INTERFACE_FLAG. +// +// Count is the number of times a call was made at that call site. +// +// SIZE is the number of entries in the table. +// +// SAMPLE_INTERVAL must be >= SIZE. SAMPLE_INTERVAL / SIZE +// gives the average number of calls between table updates. +// +struct ClassProfile +{ + enum { + SIZE = 8, + SAMPLE_INTERVAL = 32, + CLASS_FLAG = 0x80000000, + INTERFACE_FLAG = 0x40000000, + OFFSET_MASK = 0x3FFFFFFF + }; + + UINT32 ILOffset; + UINT32 Count; + CORINFO_CLASS_HANDLE ClassTable[SIZE]; +}; */ // allocate a basic block profile buffer where execution counts will be stored diff --git a/src/coreclr/src/inc/corinfo.h b/src/coreclr/src/inc/corinfo.h index 8e472f3ef17b74..fa9db2c7593948 100644 --- a/src/coreclr/src/inc/corinfo.h +++ b/src/coreclr/src/inc/corinfo.h @@ -208,11 +208,11 @@ TODO: Talk about initializing strutures before use // ////////////////////////////////////////////////////////////////////////////////////////////////////////// -constexpr GUID JITEEVersionIdentifier = { /* 8031aa05-4568-40fc-a0d2-d971d8edba16 */ - 0x8031aa05, - 0x4568, - 0x40fc, - {0xa0, 0xd2, 0xd9, 0x71, 0xd8, 0xed, 0xba, 0x16} +constexpr GUID JITEEVersionIdentifier = { /* 0d235fe4-65a1-487a-8553-c845496da901 */ + 0x0d235fe4, + 0x65a1, + 0x487a, + {0x85, 0x53, 0xc8, 0x45, 0x49, 0x6d, 0xa9, 0x01} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/src/inc/corjit.h b/src/coreclr/src/inc/corjit.h index d58fb9a0fb25d8..13a9e3312702d5 100644 --- a/src/coreclr/src/inc/corjit.h +++ b/src/coreclr/src/inc/corjit.h @@ -251,9 +251,29 @@ class ICorJitInfo : public ICorDynamicInfo UINT32 ExecutionCount; }; + // Data structure for a single class probe. + // + // ILOffset is the IL offset in the method for the call site being probed. + // Currently it must be ORed with CLASS_FLAG and (for interface calls) + // INTERFACE_FLAG. + // + // Count is the number of times a call was made at that call site. + // + // SIZE is the number of entries in the table. + // + // SAMPLE_INTERVAL must be >= SIZE. SAMPLE_INTERVAL / SIZE + // gives the average number of calls between table updates. + // struct ClassProfile { - enum { SIZE = 8, SAMPLE_INTERVAL = 32 }; + enum { + SIZE = 8, + SAMPLE_INTERVAL = 32, + CLASS_FLAG = 0x80000000, + INTERFACE_FLAG = 0x40000000, + OFFSET_MASK = 0x3FFFFFFF + }; + UINT32 ILOffset; UINT32 Count; CORINFO_CLASS_HANDLE ClassTable[SIZE]; @@ -277,13 +297,20 @@ class ICorJitInfo : public ICorDynamicInfo // Get the likely implementing class for a virtual call or interface call made by ftnHnd // at the indicated IL offset. baseHnd is the interface class or base class for the method - // being called. + // being called. May returns NULL. + // + // pLikelihood is the estimated percent chance that the class at runtime is the class + // returned by this method. A well-estimated monomorphic call site will return a likelihood + // of 100. + // + // pNumberOfClasses is the estimated number of different classes seen at the site. + // A well-estimated monomorphic call site will return 1. virtual CORINFO_CLASS_HANDLE getLikelyClass( CORINFO_METHOD_HANDLE ftnHnd, CORINFO_CLASS_HANDLE baseHnd, UINT32 ilOffset, - UINT32 * pLikelihood, // estimated likelihood of the class (0...100) - UINT32 * pNumberOfClasses // estimated number of possible classes + UINT32 * pLikelihood, // OUT, estimated likelihood of the class (0...100) + UINT32 * pNumberOfClasses // OUT, estimated number of possible classes ) = 0; // Associates a native call site, identified by its offset in the native code stream, with diff --git a/src/coreclr/src/jit/block.h b/src/coreclr/src/jit/block.h index b1e4e67fd28ee1..96590ff2d836fb 100644 --- a/src/coreclr/src/jit/block.h +++ b/src/coreclr/src/jit/block.h @@ -448,7 +448,7 @@ struct BasicBlock : private LIR::Range #define BBF_BACKWARD_JUMP_TARGET MAKE_BBFLAG(36) // Block is a target of a backward jump #define BBF_PATCHPOINT MAKE_BBFLAG(37) // Block is a patchpoint -#define BBF_HAS_VIRTUAL_CALL MAKE_BBFLAG(38) // BB contains a call needing a class profile +#define BBF_HAS_CLASS_PROFILE MAKE_BBFLAG(38) // BB contains a call needing a class profile // clang-format on @@ -493,7 +493,7 @@ struct BasicBlock : private LIR::Range #define BBF_SPLIT_GAINED \ (BBF_DONT_REMOVE | BBF_HAS_LABEL | BBF_HAS_JMP | BBF_BACKWARD_JUMP | BBF_HAS_IDX_LEN | BBF_HAS_NEWARRAY | \ BBF_PROF_WEIGHT | BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_HAS_NULLCHECK | \ - BBF_HAS_VTABREF | BBF_HAS_VIRTUAL_CALL) + BBF_HAS_VTABREF | BBF_HAS_CLASS_PROFILE) #ifndef __GNUC__ // GCC doesn't like C_ASSERT at global scope static_assert_no_msg((BBF_SPLIT_NONEXIST & BBF_SPLIT_LOST) == 0); diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index eb0c2a7cee7bfd..cef46401d1804a 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -6698,7 +6698,6 @@ class Compiler #define OMF_HAS_EXPRUNTIMELOOKUP 0x00000100 // Method contains a runtime lookup to an expandable dictionary. #define OMF_HAS_PATCHPOINT 0x00000200 // Method contains patchpoints #define OMF_NEEDS_GCPOLLS 0x00000400 // Method needs GC polls -#define OMF_HAS_VIRTUAL_CALL 0x00000800 // Method contains call that needs a class profile bool doesMethodHaveFatPointer() { diff --git a/src/coreclr/src/jit/flowgraph.cpp b/src/coreclr/src/jit/flowgraph.cpp index 82d1cc7ecfb1b4..06833cea6a0617 100644 --- a/src/coreclr/src/jit/flowgraph.cpp +++ b/src/coreclr/src/jit/flowgraph.cpp @@ -420,13 +420,13 @@ void Compiler::fgInstrumentMethod() countOfBlocks++; } - // We've alread counted the number of class probes + // We've already counted the number of class probes // when importing. // int countOfCalls = info.compClassProbeCount; - // Optionally bail out, if there are less than three blocks an no call sites to profile - // One block is common. We don't expect to see zero or two blcoks here. + // Optionally bail out, if there are less than three blocks and no call sites to profile. + // One block is common. We don't expect to see zero or two blocks here. // // Note we have to at least visit all the profile call sites to properly restore their // stub addresses. So we can't bail out early if there are any of these. @@ -491,7 +491,7 @@ void Compiler::fgInstrumentMethod() continue; } - // We may see class probes in internal blcoks, thanks to the + // We may see class probes in internal blocks, thanks to the // block splitting done by the indirect call transformer. // if (JitConfig.JitClassProfiling() > 0) @@ -499,7 +499,7 @@ void Compiler::fgInstrumentMethod() // Only works when jitting. assert(!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)); - if ((block->bbFlags & BBF_HAS_VIRTUAL_CALL) != 0) + if ((block->bbFlags & BBF_HAS_CLASS_PROFILE) != 0) { // Would be nice to avoid having to search here by tracking // candidates more directly. @@ -590,17 +590,18 @@ void Compiler::fgInstrumentMethod() // Initialize the class table // - // Hack: we use the high bits of the offset to indicate that this record + // Hack: we use two high bits of the offset to indicate that this record // is the start of a class profile, and what kind of call is being profiled. // IL_OFFSET offset = jitGetILoffs(call->gtClassProfileCandidateInfo->ilOffset); - assert((int)offset >= 0); + assert((offset & (ICorJitInfo::ClassProfile::CLASS_FLAG | + ICorJitInfo::ClassProfile::INTERFACE_FLAG)) == 0); - offset |= 0x80000000; + offset |= ICorJitInfo::ClassProfile::CLASS_FLAG; if (call->IsVirtualStub()) { - offset |= 0x40000000; + offset |= ICorJitInfo::ClassProfile::INTERFACE_FLAG; } else { diff --git a/src/coreclr/src/jit/importer.cpp b/src/coreclr/src/jit/importer.cpp index 477d0ef0c8de51..4b18f38548530e 100644 --- a/src/coreclr/src/jit/importer.cpp +++ b/src/coreclr/src/jit/importer.cpp @@ -20599,7 +20599,7 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, // Flag block as needing scrutiny // - compCurBB->bbFlags |= BBF_HAS_VIRTUAL_CALL; + compCurBB->bbFlags |= BBF_HAS_CLASS_PROFILE; } return; diff --git a/src/coreclr/src/tools/aot/jitinterface/jitwrapper.cpp b/src/coreclr/src/tools/aot/jitinterface/jitwrapper.cpp index cdb4543d672bf7..f922a50c4a3946 100644 --- a/src/coreclr/src/tools/aot/jitinterface/jitwrapper.cpp +++ b/src/coreclr/src/tools/aot/jitinterface/jitwrapper.cpp @@ -26,11 +26,11 @@ class CORJIT_FLAGS uint64_t corJitFlags; }; -static const GUID JITEEVersionIdentifier ={ /* 8031aa05-4568-40fc-a0d2-d971d8edba16 */ - 0x8031aa05, - 0x4568, - 0x40fc, - {0xa0, 0xd2, 0xd9, 0x71, 0xd8, 0xed, 0xba, 0x16} +static const GUID JITEEVersionIdentifier = { /* 0d235fe4-65a1-487a-8553-c845496da901 */ + 0x0d235fe4, + 0x65a1, + 0x487a, + {0x85, 0x53, 0xc8, 0x45, 0x49, 0x6d, 0xa9, 0x01} }; class Jit diff --git a/src/coreclr/src/vm/jithelpers.cpp b/src/coreclr/src/vm/jithelpers.cpp index 419260ecec2698..14d1cc975e10c7 100644 --- a/src/coreclr/src/vm/jithelpers.cpp +++ b/src/coreclr/src/vm/jithelpers.cpp @@ -5243,9 +5243,10 @@ HCIMPL2(void, JIT_ClassProfile, Object *obj, void* tableAddress) VALIDATEOBJECTREF(objRef); ICorJitInfo::ClassProfile* const classProfile = (ICorJitInfo::ClassProfile*) tableAddress; - const int count = classProfile->Count++; - const int S = ICorJitInfo::ClassProfile::SIZE; - const int N = ICorJitInfo::ClassProfile::SAMPLE_INTERVAL; + volatile unsigned* pCount = (volatile unsigned*) &classProfile->Count; + const unsigned count = *pCount++; + const unsigned S = ICorJitInfo::ClassProfile::SIZE; + const unsigned N = ICorJitInfo::ClassProfile::SAMPLE_INTERVAL; if (objRef == NULL) { @@ -5272,7 +5273,7 @@ HCIMPL2(void, JIT_ClassProfile, Object *obj, void* tableAddress) // intentionally simple so we can have multithreaded // access w/o tearing state. // - static unsigned s_rng = 100; + static volatile unsigned s_rng = 100; unsigned x = s_rng; x ^= x << 13; diff --git a/src/coreclr/src/vm/jitinterface.cpp b/src/coreclr/src/vm/jitinterface.cpp index 3da08e35a82724..6515a798ba588e 100644 --- a/src/coreclr/src/vm/jitinterface.cpp +++ b/src/coreclr/src/vm/jitinterface.cpp @@ -11946,7 +11946,7 @@ CORINFO_CLASS_HANDLE CEEJitInfo::getLikelyClass( #ifdef FEATURE_PGO - // If TieredPGO is enabled, query the per call site class profile. + // Query the PGO manager's per call site class profile. // MethodDesc* pMD = (MethodDesc*)ftnHnd; unsigned codeSize = 0; diff --git a/src/coreclr/src/vm/pgo.cpp b/src/coreclr/src/vm/pgo.cpp index a4e5255203a407..92d14ebc91d081 100644 --- a/src/coreclr/src/vm/pgo.cpp +++ b/src/coreclr/src/vm/pgo.cpp @@ -159,7 +159,7 @@ void PgoManager::WritePgoData() const unsigned thisOffset = records[i].ILOffset; - if (((int) thisOffset) < 0) + if ((thisOffset & ICorJitInfo::ClassProfile::CLASS_FLAG) != 0) { // remainder must be class probe data hasClassProfile = true; @@ -195,18 +195,16 @@ void PgoManager::WritePgoData() // const char* profileType = "virtual"; - if (classProfile->ILOffset & 0x40000000) + if ((classProfile->ILOffset & ICorJitInfo::ClassProfile::INTERFACE_FLAG) != 0) { profileType = "interface"; } // "classProfile iloffs %u samples %u entries %u totalCount %u %s\n"; // - fprintf(pgoDataFile, s_ClassProfileHeader, (classProfile->ILOffset & 0x0FFFFFFF), + fprintf(pgoDataFile, s_ClassProfileHeader, (classProfile->ILOffset & ICorJitInfo::ClassProfile::OFFSET_MASK), classProfile->Count, h.m_count, h.m_totalCount, profileType); - fflush(pgoDataFile); - for (unsigned j = 0; j < h.m_count; j++) { CORINFO_CLASS_HANDLE clsHnd = h.m_histogram[j].m_mt; @@ -222,8 +220,6 @@ void PgoManager::WritePgoData() // Advance to next entry. // i += sizeof(ICorJitInfo::ClassProfile) / sizeof(ICorJitInfo::BlockCounts); - - fflush(pgoDataFile); } } @@ -536,7 +532,7 @@ CORINFO_CLASS_HANDLE PgoManager::getLikelyClass(MethodDesc* pMD, unsigned ilSize // while (j < header->recordCount) { - if (((int)s_PgoData[index + j].ILOffset) < 0) + if ((s_PgoData[index + j].ILOffset && ICorJitInfo::ClassProfile::CLASS_FLAG) != 0) { break; } @@ -552,7 +548,7 @@ CORINFO_CLASS_HANDLE PgoManager::getLikelyClass(MethodDesc* pMD, unsigned ilSize { const ICorJitInfo::ClassProfile* const classProfile = (ICorJitInfo::ClassProfile*)&s_PgoData[index + j]; - if ((classProfile->ILOffset & 0x0FFFFFFF) != ilOffset) + if ((classProfile->ILOffset & ICorJitInfo::ClassProfile::OFFSET_MASK) != ilOffset) { // Need to make sure this is even divisor // From f490d8cf9b25d13544c40c4dc06aa2f52c665310 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 24 Nov 2020 08:58:23 -0800 Subject: [PATCH 05/10] fix compile issue --- src/coreclr/src/vm/pgo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/src/vm/pgo.cpp b/src/coreclr/src/vm/pgo.cpp index 92d14ebc91d081..93fa4f7b9d2f28 100644 --- a/src/coreclr/src/vm/pgo.cpp +++ b/src/coreclr/src/vm/pgo.cpp @@ -532,7 +532,7 @@ CORINFO_CLASS_HANDLE PgoManager::getLikelyClass(MethodDesc* pMD, unsigned ilSize // while (j < header->recordCount) { - if ((s_PgoData[index + j].ILOffset && ICorJitInfo::ClassProfile::CLASS_FLAG) != 0) + if ((s_PgoData[index + j].ILOffset & ICorJitInfo::ClassProfile::CLASS_FLAG) != 0) { break; } From 48d757b076ddebcc8bbc8a4ad4579ac7a955996e Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 24 Nov 2020 11:11:28 -0800 Subject: [PATCH 06/10] crossgen2 getLikelyClass must return value --- .../JitInterface/CorInfoImpl.ReadyToRun.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs index 07b3beffc38c90..7abafbc01467c7 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs @@ -2342,7 +2342,9 @@ private HRESULT getMethodBlockCounts(CORINFO_METHOD_STRUCT_* ftnHnd, ref uint pC { throw new NotImplementedException("getBBProfileData"); } private CORINFO_CLASS_STRUCT_* getLikelyClass(CORINFO_METHOD_STRUCT_* ftnHnd, CORINFO_CLASS_STRUCT_* baseHnd, uint IlOffset, ref uint pLikelihood, ref uint pNumberOfClasses) - { throw new NotImplementedException("getLikelyClass"); } + { + return 0; + } private void getAddressOfPInvokeTarget(CORINFO_METHOD_STRUCT_* method, ref CORINFO_CONST_LOOKUP pLookup) { From b72de10143b9cb9912b24d235c223718bb76f31b Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 24 Nov 2020 12:03:00 -0800 Subject: [PATCH 07/10] null, not 0 --- .../JitInterface/CorInfoImpl.ReadyToRun.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs index 7abafbc01467c7..22d2d8515f788f 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs @@ -2343,7 +2343,7 @@ private HRESULT getMethodBlockCounts(CORINFO_METHOD_STRUCT_* ftnHnd, ref uint pC private CORINFO_CLASS_STRUCT_* getLikelyClass(CORINFO_METHOD_STRUCT_* ftnHnd, CORINFO_CLASS_STRUCT_* baseHnd, uint IlOffset, ref uint pLikelihood, ref uint pNumberOfClasses) { - return 0; + return null; } private void getAddressOfPInvokeTarget(CORINFO_METHOD_STRUCT_* method, ref CORINFO_CONST_LOOKUP pLookup) From 7b7efc9a198a7572769ffa2801d1fed2280e59cc Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 24 Nov 2020 15:12:29 -0800 Subject: [PATCH 08/10] Restore the logic to do guarded devirt for predicatble interface calls. Not sure how this got lost. --- src/coreclr/src/jit/importer.cpp | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/src/coreclr/src/jit/importer.cpp b/src/coreclr/src/jit/importer.cpp index 4b18f38548530e..06cd27d0e67dc2 100644 --- a/src/coreclr/src/jit/importer.cpp +++ b/src/coreclr/src/jit/importer.cpp @@ -20813,6 +20813,15 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, return; } + // Some of these may be redundant + // + DWORD likelyMethodAttribs = info.compCompHnd->getMethodAttribs(likelyMethod); + DWORD likelyClassAttribs = info.compCompHnd->getClassAttribs(likelyClass); + + // Try guarded devirtualization. + // + addGuardedDevirtualizationCandidate(call, likelyMethod, likelyClass, likelyMethodAttribs, likelyClassAttribs, + likelihood); return; } @@ -21426,20 +21435,6 @@ void Compiler::addGuardedDevirtualizationCandidate(GenTreeCall* call, static ConfigMethodRange JitGuardedDevirtualizationRange; JitGuardedDevirtualizationRange.EnsureInit(JitConfig.JitGuardedDevirtualizationRange()); assert(!JitGuardedDevirtualizationRange.Error()); - - static bool first = true; - - if (first) - { - first = false; - - if (!JitGuardedDevirtualizationRange.IsEmpty()) - { - printf("**** Range-based GDV enabled for ****\n"); - JitGuardedDevirtualizationRange.Dump(); - } - } - if (!JitGuardedDevirtualizationRange.Contains(impInlineRoot()->info.compMethodHash())) { JITDUMP("NOT Marking call [%06u] as guarded devirtualization candidate -- excluded by " From 23f26bf71a6b61d5a1404485c01668e6dbf2f242 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 24 Nov 2020 18:47:48 -0800 Subject: [PATCH 09/10] Generalize handling of the case where the dispatch object has a class type to work with shared classes (like __Canon). Also note we can reach this stage for both virtual and interface calls. --- src/coreclr/src/jit/importer.cpp | 129 ++++++++++++++++--------------- 1 file changed, 67 insertions(+), 62 deletions(-) diff --git a/src/coreclr/src/jit/importer.cpp b/src/coreclr/src/jit/importer.cpp index 06cd27d0e67dc2..03fec314548c28 100644 --- a/src/coreclr/src/jit/importer.cpp +++ b/src/coreclr/src/jit/importer.cpp @@ -20520,7 +20520,7 @@ bool Compiler::IsMathIntrinsic(GenTree* tree) // method -- [IN/OUT] the method handle for call. Updated iff call devirtualized. // methodFlags -- [IN/OUT] flags for the method to call. Updated iff call devirtualized. // contextHandle -- [IN/OUT] context handle for the call. Updated iff call devirtualized. -// exactContextHnd -- [OUT] updated context handle iff call devirtualized +// exactContextHandle -- [OUT] updated context handle iff call devirtualized // isLateDevirtualization -- if devirtualization is happening after importation // isExplicitTailCalll -- [IN] true if we plan on using an explicit tail call // ilOffset -- IL offset of the call @@ -20749,16 +20749,9 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, // IL_021e: callvirt instance int32 System.Object::GetHashCode() // // If so, we can't devirtualize, but we may be able to do guarded devirtualization. + // if ((objClassAttribs & CORINFO_FLG_INTERFACE) != 0) { - // If we're called during early devirtualiztion, attempt guarded devirtualization - // if there's currently just one implementing class. - if (exactContextHandle == nullptr) - { - JITDUMP("--- obj class is interface...unable to dervirtualize, sorry\n"); - return; - } - // Don't try guarded devirtualiztion when we're doing late devirtualization. // if (isLateDevirtualization) @@ -20767,7 +20760,7 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, return; } - JITDUMP("Consdering guarded devirt...\n"); + JITDUMP("Considering guarded devirt...\n"); // See if the runtime can provide a class to guess for. // @@ -20813,6 +20806,8 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, return; } + JITDUMP("%s call would invoke method %s\n", callKind, eeGetMethodName(likelyMethod, nullptr)); + // Some of these may be redundant // DWORD likelyMethodAttribs = info.compCompHnd->getMethodAttribs(likelyMethod); @@ -20833,62 +20828,73 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, JITDUMP("--- base class is interface\n"); } - // Fetch the method that would be called based on the declared type of 'this' + // Fetch the method that would be called based on the declared type of 'this', + // and prepare to fetch the method attributes. + // CORINFO_CONTEXT_HANDLE ownerType = *contextHandle; CORINFO_METHOD_HANDLE derivedMethod = info.compCompHnd->resolveVirtualMethod(baseMethod, objClass, ownerType); - // If we failed to get a handle, we can't devirtualize. This can - // happen when prejitting, if the devirtualization crosses - // servicing bubble boundaries. - // - // Note if we have some way of guessing a better and more likely type we can do something similar to the code - // above for the case where the best jit type is an interface type. - if (derivedMethod == nullptr) - { - JITDUMP("--- no derived method, sorry\n"); - return; - } - - // Fetch method attributes to see if method is marked final. - DWORD derivedMethodAttribs = info.compCompHnd->getMethodAttribs(derivedMethod); - const bool derivedMethodIsFinal = ((derivedMethodAttribs & CORINFO_FLG_FINAL) != 0); + DWORD derivedMethodAttribs = 0; + bool derivedMethodIsFinal = false; + bool canDevirtualize = false; #if defined(DEBUG) const char* derivedClassName = "?derivedClass"; const char* derivedMethodName = "?derivedMethod"; + const char* note = "inexact or not final"; +#endif - const char* note = "inexact or not final"; - if (isExact) - { - note = "exact"; - } - else if (objClassIsFinal) + // If we failed to get a method handle, we can't directly devirtualize. + // + // This can happen when prejitting, if the devirtualization crosses + // servicing bubble boundaries, or if objClass is a shared class. + // + if (derivedMethod == nullptr) { - note = "final class"; + JITDUMP("--- no derived method\n"); } - else if (derivedMethodIsFinal) + else { - note = "final method"; - } + // Fetch method attributes to see if method is marked final. + derivedMethodAttribs = info.compCompHnd->getMethodAttribs(derivedMethod); + derivedMethodIsFinal = ((derivedMethodAttribs & CORINFO_FLG_FINAL) != 0); - if (verbose || doPrint) - { - derivedMethodName = eeGetMethodName(derivedMethod, &derivedClassName); - if (verbose) +#if defined(DEBUG) + if (isExact) { - printf(" devirt to %s::%s -- %s\n", derivedClassName, derivedMethodName, note); - gtDispTree(call); + note = "exact"; + } + else if (objClassIsFinal) + { + note = "final class"; + } + else if (derivedMethodIsFinal) + { + note = "final method"; + } + + if (verbose || doPrint) + { + derivedMethodName = eeGetMethodName(derivedMethod, &derivedClassName); + if (verbose) + { + printf(" devirt to %s::%s -- %s\n", derivedClassName, derivedMethodName, note); + gtDispTree(call); + } } - } #endif // defined(DEBUG) - const bool canDevirtualize = isExact || objClassIsFinal || (!isInterface && derivedMethodIsFinal); + canDevirtualize = isExact || objClassIsFinal || (!isInterface && derivedMethodIsFinal); + } + // We still might be able to do a guarded devirtualization. + // Note the call might be an interface call or a virtual call. + // if (!canDevirtualize) { JITDUMP(" Class not final or exact%s\n", isInterface ? "" : ", and method not final"); - // Don't try guarded devirtualiztion when we're doing late devirtualization. + // Don't try guarded devirtualiztion if we're doing late devirtualization. // if (isLateDevirtualization) { @@ -20900,12 +20906,9 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, // See if there's a likely guess for the class. // - // pass objClass here, or baseClass? - // always query, or do so under condition? - // - const unsigned virtualLikelihoodThreshold = 30; - unsigned likelihood = 0; - unsigned numberOfClasses = 0; + const unsigned likelihoodThreshold = isInterface ? 25 : 30; + unsigned likelihood = 0; + unsigned numberOfClasses = 0; CORINFO_CLASS_HANDLE likelyClass = info.compCompHnd->getLikelyClass(info.compMethodHnd, baseClass, ilOffset, &likelihood, &numberOfClasses); @@ -20914,9 +20917,11 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, JITDUMP("Likely class for %p (%s) is %p (%s) [likelihood:%u classes seen:%u]\n", dspPtr(objClass), objClassName, likelyClass, eeGetClassName(likelyClass), likelihood, numberOfClasses); } - else + else if (derivedMethod != nullptr) { - // Have we enabled guarded devirtualization by guessing the jit's best class? + // If we have a derived method we can optionally guess for + // the class that introduces the method. + // bool guessJitBestClass = true; INDEBUG(guessJitBestClass = (JitConfig.JitGuardedDevirtualizationGuessBestClass() > 0);); @@ -20927,12 +20932,12 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, } // We will use the class that introduced the method as our guess - // for the runtime class of othe object. + // for the runtime class of the object. // // We don't know now likely this is; just choose a value that gets - // past the threshold. + // us past the threshold. likelyClass = info.compCompHnd->getMethodClass(derivedMethod); - likelihood = virtualLikelihoodThreshold; + likelihood = likelihoodThreshold; JITDUMP("Will guess implementing class for class %p (%s) is %p (%s)!\n", dspPtr(objClass), objClassName, likelyClass, eeGetClassName(likelyClass)); @@ -20941,13 +20946,13 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, // Todo: a more advanced heuristic using likelihood, number of // classes, and the profile count for this block. // - // For now we will guess if the likelihood is 30% or more, as studies - // have shown this should pay off for interface calls. + // For now we will guess if the likelihood is at least 25%/30% (intfc/virt), as studies + // have shown this transformation should pay off even if we guess wrong sometimes. // - if (likelihood < virtualLikelihoodThreshold) + if (likelihood < likelihoodThreshold) { - JITDUMP("Not guessing for class; likelihood is below virtual call threshold %u\n", - virtualLikelihoodThreshold); + JITDUMP("Not guessing for class; likelihood is below %s call threshold %u\n", callKind, + likelihoodThreshold); return; } @@ -20962,7 +20967,7 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, return; } - JITDUMP("Virtual call would invoke method %s\n", eeGetMethodName(likelyMethod, nullptr)); + JITDUMP("%s call would invoke method %s\n", callKind, eeGetMethodName(likelyMethod, nullptr)); // Some of these may be redundant // From 95c6d8e1009d324f4a44e7ee6e8d4fcdd8b07403 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 24 Nov 2020 19:19:53 -0800 Subject: [PATCH 10/10] review feedback --- src/coreclr/src/vm/jithelpers.cpp | 1 + src/coreclr/src/vm/pgo.cpp | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/coreclr/src/vm/jithelpers.cpp b/src/coreclr/src/vm/jithelpers.cpp index 14d1cc975e10c7..0ef036d815d8a6 100644 --- a/src/coreclr/src/vm/jithelpers.cpp +++ b/src/coreclr/src/vm/jithelpers.cpp @@ -5247,6 +5247,7 @@ HCIMPL2(void, JIT_ClassProfile, Object *obj, void* tableAddress) const unsigned count = *pCount++; const unsigned S = ICorJitInfo::ClassProfile::SIZE; const unsigned N = ICorJitInfo::ClassProfile::SAMPLE_INTERVAL; + _ASSERTE(N >= S); if (objRef == NULL) { diff --git a/src/coreclr/src/vm/pgo.cpp b/src/coreclr/src/vm/pgo.cpp index 93fa4f7b9d2f28..3b7ab178984ef9 100644 --- a/src/coreclr/src/vm/pgo.cpp +++ b/src/coreclr/src/vm/pgo.cpp @@ -16,20 +16,28 @@ const char* const PgoManager::s_RecordString = "ilOffs %u count %u\n"; const char* const PgoManager::s_ClassProfileHeader = "classProfile iloffs %u samples %u entries %u totalCount %u %s\n"; const char* const PgoManager::s_ClassProfileEntry = "class %p (%s) count %u\n"; +// Data item in class profile histogram +// struct HistogramEntry { + // Class that was observed at runtime CORINFO_CLASS_HANDLE m_mt; + // Number of observations in the table unsigned m_count; }; +// Summarizes a ClassProfile table by forming a Histogram +// struct Histogram { Histogram(const ICorJitInfo::ClassProfile* classProfile); - // Number of nonzero entries in the table + // Number of nonzero entries in the histogram unsigned m_count; - // Total count from all entries in the table + // Sum of counts from all entries in the histogram unsigned m_totalCount; + // Histogram entries, in no particular order. + // The first m_count of these will be valid. HistogramEntry m_histogram[ICorJitInfo::ClassProfile::SIZE]; }; @@ -479,6 +487,12 @@ HRESULT PgoManager::getMethodBlockCounts(MethodDesc* pMD, unsigned ilSize, UINT3 return E_NOTIMPL; } +// See if there is a class profile for this method at the indicated il Offset. +// If so, return the most frequently seen class, along with the likelihood that +// it was the class seen, and the total number of classes seen. +// +// Return NULL if there is no profile data to be found. +// CORINFO_CLASS_HANDLE PgoManager::getLikelyClass(MethodDesc* pMD, unsigned ilSize, unsigned ilOffset, UINT32* pLikelihood, UINT32* pNumberOfClasses) { *pLikelihood = 0;