From c3b838dce9e623ff343be25a90d55f868054471e Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 21 Apr 2021 16:14:17 +0200 Subject: [PATCH 1/6] Add option to collect 64-bit counts Add COMPlus_JitCollect64BitCounts which makes the JIT instrument using 64-bit counts instead of 32-bit counts. No support for consuming these counts is added, only support for producing them. I also changed the printing of relocs to include their values when diffable disassembly is off. --- .../superpmi-shared/methodcontext.cpp | 10 +-- src/coreclr/inc/corjit.h | 6 +- src/coreclr/jit/compiler.cpp | 2 +- src/coreclr/jit/emitxarch.cpp | 6 +- src/coreclr/jit/fgprofile.cpp | 75 +++++++++++-------- src/coreclr/jit/jitconfigvalues.h | 2 + src/coreclr/tools/Common/Pgo/PgoFormat.cs | 10 ++- .../JitInterface/CorInfoImpl.ReadyToRun.cs | 2 +- src/coreclr/zap/zapinfo.cpp | 4 +- 9 files changed, 68 insertions(+), 49 deletions(-) diff --git a/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp index f46cde27804837..e3ca741c96d540 100644 --- a/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp @@ -5531,10 +5531,10 @@ void MethodContext::dmpGetPgoInstrumentationResults(DWORDLONG key, const Agnosti switch((ICorJitInfo::PgoInstrumentationKind)pBuf[i].InstrumentationKind) { - case ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount: + case ICorJitInfo::PgoInstrumentationKind::BasicBlockU32Count: printf("B %u", *(unsigned*)(pInstrumentationData + pBuf[i].Offset)); break; - case ICorJitInfo::PgoInstrumentationKind::EdgeIntCount: + case ICorJitInfo::PgoInstrumentationKind::EdgeU32Count: printf("E %u", *(unsigned*)(pInstrumentationData + pBuf[i].Offset)); break; case ICorJitInfo::PgoInstrumentationKind::TypeHandleHistogramCount: @@ -6720,8 +6720,8 @@ int MethodContext::dumpMethodIdentityInfoToBuffer(char* buff, int len, bool igno // for (UINT32 i = 0; i < schemaCount; i++) { - if ((schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) - || (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount)) + if ((schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockU32Count) + || (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeU32Count)) { if (schema[i].Offset < minOffset) { @@ -6806,7 +6806,7 @@ bool MethodContext::hasPgoData(bool& hasEdgeProfile, bool& hasClassProfile, bool { for (UINT32 i = 0; i < schemaCount; i++) { - hasEdgeProfile |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount); + hasEdgeProfile |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeU32Count); hasClassProfile |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::TypeHandleHistogramCount); hasLikelyClass |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::GetLikelyClass); diff --git a/src/coreclr/inc/corjit.h b/src/coreclr/inc/corjit.h index 2407a407ac5d3d..53b4cd4fb72f95 100644 --- a/src/coreclr/inc/corjit.h +++ b/src/coreclr/inc/corjit.h @@ -357,12 +357,14 @@ class ICorJitInfo : public ICorDynamicInfo DescriptorMin = 0x40, Done = None, // All instrumentation schemas must end with a record which is "Done" - BasicBlockIntCount = (DescriptorMin * 1) | FourByte, // 4 byte basic block counter, using unsigned 4 byte int + BasicBlockU32Count = (DescriptorMin * 1) | FourByte, // basic block counter using unsigned 4 byte int + BasicBlockU64Count = (DescriptorMin * 1) | EightByte, // basic block counter using unsigned 8 byte int. Currently only supported for collection. TypeHandleHistogramCount = (DescriptorMin * 2) | FourByte | AlignPointer, // 4 byte counter that is part of a type histogram TypeHandleHistogramTypeHandle = (DescriptorMin * 3) | TypeHandle, // TypeHandle that is part of a type histogram Version = (DescriptorMin * 4) | None, // Version is encoded in the Other field of the schema NumRuns = (DescriptorMin * 5) | None, // Number of runs is encoded in the Other field of the schema - EdgeIntCount = (DescriptorMin * 6) | FourByte, // 4 byte edge counter, using unsigned 4 byte int + EdgeU32Count = (DescriptorMin * 6) | FourByte, // edge counter using unsigned 4 byte int + EdgeU64Count = (DescriptorMin * 6) | EightByte, // edge counter using unsigned 8 byte int. Currently only supported for collection. GetLikelyClass = (DescriptorMin * 7) | TypeHandle, // Compressed get likely class data }; diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index feaf24b8def064..4181d678dfd684 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5879,7 +5879,7 @@ void Compiler::compCompileFinish() for (UINT32 iSchema = 0; iSchema < fgPgoSchemaCount; iSchema++) { if ((fgPgoSchema[iSchema].InstrumentationKind == - ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) && + ICorJitInfo::PgoInstrumentationKind::BasicBlockU32Count) && (fgPgoSchema[iSchema].ILOffset == 0)) { foundEntrypointBasicBlockCount = true; diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 18d37a489d5f31..3af1528a314740 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -7905,13 +7905,13 @@ void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm) /***************************************************************************** * - * Display an reloc value - * If we are formatting for an assembly listing don't print the hex value + * Display a reloc value + * If we are formatting for a diffable assembly listing don't print the hex value * since it will prevent us from doing assembly diffs */ void emitter::emitDispReloc(ssize_t value) { - if (emitComp->opts.disAsm) + if (emitComp->opts.disAsm && emitComp->opts.disDiffable) { printf("(reloc)"); } diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index dcbf7f171e17cb..21b6c68d2b0928 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -194,7 +194,7 @@ bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::wei for (UINT32 i = 0; i < fgPgoSchemaCount; i++) { - if ((fgPgoSchema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) && + if ((fgPgoSchema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockU32Count) && ((IL_OFFSET)fgPgoSchema[i].ILOffset == offset)) { *weightWB = (BasicBlock::weight_t) * (uint32_t*)(fgPgoData + fgPgoSchema[i].Offset); @@ -334,9 +334,11 @@ void BlockCountInstrumentor::BuildSchemaElements(BasicBlock* block, Schema& sche ICorJitInfo::PgoInstrumentationSchema schemaElem; schemaElem.Count = 1; schemaElem.Other = 0; - schemaElem.InstrumentationKind = ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount; - schemaElem.ILOffset = offset; - schemaElem.Offset = 0; + schemaElem.InstrumentationKind = JitConfig.JitCollect64BitCounts() + ? ICorJitInfo::PgoInstrumentationKind::BasicBlockU64Count + : ICorJitInfo::PgoInstrumentationKind::BasicBlockU32Count; + schemaElem.ILOffset = offset; + schemaElem.Offset = 0; schema.push_back(schemaElem); @@ -362,21 +364,23 @@ void BlockCountInstrumentor::BuildSchemaElements(BasicBlock* block, Schema& sche // void BlockCountInstrumentor::Instrument(BasicBlock* block, Schema& schema, BYTE* profileMemory) { - const int schemaIndex = (int)block->bbCountSchemaIndex; + const ICorJitInfo::PgoInstrumentationSchema& entry = schema[block->bbCountSchemaIndex]; - assert(block->bbCodeOffs == (IL_OFFSET)schema[schemaIndex].ILOffset); - assert(schema[schemaIndex].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount); - size_t addrOfCurrentExecutionCount = (size_t)(schema[schemaIndex].Offset + profileMemory); + assert(block->bbCodeOffs == (IL_OFFSET)entry.ILOffset); + assert((entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockU32Count) || + (entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockU64Count)); + size_t addrOfCurrentExecutionCount = (size_t)(entry.Offset + profileMemory); + var_types typ = + entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockU32Count ? TYP_INT : TYP_LONG; // Read Basic-Block count value - GenTree* valueNode = - m_comp->gtNewIndOfIconHandleNode(TYP_INT, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false); + GenTree* valueNode = m_comp->gtNewIndOfIconHandleNode(typ, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false); // Increment value by 1 - GenTree* rhsNode = m_comp->gtNewOperNode(GT_ADD, TYP_INT, valueNode, m_comp->gtNewIconNode(1)); + GenTree* rhsNode = m_comp->gtNewOperNode(GT_ADD, typ, valueNode, m_comp->gtNewIconNode(1, typ)); // Write new Basic-Block count value - GenTree* lhsNode = m_comp->gtNewIndOfIconHandleNode(TYP_INT, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false); + GenTree* lhsNode = m_comp->gtNewIndOfIconHandleNode(typ, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false); GenTree* asgNode = m_comp->gtNewAssignNode(lhsNode, rhsNode); m_comp->fgNewStmtAtBeg(block, asgNode); @@ -411,11 +415,12 @@ void BlockCountInstrumentor::InstrumentMethodEntry(Schema& schema, BYTE* profile assert(m_entryBlock != nullptr); assert(m_entryBlock->bbCodeOffs == 0); - const int firstSchemaIndex = (int)m_entryBlock->bbCountSchemaIndex; - assert((IL_OFFSET)schema[firstSchemaIndex].ILOffset == 0); - assert(schema[firstSchemaIndex].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount); + const ICorJitInfo::PgoInstrumentationSchema& entry = schema[m_entryBlock->bbCountSchemaIndex]; + assert((IL_OFFSET)entry.ILOffset == 0); + assert((entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockU32Count) || + (entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockU64Count)); - const size_t addrOfFirstExecutionCount = (size_t)(schema[firstSchemaIndex].Offset + profileMemory); + const size_t addrOfFirstExecutionCount = (size_t)(entry.Offset + profileMemory); GenTree* arg; @@ -447,13 +452,15 @@ void BlockCountInstrumentor::InstrumentMethodEntry(Schema& schema, BYTE* profile GenTreeCall::Use* args = m_comp->gtNewCallArgs(arg); GenTree* call = m_comp->gtNewHelperCallNode(CORINFO_HELP_BBT_FCN_ENTER, TYP_VOID, args); + var_types typ = + entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockU32Count ? TYP_INT : TYP_LONG; // Read Basic-Block count value // - GenTree* valueNode = m_comp->gtNewIndOfIconHandleNode(TYP_INT, addrOfFirstExecutionCount, GTF_ICON_BBC_PTR, false); + GenTree* valueNode = m_comp->gtNewIndOfIconHandleNode(typ, addrOfFirstExecutionCount, GTF_ICON_BBC_PTR, false); // Compare Basic-Block count value against zero // - GenTree* relop = m_comp->gtNewOperNode(GT_NE, TYP_INT, valueNode, m_comp->gtNewIconNode(0, TYP_INT)); + GenTree* relop = m_comp->gtNewOperNode(GT_NE, typ, valueNode, m_comp->gtNewIconNode(0, typ)); GenTree* colon = new (m_comp, GT_COLON) GenTreeColon(TYP_VOID, m_comp->gtNewNothingNode(), call); GenTree* cond = m_comp->gtNewQmarkNode(TYP_VOID, relop, colon); Statement* stmt = m_comp->gtNewStmt(cond); @@ -1041,9 +1048,11 @@ void EfficientEdgeCountInstrumentor::BuildSchemaElements(BasicBlock* block, Sche ICorJitInfo::PgoInstrumentationSchema schemaElem; schemaElem.Count = 1; schemaElem.Other = targetOffset; - schemaElem.InstrumentationKind = ICorJitInfo::PgoInstrumentationKind::EdgeIntCount; - schemaElem.ILOffset = sourceOffset; - schemaElem.Offset = 0; + schemaElem.InstrumentationKind = JitConfig.JitCollect64BitCounts() + ? ICorJitInfo::PgoInstrumentationKind::EdgeU64Count + : ICorJitInfo::PgoInstrumentationKind::EdgeU32Count; + schemaElem.ILOffset = sourceOffset; + schemaElem.Offset = 0; schema.push_back(schemaElem); @@ -1082,9 +1091,12 @@ void EfficientEdgeCountInstrumentor::Instrument(BasicBlock* block, Schema& schem // Sanity checks. // assert((schemaIndex >= 0) && (schemaIndex < (int)schema.size())); - assert(schema[schemaIndex].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount); - size_t addrOfCurrentExecutionCount = (size_t)(schema[schemaIndex].Offset + profileMemory); + const ICorJitInfo::PgoInstrumentationSchema& entry = schema[schemaIndex]; + assert((entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeU32Count) || + (entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeU64Count)); + + size_t addrOfCurrentExecutionCount = (size_t)(entry.Offset + profileMemory); // Determine where to place the probe. // @@ -1124,16 +1136,17 @@ void EfficientEdgeCountInstrumentor::Instrument(BasicBlock* block, Schema& schem // Place the probe + var_types typ = + entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeU32Count ? TYP_INT : TYP_LONG; // Read Basic-Block count value GenTree* valueNode = - m_comp->gtNewIndOfIconHandleNode(TYP_INT, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false); + m_comp->gtNewIndOfIconHandleNode(typ, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false); // Increment value by 1 - GenTree* rhsNode = m_comp->gtNewOperNode(GT_ADD, TYP_INT, valueNode, m_comp->gtNewIconNode(1)); + GenTree* rhsNode = m_comp->gtNewOperNode(GT_ADD, typ, valueNode, m_comp->gtNewIconNode(1, typ)); // Write new Basic-Block count value - GenTree* lhsNode = - m_comp->gtNewIndOfIconHandleNode(TYP_INT, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false); + GenTree* lhsNode = m_comp->gtNewIndOfIconHandleNode(typ, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false); GenTree* asgNode = m_comp->gtNewAssignNode(lhsNode, rhsNode); m_comp->fgNewStmtAtBeg(instrumentedBlock, asgNode); @@ -1724,11 +1737,11 @@ PhaseStatus Compiler::fgIncorporateProfileData() fgNumProfileRuns += fgPgoSchema[iSchema].Other; break; - case ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount: + case ICorJitInfo::PgoInstrumentationKind::BasicBlockU32Count: fgPgoBlockCounts++; break; - case ICorJitInfo::PgoInstrumentationKind::EdgeIntCount: + case ICorJitInfo::PgoInstrumentationKind::EdgeU32Count: fgPgoEdgeCounts++; break; @@ -1962,7 +1975,7 @@ class EfficientEdgeCountReconstructor : public SpanningTreeVisitor } }; - // Map for correlating EdgeIntCount schema entries with edges + // Map for correlating EdgeU32Count schema entries with edges // typedef JitHashTable EdgeKeyToEdgeMap; EdgeKeyToEdgeMap m_edgeKeyToEdgeMap; @@ -2155,7 +2168,7 @@ void EfficientEdgeCountReconstructor::Prepare() const ICorJitInfo::PgoInstrumentationSchema& schemaEntry = m_comp->fgPgoSchema[iSchema]; switch (schemaEntry.InstrumentationKind) { - case ICorJitInfo::PgoInstrumentationKind::EdgeIntCount: + case ICorJitInfo::PgoInstrumentationKind::EdgeU32Count: { // Optimization TODO: if profileCount is zero, we can just ignore this edge // and the right things will happen. diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index ff52e6e050e111..48591065a0f9b7 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -473,6 +473,8 @@ CONFIG_INTEGER(JitMinimalJitProfiling, W("JitMinimalJitProfiling"), 1) CONFIG_INTEGER(JitMinimalPrejitProfiling, W("JitMinimalPrejitProfiling"), 0) CONFIG_INTEGER(JitClassProfiling, W("JitClassProfiling"), 1) CONFIG_INTEGER(JitEdgeProfiling, W("JitEdgeProfiling"), 1) +CONFIG_INTEGER(JitCollect64BitCounts, W("JitCollect64BitCounts"), 0) // Collect counts as 64-bit values. These counters + // cannot be consumed. // Profile consumption options CONFIG_INTEGER(JitDisablePgo, W("JitDisablePgo"), 0) // Ignore pgo data for all methods diff --git a/src/coreclr/tools/Common/Pgo/PgoFormat.cs b/src/coreclr/tools/Common/Pgo/PgoFormat.cs index 3e0013eb1a2351..c1ea3e081437f2 100644 --- a/src/coreclr/tools/Common/Pgo/PgoFormat.cs +++ b/src/coreclr/tools/Common/Pgo/PgoFormat.cs @@ -36,12 +36,14 @@ public enum PgoInstrumentationKind DescriptorMin = 0x40, Done = None, // All instrumentation schemas must end with a record which is "Done" - BasicBlockIntCount = (DescriptorMin * 1) | FourByte, // 4 byte basic block counter, using unsigned 4 byte int + BasicBlockU32Count = (DescriptorMin * 1) | FourByte, // basic block counter using unsigned 4 byte int + BasicBlockU64Count = (DescriptorMin * 1) | EightByte, // basic block counter using unsigned 8 byte int. Currently only supported for collection. TypeHandleHistogramCount = (DescriptorMin * 2) | FourByte | AlignPointer, // 4 byte counter that is part of a type histogram TypeHandleHistogramTypeHandle = (DescriptorMin * 3) | TypeHandle, // TypeHandle that is part of a type histogram Version = (DescriptorMin * 4) | None, // Version is encoded in the Other field of the schema NumRuns = (DescriptorMin * 5) | None, // Number of runs is encoded in the Other field of the schema - EdgeIntCount = (DescriptorMin * 6) | FourByte, // 4 byte edge counter, using unsigned 4 byte int + EdgeU32Count = (DescriptorMin * 6) | FourByte, // edge counter using unsigned 4 byte int + EdgeU64Count = (DescriptorMin * 6) | EightByte, // edge counter using unsigned 8 byte int. Currently only supported for collection. GetLikelyClass = (DescriptorMin * 7) | TypeHandle, // Compressed get likely class data } @@ -559,8 +561,8 @@ void MergeInSchemaElem(Dictionary dataMerger, PgoS switch (existingSchemaItem.InstrumentationKind) { - case PgoInstrumentationKind.BasicBlockIntCount: - case PgoInstrumentationKind.EdgeIntCount: + case PgoInstrumentationKind.BasicBlockU32Count: + case PgoInstrumentationKind.EdgeU32Count: case PgoInstrumentationKind.TypeHandleHistogramCount: if ((existingSchemaItem.Count != 1) || (schema.Count != 1)) { diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs index f655f0fd2394e3..0197230d65df89 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs @@ -2441,7 +2441,7 @@ private unsafe HRESULT allocPgoInstrumentationBySchema(CORINFO_METHOD_STRUCT_* f // Validate that each schema item is only used for a basic block count for (uint iSchema = 0; iSchema < countSchemaItems; iSchema++) { - if (pSchema[iSchema].InstrumentationKind != PgoInstrumentationKind.BasicBlockIntCount) + if (pSchema[iSchema].InstrumentationKind != PgoInstrumentationKind.BasicBlockU32Count) return HRESULT.E_NOTIMPL; if (pSchema[iSchema].Count != 1) return HRESULT.E_NOTIMPL; diff --git a/src/coreclr/zap/zapinfo.cpp b/src/coreclr/zap/zapinfo.cpp index c242d79aa7de9e..b60e6831ba3e88 100644 --- a/src/coreclr/zap/zapinfo.cpp +++ b/src/coreclr/zap/zapinfo.cpp @@ -964,7 +964,7 @@ HRESULT ZapInfo::allocPgoInstrumentationBySchema(CORINFO_METHOD_HANDLE ftnHnd, // Validate that each schema item is only used for a basic block count for (UINT32 iSchema = 0; iSchema < countSchemaItems; iSchema++) { - if (pSchema[iSchema].InstrumentationKind != PgoInstrumentationKind::BasicBlockIntCount) + if (pSchema[iSchema].InstrumentationKind != PgoInstrumentationKind::BasicBlockU32Count) return E_NOTIMPL; if (pSchema[iSchema].Count != 1) return E_NOTIMPL; @@ -1123,7 +1123,7 @@ HRESULT ZapInfo::getPgoInstrumentationResults(CORINFO_METHOD_HANDLE ftnHnd, { PgoInstrumentationSchema blockCountSchema = {}; blockCountSchema.Count = 1; - blockCountSchema.InstrumentationKind = ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount; + blockCountSchema.InstrumentationKind = ICorJitInfo::PgoInstrumentationKind::BasicBlockU32Count; blockCountSchema.ILOffset = blockCounts[iSchema].ILOffset; blockCountSchema.Offset = (BYTE *)&blockCounts[iSchema].ExecutionCount - (BYTE*)blockCounts; pgoResults->m_schema.Append(blockCountSchema); From e3d34508144900b4cd19ef2e90f82889d07ca25e Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 21 Apr 2021 18:47:07 +0200 Subject: [PATCH 2/6] Rename new instrumentation kinds back --- .../superpmi-shared/methodcontext.cpp | 10 +++--- src/coreclr/inc/corjit.h | 8 ++--- src/coreclr/jit/compiler.cpp | 2 +- src/coreclr/jit/fgprofile.cpp | 36 +++++++++---------- src/coreclr/tools/Common/Pgo/PgoFormat.cs | 12 +++---- .../JitInterface/CorInfoImpl.ReadyToRun.cs | 2 +- src/coreclr/zap/zapinfo.cpp | 4 +-- 7 files changed, 37 insertions(+), 37 deletions(-) diff --git a/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp index e3ca741c96d540..f46cde27804837 100644 --- a/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp @@ -5531,10 +5531,10 @@ void MethodContext::dmpGetPgoInstrumentationResults(DWORDLONG key, const Agnosti switch((ICorJitInfo::PgoInstrumentationKind)pBuf[i].InstrumentationKind) { - case ICorJitInfo::PgoInstrumentationKind::BasicBlockU32Count: + case ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount: printf("B %u", *(unsigned*)(pInstrumentationData + pBuf[i].Offset)); break; - case ICorJitInfo::PgoInstrumentationKind::EdgeU32Count: + case ICorJitInfo::PgoInstrumentationKind::EdgeIntCount: printf("E %u", *(unsigned*)(pInstrumentationData + pBuf[i].Offset)); break; case ICorJitInfo::PgoInstrumentationKind::TypeHandleHistogramCount: @@ -6720,8 +6720,8 @@ int MethodContext::dumpMethodIdentityInfoToBuffer(char* buff, int len, bool igno // for (UINT32 i = 0; i < schemaCount; i++) { - if ((schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockU32Count) - || (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeU32Count)) + if ((schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) + || (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount)) { if (schema[i].Offset < minOffset) { @@ -6806,7 +6806,7 @@ bool MethodContext::hasPgoData(bool& hasEdgeProfile, bool& hasClassProfile, bool { for (UINT32 i = 0; i < schemaCount; i++) { - hasEdgeProfile |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeU32Count); + hasEdgeProfile |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount); hasClassProfile |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::TypeHandleHistogramCount); hasLikelyClass |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::GetLikelyClass); diff --git a/src/coreclr/inc/corjit.h b/src/coreclr/inc/corjit.h index 53b4cd4fb72f95..c0562471f28954 100644 --- a/src/coreclr/inc/corjit.h +++ b/src/coreclr/inc/corjit.h @@ -357,14 +357,14 @@ class ICorJitInfo : public ICorDynamicInfo DescriptorMin = 0x40, Done = None, // All instrumentation schemas must end with a record which is "Done" - BasicBlockU32Count = (DescriptorMin * 1) | FourByte, // basic block counter using unsigned 4 byte int - BasicBlockU64Count = (DescriptorMin * 1) | EightByte, // basic block counter using unsigned 8 byte int. Currently only supported for collection. + BasicBlockIntCount = (DescriptorMin * 1) | FourByte, // basic block counter using unsigned 4 byte int + BasicBlockLongCount = (DescriptorMin * 1) | EightByte, // basic block counter using unsigned 8 byte int. Currently only supported for collection. TypeHandleHistogramCount = (DescriptorMin * 2) | FourByte | AlignPointer, // 4 byte counter that is part of a type histogram TypeHandleHistogramTypeHandle = (DescriptorMin * 3) | TypeHandle, // TypeHandle that is part of a type histogram Version = (DescriptorMin * 4) | None, // Version is encoded in the Other field of the schema NumRuns = (DescriptorMin * 5) | None, // Number of runs is encoded in the Other field of the schema - EdgeU32Count = (DescriptorMin * 6) | FourByte, // edge counter using unsigned 4 byte int - EdgeU64Count = (DescriptorMin * 6) | EightByte, // edge counter using unsigned 8 byte int. Currently only supported for collection. + EdgeIntCount = (DescriptorMin * 6) | FourByte, // edge counter using unsigned 4 byte int + EdgeLongCount = (DescriptorMin * 6) | EightByte, // edge counter using unsigned 8 byte int. Currently only supported for collection. GetLikelyClass = (DescriptorMin * 7) | TypeHandle, // Compressed get likely class data }; diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 4181d678dfd684..feaf24b8def064 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5879,7 +5879,7 @@ void Compiler::compCompileFinish() for (UINT32 iSchema = 0; iSchema < fgPgoSchemaCount; iSchema++) { if ((fgPgoSchema[iSchema].InstrumentationKind == - ICorJitInfo::PgoInstrumentationKind::BasicBlockU32Count) && + ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) && (fgPgoSchema[iSchema].ILOffset == 0)) { foundEntrypointBasicBlockCount = true; diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 21b6c68d2b0928..46feafe4e3f4ed 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -194,7 +194,7 @@ bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::wei for (UINT32 i = 0; i < fgPgoSchemaCount; i++) { - if ((fgPgoSchema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockU32Count) && + if ((fgPgoSchema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) && ((IL_OFFSET)fgPgoSchema[i].ILOffset == offset)) { *weightWB = (BasicBlock::weight_t) * (uint32_t*)(fgPgoData + fgPgoSchema[i].Offset); @@ -335,8 +335,8 @@ void BlockCountInstrumentor::BuildSchemaElements(BasicBlock* block, Schema& sche schemaElem.Count = 1; schemaElem.Other = 0; schemaElem.InstrumentationKind = JitConfig.JitCollect64BitCounts() - ? ICorJitInfo::PgoInstrumentationKind::BasicBlockU64Count - : ICorJitInfo::PgoInstrumentationKind::BasicBlockU32Count; + ? ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount + : ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount; schemaElem.ILOffset = offset; schemaElem.Offset = 0; @@ -367,12 +367,12 @@ void BlockCountInstrumentor::Instrument(BasicBlock* block, Schema& schema, BYTE* const ICorJitInfo::PgoInstrumentationSchema& entry = schema[block->bbCountSchemaIndex]; assert(block->bbCodeOffs == (IL_OFFSET)entry.ILOffset); - assert((entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockU32Count) || - (entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockU64Count)); + assert((entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) || + (entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount)); size_t addrOfCurrentExecutionCount = (size_t)(entry.Offset + profileMemory); var_types typ = - entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockU32Count ? TYP_INT : TYP_LONG; + entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount ? TYP_INT : TYP_LONG; // Read Basic-Block count value GenTree* valueNode = m_comp->gtNewIndOfIconHandleNode(typ, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false); @@ -417,8 +417,8 @@ void BlockCountInstrumentor::InstrumentMethodEntry(Schema& schema, BYTE* profile const ICorJitInfo::PgoInstrumentationSchema& entry = schema[m_entryBlock->bbCountSchemaIndex]; assert((IL_OFFSET)entry.ILOffset == 0); - assert((entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockU32Count) || - (entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockU64Count)); + assert((entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) || + (entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount)); const size_t addrOfFirstExecutionCount = (size_t)(entry.Offset + profileMemory); @@ -453,7 +453,7 @@ void BlockCountInstrumentor::InstrumentMethodEntry(Schema& schema, BYTE* profile GenTree* call = m_comp->gtNewHelperCallNode(CORINFO_HELP_BBT_FCN_ENTER, TYP_VOID, args); var_types typ = - entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockU32Count ? TYP_INT : TYP_LONG; + entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount ? TYP_INT : TYP_LONG; // Read Basic-Block count value // GenTree* valueNode = m_comp->gtNewIndOfIconHandleNode(typ, addrOfFirstExecutionCount, GTF_ICON_BBC_PTR, false); @@ -1049,8 +1049,8 @@ void EfficientEdgeCountInstrumentor::BuildSchemaElements(BasicBlock* block, Sche schemaElem.Count = 1; schemaElem.Other = targetOffset; schemaElem.InstrumentationKind = JitConfig.JitCollect64BitCounts() - ? ICorJitInfo::PgoInstrumentationKind::EdgeU64Count - : ICorJitInfo::PgoInstrumentationKind::EdgeU32Count; + ? ICorJitInfo::PgoInstrumentationKind::EdgeLongCount + : ICorJitInfo::PgoInstrumentationKind::EdgeIntCount; schemaElem.ILOffset = sourceOffset; schemaElem.Offset = 0; @@ -1093,8 +1093,8 @@ void EfficientEdgeCountInstrumentor::Instrument(BasicBlock* block, Schema& schem assert((schemaIndex >= 0) && (schemaIndex < (int)schema.size())); const ICorJitInfo::PgoInstrumentationSchema& entry = schema[schemaIndex]; - assert((entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeU32Count) || - (entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeU64Count)); + assert((entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount) || + (entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeLongCount)); size_t addrOfCurrentExecutionCount = (size_t)(entry.Offset + profileMemory); @@ -1137,7 +1137,7 @@ void EfficientEdgeCountInstrumentor::Instrument(BasicBlock* block, Schema& schem // Place the probe var_types typ = - entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeU32Count ? TYP_INT : TYP_LONG; + entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount ? TYP_INT : TYP_LONG; // Read Basic-Block count value GenTree* valueNode = m_comp->gtNewIndOfIconHandleNode(typ, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false); @@ -1737,11 +1737,11 @@ PhaseStatus Compiler::fgIncorporateProfileData() fgNumProfileRuns += fgPgoSchema[iSchema].Other; break; - case ICorJitInfo::PgoInstrumentationKind::BasicBlockU32Count: + case ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount: fgPgoBlockCounts++; break; - case ICorJitInfo::PgoInstrumentationKind::EdgeU32Count: + case ICorJitInfo::PgoInstrumentationKind::EdgeIntCount: fgPgoEdgeCounts++; break; @@ -1975,7 +1975,7 @@ class EfficientEdgeCountReconstructor : public SpanningTreeVisitor } }; - // Map for correlating EdgeU32Count schema entries with edges + // Map for correlating EdgeIntCount schema entries with edges // typedef JitHashTable EdgeKeyToEdgeMap; EdgeKeyToEdgeMap m_edgeKeyToEdgeMap; @@ -2168,7 +2168,7 @@ void EfficientEdgeCountReconstructor::Prepare() const ICorJitInfo::PgoInstrumentationSchema& schemaEntry = m_comp->fgPgoSchema[iSchema]; switch (schemaEntry.InstrumentationKind) { - case ICorJitInfo::PgoInstrumentationKind::EdgeU32Count: + case ICorJitInfo::PgoInstrumentationKind::EdgeIntCount: { // Optimization TODO: if profileCount is zero, we can just ignore this edge // and the right things will happen. diff --git a/src/coreclr/tools/Common/Pgo/PgoFormat.cs b/src/coreclr/tools/Common/Pgo/PgoFormat.cs index c1ea3e081437f2..95e69f514e127b 100644 --- a/src/coreclr/tools/Common/Pgo/PgoFormat.cs +++ b/src/coreclr/tools/Common/Pgo/PgoFormat.cs @@ -36,14 +36,14 @@ public enum PgoInstrumentationKind DescriptorMin = 0x40, Done = None, // All instrumentation schemas must end with a record which is "Done" - BasicBlockU32Count = (DescriptorMin * 1) | FourByte, // basic block counter using unsigned 4 byte int - BasicBlockU64Count = (DescriptorMin * 1) | EightByte, // basic block counter using unsigned 8 byte int. Currently only supported for collection. + BasicBlockIntCount = (DescriptorMin * 1) | FourByte, // basic block counter using unsigned 4 byte int + BasicBlockLongCount = (DescriptorMin * 1) | EightByte, // basic block counter using unsigned 8 byte int. Currently only supported for collection. TypeHandleHistogramCount = (DescriptorMin * 2) | FourByte | AlignPointer, // 4 byte counter that is part of a type histogram TypeHandleHistogramTypeHandle = (DescriptorMin * 3) | TypeHandle, // TypeHandle that is part of a type histogram Version = (DescriptorMin * 4) | None, // Version is encoded in the Other field of the schema NumRuns = (DescriptorMin * 5) | None, // Number of runs is encoded in the Other field of the schema - EdgeU32Count = (DescriptorMin * 6) | FourByte, // edge counter using unsigned 4 byte int - EdgeU64Count = (DescriptorMin * 6) | EightByte, // edge counter using unsigned 8 byte int. Currently only supported for collection. + EdgeIntCount = (DescriptorMin * 6) | FourByte, // edge counter using unsigned 4 byte int + EdgeLongCount = (DescriptorMin * 6) | EightByte, // edge counter using unsigned 8 byte int. Currently only supported for collection. GetLikelyClass = (DescriptorMin * 7) | TypeHandle, // Compressed get likely class data } @@ -561,8 +561,8 @@ void MergeInSchemaElem(Dictionary dataMerger, PgoS switch (existingSchemaItem.InstrumentationKind) { - case PgoInstrumentationKind.BasicBlockU32Count: - case PgoInstrumentationKind.EdgeU32Count: + case PgoInstrumentationKind.BasicBlockIntCount: + case PgoInstrumentationKind.EdgeIntCount: case PgoInstrumentationKind.TypeHandleHistogramCount: if ((existingSchemaItem.Count != 1) || (schema.Count != 1)) { diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs index 0197230d65df89..f655f0fd2394e3 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs @@ -2441,7 +2441,7 @@ private unsafe HRESULT allocPgoInstrumentationBySchema(CORINFO_METHOD_STRUCT_* f // Validate that each schema item is only used for a basic block count for (uint iSchema = 0; iSchema < countSchemaItems; iSchema++) { - if (pSchema[iSchema].InstrumentationKind != PgoInstrumentationKind.BasicBlockU32Count) + if (pSchema[iSchema].InstrumentationKind != PgoInstrumentationKind.BasicBlockIntCount) return HRESULT.E_NOTIMPL; if (pSchema[iSchema].Count != 1) return HRESULT.E_NOTIMPL; diff --git a/src/coreclr/zap/zapinfo.cpp b/src/coreclr/zap/zapinfo.cpp index b60e6831ba3e88..c242d79aa7de9e 100644 --- a/src/coreclr/zap/zapinfo.cpp +++ b/src/coreclr/zap/zapinfo.cpp @@ -964,7 +964,7 @@ HRESULT ZapInfo::allocPgoInstrumentationBySchema(CORINFO_METHOD_HANDLE ftnHnd, // Validate that each schema item is only used for a basic block count for (UINT32 iSchema = 0; iSchema < countSchemaItems; iSchema++) { - if (pSchema[iSchema].InstrumentationKind != PgoInstrumentationKind::BasicBlockU32Count) + if (pSchema[iSchema].InstrumentationKind != PgoInstrumentationKind::BasicBlockIntCount) return E_NOTIMPL; if (pSchema[iSchema].Count != 1) return E_NOTIMPL; @@ -1123,7 +1123,7 @@ HRESULT ZapInfo::getPgoInstrumentationResults(CORINFO_METHOD_HANDLE ftnHnd, { PgoInstrumentationSchema blockCountSchema = {}; blockCountSchema.Count = 1; - blockCountSchema.InstrumentationKind = ICorJitInfo::PgoInstrumentationKind::BasicBlockU32Count; + blockCountSchema.InstrumentationKind = ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount; blockCountSchema.ILOffset = blockCounts[iSchema].ILOffset; blockCountSchema.Offset = (BYTE *)&blockCounts[iSchema].ExecutionCount - (BYTE*)blockCounts; pgoResults->m_schema.Append(blockCountSchema); From 0dbc40bdeb74d431224dfb7e91d14f83b50d970d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 26 Apr 2021 16:24:04 +0200 Subject: [PATCH 3/6] Consume 64-bit counts in JIT and SPMI --- .../superpmi-shared/methodcontext.cpp | 50 +++++++++++++------ src/coreclr/jit/compiler.cpp | 33 ++++++++---- src/coreclr/jit/fgprofile.cpp | 16 +++++- src/coreclr/jit/importer.cpp | 2 +- 4 files changed, 73 insertions(+), 28 deletions(-) diff --git a/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp index f46cde27804837..e9990a4a2e5f57 100644 --- a/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp @@ -5534,9 +5534,15 @@ void MethodContext::dmpGetPgoInstrumentationResults(DWORDLONG key, const Agnosti case ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount: printf("B %u", *(unsigned*)(pInstrumentationData + pBuf[i].Offset)); break; + case ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount: + printf("B %llu", *(uint64_t*)(pInstrumentationData + pBuf[i].Offset)); + break; case ICorJitInfo::PgoInstrumentationKind::EdgeIntCount: printf("E %u", *(unsigned*)(pInstrumentationData + pBuf[i].Offset)); break; + case ICorJitInfo::PgoInstrumentationKind::EdgeLongCount: + printf("E %llu", *(uint64_t*)(pInstrumentationData + pBuf[i].Offset)); + break; case ICorJitInfo::PgoInstrumentationKind::TypeHandleHistogramCount: printf("T %u", *(unsigned*)(pInstrumentationData + pBuf[i].Offset)); break; @@ -6712,28 +6718,40 @@ int MethodContext::dumpMethodIdentityInfoToBuffer(char* buff, int len, bool igno size_t minOffset = (size_t) ~0; size_t maxOffset = 0; - uint32_t totalCount = 0; + uint64_t totalCount = 0; if (SUCCEEDED(pgoHR)) { - // Locate the range of the counter data. + // Locate the range of the data. // for (UINT32 i = 0; i < schemaCount; i++) { - if ((schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) - || (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount)) + size_t start = schema[i].Offset; + size_t end; + switch (schema[i].InstrumentationKind) { - if (schema[i].Offset < minOffset) - { - minOffset = schema[i].Offset; - } + case ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount: + case ICorJitInfo::PgoInstrumentationKind::EdgeIntCount: + totalCount += *(uint32_t*)(schemaData + schema[i].Offset); + end = start + 4; + break; + case ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount: + case ICorJitInfo::PgoInstrumentationKind::EdgeLongCount: + totalCount += *(uint64_t*)(schemaData + schema[i].Offset); + end = start + 8; + break; + default: + continue; + } - if (schema[i].Offset > maxOffset) - { - maxOffset = schema[i].Offset; - } + if (start < minOffset) + { + minOffset = start; + } - totalCount += *(uint32_t*)(schemaData + schema[i].Offset); + if (end > maxOffset) + { + maxOffset = end; } } @@ -6742,10 +6760,10 @@ int MethodContext::dumpMethodIdentityInfoToBuffer(char* buff, int len, bool igno if (minOffset < maxOffset) { char pgoHash[MD5_HASH_BUFFER_SIZE]; - dumpMD5HashToBuffer(schemaData + minOffset, (int)(maxOffset + sizeof(int) - minOffset), pgoHash, + dumpMD5HashToBuffer(schemaData + minOffset, (int)(maxOffset - minOffset), pgoHash, MD5_HASH_BUFFER_SIZE); - t = sprintf_s(buff, len, " Pgo Counters %u, Count %u, Hash: %s", schemaCount, totalCount, pgoHash); + t = sprintf_s(buff, len, " Pgo Counters %u, Count %llu, Hash: %s", schemaCount, totalCount, pgoHash); buff += t; len -= t; } @@ -6788,6 +6806,7 @@ bool MethodContext::hasPgoData(bool& hasEdgeProfile, bool& hasClassProfile, bool { hasEdgeProfile = false; hasClassProfile = false; + hasLikelyClass = false; // Obtain the Method Info structure for this method CORINFO_METHOD_INFO info; @@ -6807,6 +6826,7 @@ bool MethodContext::hasPgoData(bool& hasEdgeProfile, bool& hasClassProfile, bool for (UINT32 i = 0; i < schemaCount; i++) { hasEdgeProfile |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount); + hasEdgeProfile |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeLongCount); hasClassProfile |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::TypeHandleHistogramCount); hasLikelyClass |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::GetLikelyClass); diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index feaf24b8def064..a1d4b1ea03b6af 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5872,20 +5872,33 @@ void Compiler::compCompileFinish() // mdMethodDef __stdcall CEEInfo::getMethodDefFromMethod(CORINFO_METHOD_HANDLE hMethod) mdMethodDef currentMethodToken = info.compCompHnd->getMethodDefFromMethod(info.compMethodHnd); - unsigned profCallCount = 0; + uint64_t profCallCount = 0; if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT) && fgHaveProfileData()) { bool foundEntrypointBasicBlockCount = false; for (UINT32 iSchema = 0; iSchema < fgPgoSchemaCount; iSchema++) { - if ((fgPgoSchema[iSchema].InstrumentationKind == - ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) && - (fgPgoSchema[iSchema].ILOffset == 0)) + const ICorJitInfo::PgoInstrumentationSchema& entry = fgPgoSchema[iSchema]; + if (entry.ILOffset != 0) { - foundEntrypointBasicBlockCount = true; - profCallCount = *(uint32_t*)(fgPgoData + fgPgoSchema[iSchema].Offset); - break; + continue; + } + + if (entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) + { + profCallCount = *(uint32_t*)(fgPgoData + entry.Offset); + } + else if (entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount) + { + profCallCount = *(uint64_t*)(fgPgoData + entry.Offset); } + else + { + continue; + } + + foundEntrypointBasicBlockCount = true; + break; } assert(foundEntrypointBasicBlockCount); } @@ -5908,15 +5921,15 @@ void Compiler::compCompileFinish() { if (profCallCount <= 9999) { - printf("%4d | ", profCallCount); + printf("%4llu | ", profCallCount); } else if (profCallCount <= 999500) { - printf("%3dK | ", (profCallCount + 500) / 1000); + printf("%3lluK | ", (profCallCount + 500) / 1000); } else { - printf("%3dM | ", (profCallCount + 500000) / 1000000); + printf("%3lluM | ", (profCallCount + 500000) / 1000000); } } else diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 46feafe4e3f4ed..01079dd6dbfe90 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -194,12 +194,22 @@ bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::wei for (UINT32 i = 0; i < fgPgoSchemaCount; i++) { - if ((fgPgoSchema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) && - ((IL_OFFSET)fgPgoSchema[i].ILOffset == offset)) + if ((IL_OFFSET)fgPgoSchema[i].ILOffset != offset) + { + continue; + } + + if (fgPgoSchema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) { *weightWB = (BasicBlock::weight_t) * (uint32_t*)(fgPgoData + fgPgoSchema[i].Offset); return true; } + + if (fgPgoSchema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount) + { + *weightWB = (BasicBlock::weight_t) * (uint64_t*)(fgPgoData + fgPgoSchema[i].Offset); + return true; + } } *weightWB = 0; @@ -1738,10 +1748,12 @@ PhaseStatus Compiler::fgIncorporateProfileData() break; case ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount: + case ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount: fgPgoBlockCounts++; break; case ICorJitInfo::PgoInstrumentationKind::EdgeIntCount: + case ICorJitInfo::PgoInstrumentationKind::EdgeLongCount: fgPgoEdgeCounts++; break; diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 3ca3f2e4260ef7..55f5f0ba64438b 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -15382,7 +15382,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) if (allocSize <= maxSize) { const unsigned stackallocAsLocal = lvaGrabTemp(false DEBUGARG("stackallocLocal")); - JITDUMP("Converting stackalloc of %lld bytes to new local V%02u\n", allocSize, + JITDUMP("Converting stackalloc of %zd bytes to new local V%02u\n", allocSize, stackallocAsLocal); lvaTable[stackallocAsLocal].lvType = TYP_BLK; lvaTable[stackallocAsLocal].lvExactSize = (unsigned)allocSize; From 842e133d139415d4d55e5ec122e5e4dbee1063cc Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 26 Apr 2021 16:56:21 +0200 Subject: [PATCH 4/6] Update some comments --- src/coreclr/inc/corjit.h | 4 ++-- src/coreclr/tools/Common/Pgo/PgoFormat.cs | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/inc/corjit.h b/src/coreclr/inc/corjit.h index c0562471f28954..aaf4dcb646f11a 100644 --- a/src/coreclr/inc/corjit.h +++ b/src/coreclr/inc/corjit.h @@ -358,13 +358,13 @@ class ICorJitInfo : public ICorDynamicInfo Done = None, // All instrumentation schemas must end with a record which is "Done" BasicBlockIntCount = (DescriptorMin * 1) | FourByte, // basic block counter using unsigned 4 byte int - BasicBlockLongCount = (DescriptorMin * 1) | EightByte, // basic block counter using unsigned 8 byte int. Currently only supported for collection. + BasicBlockLongCount = (DescriptorMin * 1) | EightByte, // basic block counter using unsigned 8 byte int TypeHandleHistogramCount = (DescriptorMin * 2) | FourByte | AlignPointer, // 4 byte counter that is part of a type histogram TypeHandleHistogramTypeHandle = (DescriptorMin * 3) | TypeHandle, // TypeHandle that is part of a type histogram Version = (DescriptorMin * 4) | None, // Version is encoded in the Other field of the schema NumRuns = (DescriptorMin * 5) | None, // Number of runs is encoded in the Other field of the schema EdgeIntCount = (DescriptorMin * 6) | FourByte, // edge counter using unsigned 4 byte int - EdgeLongCount = (DescriptorMin * 6) | EightByte, // edge counter using unsigned 8 byte int. Currently only supported for collection. + EdgeLongCount = (DescriptorMin * 6) | EightByte, // edge counter using unsigned 8 byte int GetLikelyClass = (DescriptorMin * 7) | TypeHandle, // Compressed get likely class data }; diff --git a/src/coreclr/tools/Common/Pgo/PgoFormat.cs b/src/coreclr/tools/Common/Pgo/PgoFormat.cs index 95e69f514e127b..e9357ad361473b 100644 --- a/src/coreclr/tools/Common/Pgo/PgoFormat.cs +++ b/src/coreclr/tools/Common/Pgo/PgoFormat.cs @@ -37,13 +37,13 @@ public enum PgoInstrumentationKind Done = None, // All instrumentation schemas must end with a record which is "Done" BasicBlockIntCount = (DescriptorMin * 1) | FourByte, // basic block counter using unsigned 4 byte int - BasicBlockLongCount = (DescriptorMin * 1) | EightByte, // basic block counter using unsigned 8 byte int. Currently only supported for collection. + BasicBlockLongCount = (DescriptorMin * 1) | EightByte, // basic block counter using unsigned 8 byte int TypeHandleHistogramCount = (DescriptorMin * 2) | FourByte | AlignPointer, // 4 byte counter that is part of a type histogram TypeHandleHistogramTypeHandle = (DescriptorMin * 3) | TypeHandle, // TypeHandle that is part of a type histogram Version = (DescriptorMin * 4) | None, // Version is encoded in the Other field of the schema NumRuns = (DescriptorMin * 5) | None, // Number of runs is encoded in the Other field of the schema EdgeIntCount = (DescriptorMin * 6) | FourByte, // edge counter using unsigned 4 byte int - EdgeLongCount = (DescriptorMin * 6) | EightByte, // edge counter using unsigned 8 byte int. Currently only supported for collection. + EdgeLongCount = (DescriptorMin * 6) | EightByte, // edge counter using unsigned 8 byte int GetLikelyClass = (DescriptorMin * 7) | TypeHandle, // Compressed get likely class data } @@ -478,7 +478,7 @@ private static bool SchemaMergesItemsWithDifferentOtherFields(PgoInstrumentation { switch (kind) { - // + // default: // All non-specified kinds are not distinguishable by Other field return false; From e1b8fa8fb46016266b1386d4c1916df1f726a9f6 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 26 Apr 2021 17:35:49 +0200 Subject: [PATCH 5/6] Fix a missing edge count --- src/coreclr/jit/fgprofile.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 064aeb753f70e4..3fb8254baf5728 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -2182,12 +2182,16 @@ void EfficientEdgeCountReconstructor::Prepare() switch (schemaEntry.InstrumentationKind) { case ICorJitInfo::PgoInstrumentationKind::EdgeIntCount: + case ICorJitInfo::PgoInstrumentationKind::EdgeLongCount: { // Optimization TODO: if profileCount is zero, we can just ignore this edge // and the right things will happen. // - uint32_t const profileCount = *(uint32_t*)(m_comp->fgPgoData + schemaEntry.Offset); - BasicBlock::weight_t const weight = (BasicBlock::weight_t)profileCount; + uint64_t const profileCount = + schemaEntry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount + ? *(uint32_t*)(m_comp->fgPgoData + schemaEntry.Offset) + : *(uint64_t*)(m_comp->fgPgoData + schemaEntry.Offset); + BasicBlock::weight_t const weight = (BasicBlock::weight_t)profileCount; m_allWeightsZero &= (profileCount == 0); From 93ba3acc35ee06afac55a6dfefd741ef34a52071 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 3 May 2021 11:46:20 +0200 Subject: [PATCH 6/6] Fix a comment --- src/coreclr/jit/jitconfigvalues.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index d3af53138890b1..6993c2dd2c1222 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -475,8 +475,7 @@ CONFIG_INTEGER(JitMinimalJitProfiling, W("JitMinimalJitProfiling"), 1) CONFIG_INTEGER(JitMinimalPrejitProfiling, W("JitMinimalPrejitProfiling"), 0) CONFIG_INTEGER(JitClassProfiling, W("JitClassProfiling"), 1) CONFIG_INTEGER(JitEdgeProfiling, W("JitEdgeProfiling"), 1) -CONFIG_INTEGER(JitCollect64BitCounts, W("JitCollect64BitCounts"), 0) // Collect counts as 64-bit values. These counters - // cannot be consumed. +CONFIG_INTEGER(JitCollect64BitCounts, W("JitCollect64BitCounts"), 0) // Collect counts as 64-bit values. // Profile consumption options CONFIG_INTEGER(JitDisablePgo, W("JitDisablePgo"), 0) // Ignore pgo data for all methods