diff --git a/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp index 791b7f6c5c1e48..72d8abc1c1dbf5 100644 --- a/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp @@ -5526,9 +5526,15 @@ void MethodContext::dmpGetPgoInstrumentationResults(DWORDLONG key, const Agnosti case ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount: printf("B %u", *(unsigned*)(pInstrumentationData + pBuf[i].Offset)); break; + case ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount: + printf("B %llu", *(uint64_t*)(pInstrumentationData + pBuf[i].Offset)); + break; case ICorJitInfo::PgoInstrumentationKind::EdgeIntCount: printf("E %u", *(unsigned*)(pInstrumentationData + pBuf[i].Offset)); break; + case ICorJitInfo::PgoInstrumentationKind::EdgeLongCount: + printf("E %llu", *(uint64_t*)(pInstrumentationData + pBuf[i].Offset)); + break; case ICorJitInfo::PgoInstrumentationKind::TypeHandleHistogramCount: printf("T %u", *(unsigned*)(pInstrumentationData + pBuf[i].Offset)); break; @@ -6704,28 +6710,40 @@ int MethodContext::dumpMethodIdentityInfoToBuffer(char* buff, int len, bool igno size_t minOffset = (size_t) ~0; size_t maxOffset = 0; - uint32_t totalCount = 0; + uint64_t totalCount = 0; if (SUCCEEDED(pgoHR)) { - // Locate the range of the counter data. + // Locate the range of the data. // for (UINT32 i = 0; i < schemaCount; i++) { - if ((schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) - || (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount)) + size_t start = schema[i].Offset; + size_t end; + switch (schema[i].InstrumentationKind) { - if (schema[i].Offset < minOffset) - { - minOffset = schema[i].Offset; - } + case ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount: + case ICorJitInfo::PgoInstrumentationKind::EdgeIntCount: + totalCount += *(uint32_t*)(schemaData + schema[i].Offset); + end = start + 4; + break; + case ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount: + case ICorJitInfo::PgoInstrumentationKind::EdgeLongCount: + totalCount += *(uint64_t*)(schemaData + schema[i].Offset); + end = start + 8; + break; + default: + continue; + } - if (schema[i].Offset > maxOffset) - { - maxOffset = schema[i].Offset; - } + if (start < minOffset) + { + minOffset = start; + } - totalCount += *(uint32_t*)(schemaData + schema[i].Offset); + if (end > maxOffset) + { + maxOffset = end; } } @@ -6734,10 +6752,10 @@ int MethodContext::dumpMethodIdentityInfoToBuffer(char* buff, int len, bool igno if (minOffset < maxOffset) { char pgoHash[MD5_HASH_BUFFER_SIZE]; - dumpMD5HashToBuffer(schemaData + minOffset, (int)(maxOffset + sizeof(int) - minOffset), pgoHash, + dumpMD5HashToBuffer(schemaData + minOffset, (int)(maxOffset - minOffset), pgoHash, MD5_HASH_BUFFER_SIZE); - t = sprintf_s(buff, len, " Pgo Counters %u, Count %u, Hash: %s", schemaCount, totalCount, pgoHash); + t = sprintf_s(buff, len, " Pgo Counters %u, Count %llu, Hash: %s", schemaCount, totalCount, pgoHash); buff += t; len -= t; } @@ -6780,6 +6798,7 @@ bool MethodContext::hasPgoData(bool& hasEdgeProfile, bool& hasClassProfile, bool { hasEdgeProfile = false; hasClassProfile = false; + hasLikelyClass = false; // Obtain the Method Info structure for this method CORINFO_METHOD_INFO info; @@ -6799,6 +6818,7 @@ bool MethodContext::hasPgoData(bool& hasEdgeProfile, bool& hasClassProfile, bool for (UINT32 i = 0; i < schemaCount; i++) { hasEdgeProfile |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount); + hasEdgeProfile |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeLongCount); hasClassProfile |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::TypeHandleHistogramCount); hasLikelyClass |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::GetLikelyClass); diff --git a/src/coreclr/inc/corjit.h b/src/coreclr/inc/corjit.h index 2407a407ac5d3d..aaf4dcb646f11a 100644 --- a/src/coreclr/inc/corjit.h +++ b/src/coreclr/inc/corjit.h @@ -357,12 +357,14 @@ class ICorJitInfo : public ICorDynamicInfo DescriptorMin = 0x40, Done = None, // All instrumentation schemas must end with a record which is "Done" - BasicBlockIntCount = (DescriptorMin * 1) | FourByte, // 4 byte basic block counter, using unsigned 4 byte int + BasicBlockIntCount = (DescriptorMin * 1) | FourByte, // basic block counter using unsigned 4 byte int + BasicBlockLongCount = (DescriptorMin * 1) | EightByte, // basic block counter using unsigned 8 byte int TypeHandleHistogramCount = (DescriptorMin * 2) | FourByte | AlignPointer, // 4 byte counter that is part of a type histogram TypeHandleHistogramTypeHandle = (DescriptorMin * 3) | TypeHandle, // TypeHandle that is part of a type histogram Version = (DescriptorMin * 4) | None, // Version is encoded in the Other field of the schema NumRuns = (DescriptorMin * 5) | None, // Number of runs is encoded in the Other field of the schema - EdgeIntCount = (DescriptorMin * 6) | FourByte, // 4 byte edge counter, using unsigned 4 byte int + EdgeIntCount = (DescriptorMin * 6) | FourByte, // edge counter using unsigned 4 byte int + EdgeLongCount = (DescriptorMin * 6) | EightByte, // edge counter using unsigned 8 byte int GetLikelyClass = (DescriptorMin * 7) | TypeHandle, // Compressed get likely class data }; diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 18d37a489d5f31..3af1528a314740 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -7905,13 +7905,13 @@ void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm) /***************************************************************************** * - * Display an reloc value - * If we are formatting for an assembly listing don't print the hex value + * Display a reloc value + * If we are formatting for a diffable assembly listing don't print the hex value * since it will prevent us from doing assembly diffs */ void emitter::emitDispReloc(ssize_t value) { - if (emitComp->opts.disAsm) + if (emitComp->opts.disAsm && emitComp->opts.disDiffable) { printf("(reloc)"); } diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 85361a618a9ffb..3fb8254baf5728 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -194,12 +194,22 @@ bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::wei for (UINT32 i = 0; i < fgPgoSchemaCount; i++) { - if ((fgPgoSchema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) && - ((IL_OFFSET)fgPgoSchema[i].ILOffset == offset)) + if ((IL_OFFSET)fgPgoSchema[i].ILOffset != offset) + { + continue; + } + + if (fgPgoSchema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) { *weightWB = (BasicBlock::weight_t) * (uint32_t*)(fgPgoData + fgPgoSchema[i].Offset); return true; } + + if (fgPgoSchema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount) + { + *weightWB = (BasicBlock::weight_t) * (uint64_t*)(fgPgoData + fgPgoSchema[i].Offset); + return true; + } } *weightWB = 0; @@ -334,9 +344,11 @@ void BlockCountInstrumentor::BuildSchemaElements(BasicBlock* block, Schema& sche ICorJitInfo::PgoInstrumentationSchema schemaElem; schemaElem.Count = 1; schemaElem.Other = 0; - schemaElem.InstrumentationKind = ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount; - schemaElem.ILOffset = offset; - schemaElem.Offset = 0; + schemaElem.InstrumentationKind = JitConfig.JitCollect64BitCounts() + ? ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount + : ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount; + schemaElem.ILOffset = offset; + schemaElem.Offset = 0; schema.push_back(schemaElem); @@ -362,21 +374,23 @@ void BlockCountInstrumentor::BuildSchemaElements(BasicBlock* block, Schema& sche // void BlockCountInstrumentor::Instrument(BasicBlock* block, Schema& schema, BYTE* profileMemory) { - const int schemaIndex = (int)block->bbCountSchemaIndex; + const ICorJitInfo::PgoInstrumentationSchema& entry = schema[block->bbCountSchemaIndex]; - assert(block->bbCodeOffs == (IL_OFFSET)schema[schemaIndex].ILOffset); - assert(schema[schemaIndex].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount); - size_t addrOfCurrentExecutionCount = (size_t)(schema[schemaIndex].Offset + profileMemory); + assert(block->bbCodeOffs == (IL_OFFSET)entry.ILOffset); + assert((entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) || + (entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount)); + size_t addrOfCurrentExecutionCount = (size_t)(entry.Offset + profileMemory); + var_types typ = + entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount ? TYP_INT : TYP_LONG; // Read Basic-Block count value - GenTree* valueNode = - m_comp->gtNewIndOfIconHandleNode(TYP_INT, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false); + GenTree* valueNode = m_comp->gtNewIndOfIconHandleNode(typ, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false); // Increment value by 1 - GenTree* rhsNode = m_comp->gtNewOperNode(GT_ADD, TYP_INT, valueNode, m_comp->gtNewIconNode(1)); + GenTree* rhsNode = m_comp->gtNewOperNode(GT_ADD, typ, valueNode, m_comp->gtNewIconNode(1, typ)); // Write new Basic-Block count value - GenTree* lhsNode = m_comp->gtNewIndOfIconHandleNode(TYP_INT, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false); + GenTree* lhsNode = m_comp->gtNewIndOfIconHandleNode(typ, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false); GenTree* asgNode = m_comp->gtNewAssignNode(lhsNode, rhsNode); m_comp->fgNewStmtAtBeg(block, asgNode); @@ -411,11 +425,12 @@ void BlockCountInstrumentor::InstrumentMethodEntry(Schema& schema, BYTE* profile assert(m_entryBlock != nullptr); assert(m_entryBlock->bbCodeOffs == 0); - const int firstSchemaIndex = (int)m_entryBlock->bbCountSchemaIndex; - assert((IL_OFFSET)schema[firstSchemaIndex].ILOffset == 0); - assert(schema[firstSchemaIndex].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount); + const ICorJitInfo::PgoInstrumentationSchema& entry = schema[m_entryBlock->bbCountSchemaIndex]; + assert((IL_OFFSET)entry.ILOffset == 0); + assert((entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) || + (entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount)); - const size_t addrOfFirstExecutionCount = (size_t)(schema[firstSchemaIndex].Offset + profileMemory); + const size_t addrOfFirstExecutionCount = (size_t)(entry.Offset + profileMemory); GenTree* arg; @@ -447,13 +462,15 @@ void BlockCountInstrumentor::InstrumentMethodEntry(Schema& schema, BYTE* profile GenTreeCall::Use* args = m_comp->gtNewCallArgs(arg); GenTree* call = m_comp->gtNewHelperCallNode(CORINFO_HELP_BBT_FCN_ENTER, TYP_VOID, args); + var_types typ = + entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount ? TYP_INT : TYP_LONG; // Read Basic-Block count value // - GenTree* valueNode = m_comp->gtNewIndOfIconHandleNode(TYP_INT, addrOfFirstExecutionCount, GTF_ICON_BBC_PTR, false); + GenTree* valueNode = m_comp->gtNewIndOfIconHandleNode(typ, addrOfFirstExecutionCount, GTF_ICON_BBC_PTR, false); // Compare Basic-Block count value against zero // - GenTree* relop = m_comp->gtNewOperNode(GT_NE, TYP_INT, valueNode, m_comp->gtNewIconNode(0, TYP_INT)); + GenTree* relop = m_comp->gtNewOperNode(GT_NE, typ, valueNode, m_comp->gtNewIconNode(0, typ)); GenTree* colon = new (m_comp, GT_COLON) GenTreeColon(TYP_VOID, m_comp->gtNewNothingNode(), call); GenTree* cond = m_comp->gtNewQmarkNode(TYP_VOID, relop, colon); Statement* stmt = m_comp->gtNewStmt(cond); @@ -1041,9 +1058,11 @@ void EfficientEdgeCountInstrumentor::BuildSchemaElements(BasicBlock* block, Sche ICorJitInfo::PgoInstrumentationSchema schemaElem; schemaElem.Count = 1; schemaElem.Other = targetOffset; - schemaElem.InstrumentationKind = ICorJitInfo::PgoInstrumentationKind::EdgeIntCount; - schemaElem.ILOffset = sourceOffset; - schemaElem.Offset = 0; + schemaElem.InstrumentationKind = JitConfig.JitCollect64BitCounts() + ? ICorJitInfo::PgoInstrumentationKind::EdgeLongCount + : ICorJitInfo::PgoInstrumentationKind::EdgeIntCount; + schemaElem.ILOffset = sourceOffset; + schemaElem.Offset = 0; schema.push_back(schemaElem); @@ -1082,9 +1101,12 @@ void EfficientEdgeCountInstrumentor::Instrument(BasicBlock* block, Schema& schem // Sanity checks. // assert((schemaIndex >= 0) && (schemaIndex < (int)schema.size())); - assert(schema[schemaIndex].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount); - size_t addrOfCurrentExecutionCount = (size_t)(schema[schemaIndex].Offset + profileMemory); + const ICorJitInfo::PgoInstrumentationSchema& entry = schema[schemaIndex]; + assert((entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount) || + (entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeLongCount)); + + size_t addrOfCurrentExecutionCount = (size_t)(entry.Offset + profileMemory); // Determine where to place the probe. // @@ -1124,16 +1146,17 @@ void EfficientEdgeCountInstrumentor::Instrument(BasicBlock* block, Schema& schem // Place the probe + var_types typ = + entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount ? TYP_INT : TYP_LONG; // Read Basic-Block count value GenTree* valueNode = - m_comp->gtNewIndOfIconHandleNode(TYP_INT, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false); + m_comp->gtNewIndOfIconHandleNode(typ, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false); // Increment value by 1 - GenTree* rhsNode = m_comp->gtNewOperNode(GT_ADD, TYP_INT, valueNode, m_comp->gtNewIconNode(1)); + GenTree* rhsNode = m_comp->gtNewOperNode(GT_ADD, typ, valueNode, m_comp->gtNewIconNode(1, typ)); // Write new Basic-Block count value - GenTree* lhsNode = - m_comp->gtNewIndOfIconHandleNode(TYP_INT, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false); + GenTree* lhsNode = m_comp->gtNewIndOfIconHandleNode(typ, addrOfCurrentExecutionCount, GTF_ICON_BBC_PTR, false); GenTree* asgNode = m_comp->gtNewAssignNode(lhsNode, rhsNode); m_comp->fgNewStmtAtBeg(instrumentedBlock, asgNode); @@ -1725,10 +1748,12 @@ PhaseStatus Compiler::fgIncorporateProfileData() break; case ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount: + case ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount: fgPgoBlockCounts++; break; case ICorJitInfo::PgoInstrumentationKind::EdgeIntCount: + case ICorJitInfo::PgoInstrumentationKind::EdgeLongCount: fgPgoEdgeCounts++; break; @@ -2157,12 +2182,16 @@ void EfficientEdgeCountReconstructor::Prepare() switch (schemaEntry.InstrumentationKind) { case ICorJitInfo::PgoInstrumentationKind::EdgeIntCount: + case ICorJitInfo::PgoInstrumentationKind::EdgeLongCount: { // Optimization TODO: if profileCount is zero, we can just ignore this edge // and the right things will happen. // - uint32_t const profileCount = *(uint32_t*)(m_comp->fgPgoData + schemaEntry.Offset); - BasicBlock::weight_t const weight = (BasicBlock::weight_t)profileCount; + uint64_t const profileCount = + schemaEntry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount + ? *(uint32_t*)(m_comp->fgPgoData + schemaEntry.Offset) + : *(uint64_t*)(m_comp->fgPgoData + schemaEntry.Offset); + BasicBlock::weight_t const weight = (BasicBlock::weight_t)profileCount; m_allWeightsZero &= (profileCount == 0); diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index a0b08aa3bd1db5..7b0f8239a9bd9b 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -15422,7 +15422,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) if (allocSize <= maxSize) { const unsigned stackallocAsLocal = lvaGrabTemp(false DEBUGARG("stackallocLocal")); - JITDUMP("Converting stackalloc of %lld bytes to new local V%02u\n", allocSize, + JITDUMP("Converting stackalloc of %zd bytes to new local V%02u\n", allocSize, stackallocAsLocal); lvaTable[stackallocAsLocal].lvType = TYP_BLK; lvaTable[stackallocAsLocal].lvExactSize = (unsigned)allocSize; diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index d33f172b3addf9..6993c2dd2c1222 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -475,6 +475,7 @@ CONFIG_INTEGER(JitMinimalJitProfiling, W("JitMinimalJitProfiling"), 1) CONFIG_INTEGER(JitMinimalPrejitProfiling, W("JitMinimalPrejitProfiling"), 0) CONFIG_INTEGER(JitClassProfiling, W("JitClassProfiling"), 1) CONFIG_INTEGER(JitEdgeProfiling, W("JitEdgeProfiling"), 1) +CONFIG_INTEGER(JitCollect64BitCounts, W("JitCollect64BitCounts"), 0) // Collect counts as 64-bit values. // Profile consumption options CONFIG_INTEGER(JitDisablePgo, W("JitDisablePgo"), 0) // Ignore pgo data for all methods diff --git a/src/coreclr/tools/Common/Pgo/PgoFormat.cs b/src/coreclr/tools/Common/Pgo/PgoFormat.cs index 3e0013eb1a2351..e9357ad361473b 100644 --- a/src/coreclr/tools/Common/Pgo/PgoFormat.cs +++ b/src/coreclr/tools/Common/Pgo/PgoFormat.cs @@ -36,12 +36,14 @@ public enum PgoInstrumentationKind DescriptorMin = 0x40, Done = None, // All instrumentation schemas must end with a record which is "Done" - BasicBlockIntCount = (DescriptorMin * 1) | FourByte, // 4 byte basic block counter, using unsigned 4 byte int + BasicBlockIntCount = (DescriptorMin * 1) | FourByte, // basic block counter using unsigned 4 byte int + BasicBlockLongCount = (DescriptorMin * 1) | EightByte, // basic block counter using unsigned 8 byte int TypeHandleHistogramCount = (DescriptorMin * 2) | FourByte | AlignPointer, // 4 byte counter that is part of a type histogram TypeHandleHistogramTypeHandle = (DescriptorMin * 3) | TypeHandle, // TypeHandle that is part of a type histogram Version = (DescriptorMin * 4) | None, // Version is encoded in the Other field of the schema NumRuns = (DescriptorMin * 5) | None, // Number of runs is encoded in the Other field of the schema - EdgeIntCount = (DescriptorMin * 6) | FourByte, // 4 byte edge counter, using unsigned 4 byte int + EdgeIntCount = (DescriptorMin * 6) | FourByte, // edge counter using unsigned 4 byte int + EdgeLongCount = (DescriptorMin * 6) | EightByte, // edge counter using unsigned 8 byte int GetLikelyClass = (DescriptorMin * 7) | TypeHandle, // Compressed get likely class data } @@ -476,7 +478,7 @@ private static bool SchemaMergesItemsWithDifferentOtherFields(PgoInstrumentation { switch (kind) { - // + // default: // All non-specified kinds are not distinguishable by Other field return false;