diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 036642e254f2b7..08fc10a8598970 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2880,31 +2880,15 @@ void Compiler::compInitOptions(JitFlags* jitFlags) fgPgoData = nullptr; fgPgoSchemaCount = 0; fgProfileData_ILSizeMismatch = false; - fgNumProfileRuns = 0; if (jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT)) { HRESULT hr; hr = info.compCompHnd->getPgoInstrumentationResults(info.compMethodHnd, &fgPgoSchema, &fgPgoSchemaCount, &fgPgoData); - if (SUCCEEDED(hr)) - { - fgNumProfileRuns = 0; - for (UINT32 iSchema = 0; iSchema < fgPgoSchemaCount; iSchema++) - { - if (fgPgoSchema[iSchema].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::NumRuns) - { - fgNumProfileRuns += fgPgoSchema[iSchema].Other; - } - } - - if (fgNumProfileRuns == 0) - fgNumProfileRuns = 1; - } - - JITDUMP("BBOPT set -- VM query for profile data for %s returned: hr=%0x; schema at %p, counts at %p, %d schema " - "elements, %d runs\n", - info.compFullName, hr, dspPtr(fgPgoSchema), dspPtr(fgPgoData), fgPgoSchemaCount, fgNumProfileRuns); + JITDUMP( + "BBOPT set; query for profile data returned hr %0x, schema at %p, counts at %p, schema element count %d\n", + hr, dspPtr(fgPgoSchema), dspPtr(fgPgoData), fgPgoSchemaCount); // a failed result that also has a non-NULL fgPgoSchema // indicates that the ILSize for the method no longer matches @@ -4416,6 +4400,15 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags compFunctionTraceStart(); + // If profile data is available, incorporate it into the flowgraph. + // Note: the importer is sensitive to block weights, so this has + // to happen before importation. 
+ // + if (compileFlags->IsSet(JitFlags::JIT_FLAG_BBOPT) && fgHaveProfileData()) + { + DoPhase(this, PHASE_INCPROFILE, &Compiler::fgIncorporateProfileData); + } + // Import: convert the instrs in each basic block to a tree based intermediate representation // DoPhase(this, PHASE_IMPORTATION, &Compiler::fgImport); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index ca7057faadf140..b6f23a480b3c2e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5540,6 +5540,8 @@ class Compiler BYTE* fgPgoData; UINT32 fgPgoSchemaCount; UINT32 fgNumProfileRuns; + UINT32 fgPgoBlockCounts; + UINT32 fgPgoClassProfiles; unsigned fgStressBBProf() { @@ -5562,6 +5564,8 @@ class Compiler void fgComputeProfileScale(); bool fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::weight_t* weight); PhaseStatus fgInstrumentMethod(); + PhaseStatus fgIncorporateProfileData(); + void fgIncorporateBlockCounts(); public: // fgIsUsingProfileWeights - returns true if we have real profile data for this method diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index 4f304f3c363ab6..23dade2f959265 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -28,7 +28,8 @@ CompPhaseNameMacro(PHASE_IMPORTATION, "Importation", CompPhaseNameMacro(PHASE_INDXCALL, "Indirect call transform", "INDXCALL", false, -1, true) CompPhaseNameMacro(PHASE_PATCHPOINTS, "Expand patchpoints", "PPOINT", false, -1, true) CompPhaseNameMacro(PHASE_POST_IMPORT, "Post-import", "POST-IMP", false, -1, false) -CompPhaseNameMacro(PHASE_IBCINSTR, "IBC instrumentation", "IBCINSTR", false, -1, false) +CompPhaseNameMacro(PHASE_IBCINSTR, "Profile instrumentation", "IBCINSTR", false, -1, false) +CompPhaseNameMacro(PHASE_INCPROFILE, "Profile incorporation", "INCPROF", false, -1, false) CompPhaseNameMacro(PHASE_MORPH_INIT, "Morph - Init", "MOR-INIT" ,false, -1, false) CompPhaseNameMacro(PHASE_MORPH_INLINE, "Morph - Inlining", "MOR-INL", false, -1, 
true)
 CompPhaseNameMacro(PHASE_MORPH_ADD_INTERNAL, "Morph - Add internal blocks", "MOR-ADD", false, -1, true)
diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp
index d20b33efbcd263..2fd7e201cc99a6 100644
--- a/src/coreclr/jit/fgbasic.cpp
+++ b/src/coreclr/jit/fgbasic.cpp
@@ -168,6 +168,9 @@ void Compiler::fgInit()
     fgPgoSchema = nullptr;
     fgPgoData = nullptr;
     fgPgoSchemaCount = 0;
+    fgNumProfileRuns = 0;
+    fgPgoBlockCounts = 0;
+    fgPgoClassProfiles = 0;
     fgPredListSortVector = nullptr;
 }
 
diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp
index 681b89f72678ba..807c98a7775408 100644
--- a/src/coreclr/jit/fgprofile.cpp
+++ b/src/coreclr/jit/fgprofile.cpp
@@ -877,6 +877,105 @@ PhaseStatus Compiler::fgInstrumentMethod()
     return PhaseStatus::MODIFIED_EVERYTHING;
 }
 
+//------------------------------------------------------------------------
+// fgIncorporateProfileData: summarize the profile schema and apply block counts to the flowgraph
+//
+// Returns:
+//   PhaseStatus::MODIFIED_EVERYTHING, unconditionally
+//
+// Notes:
+//   Tallies run counts, block count probes, and class profile records found
+//   in the schema; a total run count of zero is reported as one run.
+//
+PhaseStatus Compiler::fgIncorporateProfileData()
+{
+    assert(fgHaveProfileData());
+
+    // Walk the schema once, tallying each kind of record we track.
+    //
+    fgNumProfileRuns = 0;
+    for (UINT32 index = 0; index < fgPgoSchemaCount; index++)
+    {
+        const auto& entry = fgPgoSchema[index];
+        if (entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::NumRuns)
+        {
+            fgNumProfileRuns += entry.Other;
+        }
+        else if (entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount)
+        {
+            fgPgoBlockCounts++;
+        }
+        else if (entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::TypeHandleHistogramCount)
+        {
+            fgPgoClassProfiles++;
+        }
+    }
+
+    assert(fgPgoBlockCounts > 0);
+
+    if (fgNumProfileRuns == 0)
+    {
+        fgNumProfileRuns = 1;
+    }
+
+    JITDUMP("Profile summary: %d runs, %d block probes, %d class profiles\n", fgNumProfileRuns, fgPgoBlockCounts,
+            fgPgoClassProfiles);
+
+    fgIncorporateBlockCounts();
+    return PhaseStatus::MODIFIED_EVERYTHING;
+}
+
+//------------------------------------------------------------------------
+// fgIncorporateBlockCounts: read block count based profile data and set block weights
+//
+// Notes:
+//   Count data for inlinees is scaled (usually down).
+//
+//   Runs before importation, so BBF_IMPORTED is not set yet and is not consulted.
+//
+// Todo:
+//   Normalize counts.
+//
+//   Take advantage of the (likely) correspondence between block order and schema order?
+//
+void Compiler::fgIncorporateBlockCounts()
+{
+    for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+    {
+        BasicBlock::weight_t profileWeight;
+
+        // Skip internal blocks. Do not require BBF_IMPORTED: the importer has not run yet.
+        //
+        if ((block->bbFlags & BBF_INTERNAL) != 0)
+        {
+            continue;
+        }
+
+        if (fgGetProfileWeightForBasicBlock(block->bbCodeOffs, &profileWeight))
+        {
+            if (compIsForInlining())
+            {
+                if (impInlineInfo->profileScaleState == InlineInfo::ProfileScaleState::KNOWN)
+                {
+                    double scaledWeight = impInlineInfo->profileScaleFactor * profileWeight;
+                    profileWeight = (BasicBlock::weight_t)scaledWeight;
+                }
+            }
+
+            block->setBBProfileWeight(profileWeight);
+
+            if (profileWeight == BB_ZERO_WEIGHT)
+            {
+                block->bbSetRunRarely();
+            }
+            else
+            {
+                block->bbFlags &= ~BBF_RUN_RARELY;
+            }
+        }
+    }
+}
+
 bool flowList::setEdgeWeightMinChecked(BasicBlock::weight_t newWeight, BasicBlock::weight_t slop, bool* wbUsedSlop)
 {
     bool result = false;
diff --git a/src/coreclr/jit/phase.cpp b/src/coreclr/jit/phase.cpp
index 3a78ef3b2b3def..1b1689e85ac860 100644
--- a/src/coreclr/jit/phase.cpp
+++ b/src/coreclr/jit/phase.cpp
@@ -157,13 +157,11 @@ void Phase::PostPhase(PhaseStatus status)
     // well as the new-style phases that have been updated to return
     // PhaseStatus from their DoPhase methods.
// - static Phases s_allowlist[] = {PHASE_IMPORTATION, PHASE_IBCINSTR, - PHASE_INDXCALL, PHASE_MORPH_INLINE, - PHASE_ALLOCATE_OBJECTS, PHASE_EMPTY_TRY, - PHASE_EMPTY_FINALLY, PHASE_MERGE_FINALLY_CHAINS, - PHASE_CLONE_FINALLY, PHASE_MERGE_THROWS, - PHASE_MORPH_GLOBAL, PHASE_BUILD_SSA, - PHASE_RATIONALIZE, PHASE_LOWERING, + static Phases s_allowlist[] = {PHASE_IMPORTATION, PHASE_IBCINSTR, PHASE_INCPROFILE, + PHASE_INDXCALL, PHASE_MORPH_INLINE, PHASE_ALLOCATE_OBJECTS, + PHASE_EMPTY_TRY, PHASE_EMPTY_FINALLY, PHASE_MERGE_FINALLY_CHAINS, + PHASE_CLONE_FINALLY, PHASE_MERGE_THROWS, PHASE_MORPH_GLOBAL, + PHASE_BUILD_SSA, PHASE_RATIONALIZE, PHASE_LOWERING, PHASE_STACK_LEVEL_SETTER}; if (madeChanges)