From 1e93d9f6cd738a43cbb80b45fa8d4ff186df2eee Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Mon, 25 Jan 2021 19:34:37 -0800 Subject: [PATCH 1/3] JIT: create phase for incorporation of profile data Instead of trying to set block weight data when initially building the flowgraph, do it as a phase just before importation. The importer looks at block weights, so these must be set before importation. Note profile incorporation relies only on the IL level view of the method, and does not depend on the IR. --- src/coreclr/jit/compiler.cpp | 36 +++++++++++++++++-- src/coreclr/jit/compiler.h | 4 +++ src/coreclr/jit/compphases.h | 3 +- src/coreclr/jit/fgprofile.cpp | 65 +++++++++++++++++++++++++++++++++++ src/coreclr/jit/phase.cpp | 12 +++---- 5 files changed, 110 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 036642e254f2b7..d969296716d0a8 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2881,6 +2881,8 @@ void Compiler::compInitOptions(JitFlags* jitFlags) fgPgoSchemaCount = 0; fgProfileData_ILSizeMismatch = false; fgNumProfileRuns = 0; + fgPgoBlockCounts = 0; + fgPgoClassProfiles = 0; if (jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT)) { HRESULT hr; @@ -2892,9 +2894,18 @@ void Compiler::compInitOptions(JitFlags* jitFlags) fgNumProfileRuns = 0; for (UINT32 iSchema = 0; iSchema < fgPgoSchemaCount; iSchema++) { - if (fgPgoSchema[iSchema].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::NumRuns) + switch (fgPgoSchema[iSchema].InstrumentationKind) { - fgNumProfileRuns += fgPgoSchema[iSchema].Other; + case ICorJitInfo::PgoInstrumentationKind::NumRuns: + fgNumProfileRuns += fgPgoSchema[iSchema].Other; + break; + + case ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount: + fgPgoBlockCounts++; + break; + + case ICorJitInfo::PgoInstrumentationKind::TypeHandleHistogramCount: + fgPgoClassProfiles++; } } @@ -2906,6 +2917,18 @@ void Compiler::compInitOptions(JitFlags* jitFlags) "elements, %d runs\n", info.compFullName, hr, dspPtr(fgPgoSchema), dspPtr(fgPgoData), fgPgoSchemaCount, fgNumProfileRuns); + if (fgPgoBlockCounts > 0) + { + JITDUMP(" [%d blockCounts]", fgPgoBlockCounts); + } + + if (fgPgoClassProfiles > 0) + { + JITDUMP(" [%d classProfiles]", fgPgoClassProfiles); + } + + JITDUMP("\n"); + // a failed result that also has a non-NULL fgPgoSchema // indicates that the ILSize for the method no longer matches // the ILSize for the method when profile data was collected. @@ -4416,6 +4439,15 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags compFunctionTraceStart(); + // If profile data is available, incorporate it into the flowgraph. + // Note: the importer is sensitive to block weights, so this has + // to happen before importation. + // + if (compileFlags->IsSet(JitFlags::JIT_FLAG_BBOPT) && fgHaveProfileData()) + { + DoPhase(this, PHASE_INCPROFILE, &Compiler::fgIncorporateProfileData); + } + // Import: convert the instrs in each basic block to a tree based intermediate representation // DoPhase(this, PHASE_IMPORTATION, &Compiler::fgImport); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index ca7057faadf140..b6f23a480b3c2e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5540,6 +5540,8 @@ class Compiler BYTE* fgPgoData; UINT32 fgPgoSchemaCount; UINT32 fgNumProfileRuns; + UINT32 fgPgoBlockCounts; + UINT32 fgPgoClassProfiles; unsigned fgStressBBProf() { @@ -5562,6 +5564,8 @@ class Compiler void fgComputeProfileScale(); bool fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::weight_t* weight); PhaseStatus fgInstrumentMethod(); + PhaseStatus fgIncorporateProfileData(); + void fgIncorporateBlockCounts(); public: // fgIsUsingProfileWeights - returns true if we have real profile data for this method diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index 4f304f3c363ab6..23dade2f959265 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -28,7 +28,8 @@ CompPhaseNameMacro(PHASE_IMPORTATION, "Importation", CompPhaseNameMacro(PHASE_INDXCALL, "Indirect call transform", "INDXCALL", false, -1, true) CompPhaseNameMacro(PHASE_PATCHPOINTS, "Expand patchpoints", "PPOINT", false, -1, true) CompPhaseNameMacro(PHASE_POST_IMPORT, "Post-import", "POST-IMP", false, -1, false) -CompPhaseNameMacro(PHASE_IBCINSTR, "IBC instrumentation", "IBCINSTR", false, -1, false) +CompPhaseNameMacro(PHASE_IBCINSTR, "Profile instrumentation", "IBCINSTR", false, -1, false) +CompPhaseNameMacro(PHASE_INCPROFILE, "Profile incorporation", "INCPROF", false, -1, false) CompPhaseNameMacro(PHASE_MORPH_INIT, "Morph - Init", "MOR-INIT" ,false, -1, false) CompPhaseNameMacro(PHASE_MORPH_INLINE, "Morph - Inlining", "MOR-INL", false, -1, true) CompPhaseNameMacro(PHASE_MORPH_ADD_INTERNAL, "Morph - Add internal blocks", "MOR-ADD", false, -1, true) diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 681b89f72678ba..498651abab5931 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -877,6 +877,71 @@ PhaseStatus Compiler::fgInstrumentMethod() return PhaseStatus::MODIFIED_EVERYTHING; } +//------------------------------------------------------------------------ +// fgIncorporateProfileData: add block/edge profile data to the flowgraph +// +// Returns: +// appropriate phase status +// +PhaseStatus Compiler::fgIncorporateProfileData() +{ + assert(fgHaveProfileData()); + assert(fgPgoBlockCounts > 0); + fgIncorporateBlockCounts(); + return PhaseStatus::MODIFIED_EVERYTHING; +} + +//------------------------------------------------------------------------ +// fgIncorporateBlockCounts: read block count based profile data +// and set block weights +// +// Notes: +// Count data for inlinees is scaled (usually down). +// +// Todo: +// Normalize counts. +// +// Take advantage of the (likely) correspondence between block order +// and schema order? +// +void Compiler::fgIncorporateBlockCounts() +{ + for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext) + { + BasicBlock::weight_t profileWeight; + + // Skip internal and un-imported blocks. + // + if ((block->bbFlags & (BBF_INTERNAL | BBF_IMPORTED)) != BBF_IMPORTED) + { + continue; + } + + if (fgGetProfileWeightForBasicBlock(block->bbCodeOffs, &profileWeight)) + { + if (compIsForInlining()) + { + if (impInlineInfo->profileScaleState == InlineInfo::ProfileScaleState::KNOWN) + { + double scaledWeight = impInlineInfo->profileScaleFactor * profileWeight; + profileWeight = (BasicBlock::weight_t)scaledWeight; + } + } + + block->setBBProfileWeight(profileWeight); + + if (profileWeight == 0) + { + block->bbSetRunRarely(); + } + else + { + block->bbFlags &= ~BBF_RUN_RARELY; + } + } + } +} + bool flowList::setEdgeWeightMinChecked(BasicBlock::weight_t newWeight, BasicBlock::weight_t slop, bool* wbUsedSlop) { bool result = false; diff --git a/src/coreclr/jit/phase.cpp b/src/coreclr/jit/phase.cpp index 3a78ef3b2b3def..1b1689e85ac860 100644 --- a/src/coreclr/jit/phase.cpp +++ b/src/coreclr/jit/phase.cpp @@ -157,13 +157,11 @@ void Phase::PostPhase(PhaseStatus status) // well as the new-style phases that have been updated to return // PhaseStatus from their DoPhase methods. // - static Phases s_allowlist[] = {PHASE_IMPORTATION, PHASE_IBCINSTR, - PHASE_INDXCALL, PHASE_MORPH_INLINE, - PHASE_ALLOCATE_OBJECTS, PHASE_EMPTY_TRY, - PHASE_EMPTY_FINALLY, PHASE_MERGE_FINALLY_CHAINS, - PHASE_CLONE_FINALLY, PHASE_MERGE_THROWS, - PHASE_MORPH_GLOBAL, PHASE_BUILD_SSA, - PHASE_RATIONALIZE, PHASE_LOWERING, + static Phases s_allowlist[] = {PHASE_IMPORTATION, PHASE_IBCINSTR, PHASE_INCPROFILE, + PHASE_INDXCALL, PHASE_MORPH_INLINE, PHASE_ALLOCATE_OBJECTS, + PHASE_EMPTY_TRY, PHASE_EMPTY_FINALLY, PHASE_MERGE_FINALLY_CHAINS, + PHASE_CLONE_FINALLY, PHASE_MERGE_THROWS, PHASE_MORPH_GLOBAL, + PHASE_BUILD_SSA, PHASE_RATIONALIZE, PHASE_LOWERING, PHASE_STACK_LEVEL_SETTER}; if (madeChanges) From 6fdd0148006d5dfdddba9a862d55f161b5940b47 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 29 Jan 2021 12:57:55 -0800 Subject: [PATCH 2/3] fix build issue: add default case --- src/coreclr/jit/compiler.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index d969296716d0a8..6dabb4bfa1fcb0 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2906,6 +2906,10 @@ void Compiler::compInitOptions(JitFlags* jitFlags) case ICorJitInfo::PgoInstrumentationKind::TypeHandleHistogramCount: fgPgoClassProfiles++; + break; + + default: + break; } } From ba14cd8abfeac632f3e685dbbed0a2849f557f56 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 29 Jan 2021 15:27:49 -0800 Subject: [PATCH 3/3] review feedback --- src/coreclr/jit/compiler.cpp | 49 +++-------------------------------- src/coreclr/jit/fgbasic.cpp | 3 +++ src/coreclr/jit/fgprofile.cpp | 36 ++++++++++++++++++++++++- 3 files changed, 41 insertions(+), 47 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 6dabb4bfa1fcb0..08fc10a8598970 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2880,58 +2880,15 @@ void Compiler::compInitOptions(JitFlags* jitFlags) fgPgoData = nullptr; fgPgoSchemaCount = 0; fgProfileData_ILSizeMismatch = false; - fgNumProfileRuns = 0; - fgPgoBlockCounts = 0; - fgPgoClassProfiles = 0; if (jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT)) { HRESULT hr; hr = info.compCompHnd->getPgoInstrumentationResults(info.compMethodHnd, &fgPgoSchema, &fgPgoSchemaCount, &fgPgoData); - if (SUCCEEDED(hr)) - { - fgNumProfileRuns = 0; - for (UINT32 iSchema = 0; iSchema < fgPgoSchemaCount; iSchema++) - { - switch (fgPgoSchema[iSchema].InstrumentationKind) - { - case ICorJitInfo::PgoInstrumentationKind::NumRuns: - fgNumProfileRuns += fgPgoSchema[iSchema].Other; - break; - - case ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount: - fgPgoBlockCounts++; - break; - - case ICorJitInfo::PgoInstrumentationKind::TypeHandleHistogramCount: - fgPgoClassProfiles++; - break; - - default: - break; - } - } - - if (fgNumProfileRuns == 0) - fgNumProfileRuns = 1; - } - - JITDUMP("BBOPT set -- VM query for profile data for %s returned: hr=%0x; schema at %p, counts at %p, %d schema " - "elements, %d runs\n", - info.compFullName, hr, dspPtr(fgPgoSchema), dspPtr(fgPgoData), fgPgoSchemaCount, fgNumProfileRuns); - - if (fgPgoBlockCounts > 0) - { - JITDUMP(" [%d blockCounts]", fgPgoBlockCounts); - } - - if (fgPgoClassProfiles > 0) - { - JITDUMP(" [%d classProfiles]", fgPgoClassProfiles); - } - - JITDUMP("\n"); + JITDUMP( + "BBOPT set; query for profile data returned hr %0x, schema at %p, counts at %p, schema element count %d\n", + hr, dspPtr(fgPgoSchema), dspPtr(fgPgoData), fgPgoSchemaCount); // a failed result that also has a non-NULL fgPgoSchema // indicates that the ILSize for the method no longer matches diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index d20b33efbcd263..2fd7e201cc99a6 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -168,6 +168,9 @@ void Compiler::fgInit() fgPgoSchema = nullptr; fgPgoData = nullptr; fgPgoSchemaCount = 0; + fgNumProfileRuns = 0; + fgPgoBlockCounts = 0; + fgPgoClassProfiles = 0; fgPredListSortVector = nullptr; } diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 498651abab5931..807c98a7775408 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -886,7 +886,41 @@ PhaseStatus Compiler::fgInstrumentMethod() PhaseStatus Compiler::fgIncorporateProfileData() { assert(fgHaveProfileData()); + + // Summarize profile data + // + fgNumProfileRuns = 0; + for (UINT32 iSchema = 0; iSchema < fgPgoSchemaCount; iSchema++) + { + switch (fgPgoSchema[iSchema].InstrumentationKind) + { + case ICorJitInfo::PgoInstrumentationKind::NumRuns: + fgNumProfileRuns += fgPgoSchema[iSchema].Other; + break; + + case ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount: + fgPgoBlockCounts++; + break; + + case ICorJitInfo::PgoInstrumentationKind::TypeHandleHistogramCount: + fgPgoClassProfiles++; + break; + + default: + break; + } + } + assert(fgPgoBlockCounts > 0); + + if (fgNumProfileRuns == 0) + { + fgNumProfileRuns = 1; + } + + JITDUMP("Profile summary: %d runs, %d block probes, %d class profiles\n", fgNumProfileRuns, fgPgoBlockCounts, + fgPgoClassProfiles); + fgIncorporateBlockCounts(); return PhaseStatus::MODIFIED_EVERYTHING; } @@ -930,7 +964,7 @@ void Compiler::fgIncorporateBlockCounts() block->setBBProfileWeight(profileWeight); - if (profileWeight == 0) + if (profileWeight == BB_ZERO_WEIGHT) { block->bbSetRunRarely(); }