diff --git a/eng/pipelines/common/templates/runtimes/run-test-job.yml b/eng/pipelines/common/templates/runtimes/run-test-job.yml index dedbde9c61b413..3952315539bfdc 100644 --- a/eng/pipelines/common/templates/runtimes/run-test-job.yml +++ b/eng/pipelines/common/templates/runtimes/run-test-job.yml @@ -541,6 +541,7 @@ jobs: - fullpgo_random_gdv_methodprofiling_only - fullpgo_random_gdv_edge - fullpgo_methodprofiling_always_optimized + - syntheticpgo ${{ if in(parameters.testGroup, 'gc-longrunning') }}: longRunningGcTests: true scenarios: diff --git a/eng/pipelines/libraries/run-test-job.yml b/eng/pipelines/libraries/run-test-job.yml index 01ee56273b8613..58da034f4a92d0 100644 --- a/eng/pipelines/libraries/run-test-job.yml +++ b/eng/pipelines/libraries/run-test-job.yml @@ -200,3 +200,4 @@ jobs: - fullpgo_random_gdv_edge - jitosr_stress - jitosr_stress_random + - syntheticpgo diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 9af2e4fa37d2aa..006141a555a696 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -1577,6 +1577,53 @@ inline constexpr FlowGraphUpdates operator&(FlowGraphUpdates a, FlowGraphUpdates return (FlowGraphUpdates)((unsigned int)a & (unsigned int)b); } +// Profile checking options +// +// clang-format off +enum class ProfileChecks : unsigned int +{ + CHECK_NONE = 0, + CHECK_CLASSIC = 1 << 0, // check "classic" jit weights + CHECK_LIKELY = 1 << 1, // check likelihood based weights + RAISE_ASSERT = 1 << 2, // assert on check failure + CHECK_ALL_BLOCKS = 1 << 3, // check blocks even if bbHasProfileWeight is false +}; + +inline constexpr ProfileChecks operator ~(ProfileChecks a) +{ + return (ProfileChecks)(~(unsigned int)a); +} + +inline constexpr ProfileChecks operator |(ProfileChecks a, ProfileChecks b) +{ + return (ProfileChecks)((unsigned int)a | (unsigned int)b); +} + +inline constexpr ProfileChecks operator &(ProfileChecks a, ProfileChecks b) +{ + return (ProfileChecks)((unsigned int)a & (unsigned 
int)b); +} + +inline ProfileChecks& operator |=(ProfileChecks& a, ProfileChecks b) +{ + return a = (ProfileChecks)((unsigned int)a | (unsigned int)b); +} + +inline ProfileChecks& operator &=(ProfileChecks& a, ProfileChecks b) +{ + return a = (ProfileChecks)((unsigned int)a & (unsigned int)b); +} + +inline ProfileChecks& operator ^=(ProfileChecks& a, ProfileChecks b) +{ + return a = (ProfileChecks)((unsigned int)a ^ (unsigned int)b); +} + +inline bool hasFlag(const ProfileChecks& flagSet, const ProfileChecks& flag) +{ + return ((flagSet & flag) == flag); +} + //--------------------------------------------------------------- // Compilation time. // @@ -5516,8 +5563,9 @@ class Compiler void fgDebugCheckFlagsHelper(GenTree* tree, GenTreeFlags actualFlags, GenTreeFlags expectedFlags); void fgDebugCheckTryFinallyExits(); void fgDebugCheckProfileWeights(); - bool fgDebugCheckIncomingProfileData(BasicBlock* block); - bool fgDebugCheckOutgoingProfileData(BasicBlock* block); + void fgDebugCheckProfileWeights(ProfileChecks checks); + bool fgDebugCheckIncomingProfileData(BasicBlock* block, ProfileChecks checks); + bool fgDebugCheckOutgoingProfileData(BasicBlock* block, ProfileChecks checks); #endif // DEBUG diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index 7d8ed5d1178b8e..87b1ae3a47c990 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -957,7 +957,7 @@ bool Compiler::fgDumpFlowGraph(Phases phase, PhasePosition pos) } // "Raw" Profile weight - if (block->hasProfileWeight()) + if (block->hasProfileWeight() || (JitConfig.JitSynthesizeCounts() > 0)) { fprintf(fgxFile, "\\n\\n%7.2f", ((double)block->getBBWeight(this)) / BB_UNITY_WEIGHT); } diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index bd2b1984b57a9b..382b5374c1d5f9 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -596,6 +596,26 @@ void BlockCountInstrumentor::Instrument(BasicBlock* block, 
Schema& schema, uint8 (entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount)); uint8_t* addrOfCurrentExecutionCount = entry.Offset + profileMemory; +#ifdef DEBUG + if (JitConfig.JitPropagateSynthesizedCountsToProfileData() > 0) + { + // Write the current synthesized count as the profile data + // (block count slots are 32 bits for BasicBlockIntCount, 64 bits otherwise) + weight_t blockWeight = block->bbWeight; + + if (entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) + { + *((uint32_t*)addrOfCurrentExecutionCount) = (uint32_t)blockWeight; + } + else + { + *((uint64_t*)addrOfCurrentExecutionCount) = (uint64_t)blockWeight; + } + + return; + } +#endif + var_types typ = entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount ? TYP_INT : TYP_LONG; @@ -1742,6 +1762,32 @@ void EfficientEdgeCountInstrumentor::Instrument(BasicBlock* block, Schema& schem uint8_t* addrOfCurrentExecutionCount = profileMemory + entry.Offset; +#ifdef DEBUG + if (JitConfig.JitPropagateSynthesizedCountsToProfileData() > 0) + { + // Write the current synthesized count as the profile data + // + // Todo: handle pseudo edges! + FlowEdge* const edge = m_comp->fgGetPredForBlock(source, target); + + if (edge != nullptr) + { + weight_t edgeWeight = edge->getLikelyWeight(); + + if (entry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::EdgeIntCount) + { + *((uint32_t*)addrOfCurrentExecutionCount) = (uint32_t)edgeWeight; + } + else + { + *((uint64_t*)addrOfCurrentExecutionCount) = (uint64_t)edgeWeight; + } + } + + return; + } +#endif + // Determine where to place the probe. 
// BasicBlock* instrumentedBlock = nullptr; @@ -2428,7 +2474,7 @@ PhaseStatus Compiler::fgIncorporateProfileData() } #ifdef DEBUG - // Optionally just run synthesis + // Optionally run synthesis // if ((JitConfig.JitSynthesizeCounts() > 0) && !compIsForInlining()) { @@ -2439,6 +2485,17 @@ PhaseStatus Compiler::fgIncorporateProfileData() return PhaseStatus::MODIFIED_EVERYTHING; } } + + // Or run synthesis and save the data out as the actual profile data + // + if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR) && + (JitConfig.JitPropagateSynthesizedCountsToProfileData() > 0) && !compIsForInlining()) + { + JITDUMP("Synthesizing profile data and writing it out as the actual profile data\n"); + ProfileSynthesis::Run(this, ProfileSynthesisOption::AssignLikelihoods); + fgPgoHaveWeights = false; + return PhaseStatus::MODIFIED_EVERYTHING; + } #endif // Do we have profile data? @@ -4845,15 +4902,8 @@ bool Compiler::fgProfileWeightsConsistent(weight_t weight1, weight_t weight2) // (or nearly so) // // Notes: -// For each profiled block, check that the flow of counts into -// the block matches the flow of counts out of the block. -// -// We ignore EH flow as we don't have explicit edges and generally -// we expect EH edge counts to be small, so errors from ignoring -// them should be rare. -// -// There's no point checking until we've built pred lists, as -// we can't easily reason about consistency without them. +// Does nothing, if profile checks are disabled, or there are +// no profile weights or pred lists. 
// void Compiler::fgDebugCheckProfileWeights() { @@ -4865,13 +4915,37 @@ void Compiler::fgDebugCheckProfileWeights() return; } + fgDebugCheckProfileWeights((ProfileChecks)JitConfig.JitProfileChecks()); +} + +//------------------------------------------------------------------------ +// fgDebugCheckProfileWeights: verify profile weights are self-consistent +// (or nearly so) +// +// Arguments: +// checks - checker options +// +// Notes: +// For each profiled block, check that the flow of counts into +// the block matches the flow of counts out of the block. +// +// We ignore EH flow as we don't have explicit edges and generally +// we expect EH edge counts to be small, so errors from ignoring +// them should be rare. +// +// There's no point checking until we've built pred lists, as +// we can't easily reason about consistency without them. +// +void Compiler::fgDebugCheckProfileWeights(ProfileChecks checks) +{ // We can check classic (min/max, late computed) weights // and/or // new likelyhood based weights. 
// - const bool verifyClassicWeights = fgEdgeWeightsComputed && (JitConfig.JitProfileChecks() & 0x1) == 0x1; - const bool verifyLikelyWeights = (JitConfig.JitProfileChecks() & 0x2) == 0x2; - const bool assertOnFailure = (JitConfig.JitProfileChecks() & 0x4) == 0x4; + const bool verifyClassicWeights = fgEdgeWeightsComputed && hasFlag(checks, ProfileChecks::CHECK_CLASSIC); + const bool verifyLikelyWeights = hasFlag(checks, ProfileChecks::CHECK_LIKELY); + const bool assertOnFailure = hasFlag(checks, ProfileChecks::RAISE_ASSERT); + const bool checkAllBlocks = hasFlag(checks, ProfileChecks::CHECK_ALL_BLOCKS); if (!(verifyClassicWeights || verifyLikelyWeights)) { @@ -4891,7 +4965,7 @@ void Compiler::fgDebugCheckProfileWeights() // for (BasicBlock* const block : Blocks()) { - if (!block->hasProfileWeight()) + if (!block->hasProfileWeight() && !checkAllBlocks) { unprofiledBlocks++; continue; @@ -4929,8 +5003,11 @@ void Compiler::fgDebugCheckProfileWeights() // if (block->KindIs(BBJ_RETURN, BBJ_THROW)) { - exitWeight += blockWeight; - exitProfiled = !opts.IsOSR(); + if (BasicBlock::sameHndRegion(block, fgFirstBB)) + { + exitWeight += blockWeight; + exitProfiled = !opts.IsOSR(); + } verifyOutgoing = false; } @@ -4969,12 +5046,12 @@ void Compiler::fgDebugCheckProfileWeights() if (verifyIncoming) { - incomingConsistent = fgDebugCheckIncomingProfileData(block); + incomingConsistent = fgDebugCheckIncomingProfileData(block, checks); } if (verifyOutgoing) { - outgoingConsistent = fgDebugCheckOutgoingProfileData(block); + outgoingConsistent = fgDebugCheckOutgoingProfileData(block, checks); } if (!incomingConsistent || !outgoingConsistent) @@ -4987,7 +5064,13 @@ void Compiler::fgDebugCheckProfileWeights() // if (entryProfiled && exitProfiled) { - if (!fgProfileWeightsConsistent(entryWeight, exitWeight)) + // Note these may not agree, if fgFirstBB is a loop header. 
+ // + if (fgFirstBB->bbRefs > 1) + { + JITDUMP(" Method entry " FMT_BB " is loop head, can't check entry/exit balance\n", fgFirstBB->bbNum); + } + else if (!fgProfileWeightsConsistent(entryWeight, exitWeight)) { problemBlocks++; JITDUMP(" Method entry " FMT_WT " method exit " FMT_WT " weight mismatch\n", entryWeight, exitWeight); @@ -5025,7 +5108,8 @@ void Compiler::fgDebugCheckProfileWeights() // block matches the profile weight of the block. // // Arguments: -// block - block to check +// block - block to check +// checks - checker options // // Returns: // true if counts consistent or checking disabled, false otherwise. @@ -5033,10 +5117,10 @@ void Compiler::fgDebugCheckProfileWeights() // Notes: // Only useful to call on blocks with predecessors. // -bool Compiler::fgDebugCheckIncomingProfileData(BasicBlock* block) +bool Compiler::fgDebugCheckIncomingProfileData(BasicBlock* block, ProfileChecks checks) { - const bool verifyClassicWeights = fgEdgeWeightsComputed && (JitConfig.JitProfileChecks() & 0x1) == 0x1; - const bool verifyLikelyWeights = (JitConfig.JitProfileChecks() & 0x2) == 0x2; + const bool verifyClassicWeights = fgEdgeWeightsComputed && hasFlag(checks, ProfileChecks::CHECK_CLASSIC); + const bool verifyLikelyWeights = hasFlag(checks, ProfileChecks::CHECK_LIKELY); if (!(verifyClassicWeights || verifyLikelyWeights)) { @@ -5056,7 +5140,10 @@ bool Compiler::fgDebugCheckIncomingProfileData(BasicBlock* block) incomingWeightMax += predEdge->edgeWeightMax(); if (predEdge->hasLikelihood()) { - incomingLikelyWeight += predEdge->getLikelyWeight(); + if (BasicBlock::sameHndRegion(block, predEdge->getSourceBlock())) + { + incomingLikelyWeight += predEdge->getLikelyWeight(); + } } else { @@ -5122,6 +5209,7 @@ bool Compiler::fgDebugCheckIncomingProfileData(BasicBlock* block) // // Arguments: // block - block to check +// checks - checker options // // Returns: // true if counts consistent or checking disabled, false otherwise. 
@@ -5129,10 +5217,10 @@ bool Compiler::fgDebugCheckIncomingProfileData(BasicBlock* block) // Notes: // Only useful to call on blocks with successors. // -bool Compiler::fgDebugCheckOutgoingProfileData(BasicBlock* block) +bool Compiler::fgDebugCheckOutgoingProfileData(BasicBlock* block, ProfileChecks checks) { - const bool verifyClassicWeights = fgEdgeWeightsComputed && (JitConfig.JitProfileChecks() & 0x1) == 0x1; - const bool verifyLikelyWeights = (JitConfig.JitProfileChecks() & 0x2) == 0x2; + const bool verifyClassicWeights = fgEdgeWeightsComputed && hasFlag(checks, ProfileChecks::CHECK_CLASSIC); + const bool verifyLikelyWeights = hasFlag(checks, ProfileChecks::CHECK_LIKELY); if (!(verifyClassicWeights || verifyLikelyWeights)) { diff --git a/src/coreclr/jit/fgprofilesynthesis.cpp b/src/coreclr/jit/fgprofilesynthesis.cpp index 4260012df26f33..4b2fde5920431d 100644 --- a/src/coreclr/jit/fgprofilesynthesis.cpp +++ b/src/coreclr/jit/fgprofilesynthesis.cpp @@ -97,6 +97,21 @@ void ProfileSynthesis::Run(ProfileSynthesisOption option) // act as if we don't have "real" profile data. // m_comp->fgPgoHaveWeights = false; + +#ifdef DEBUG + if (JitConfig.JitCheckSynthesizedCounts() > 0) + { + // Verify consistency, provided we didn't see any improper headers + // or cap any Cp values. 
+ // + if ((m_improperLoopHeaders == 0) && (m_cappedCyclicProbabilities == 0)) + { + // verify likely weights, assert on failure, check all blocks + m_comp->fgDebugCheckProfileWeights(ProfileChecks::CHECK_LIKELY | ProfileChecks::RAISE_ASSERT | + ProfileChecks::CHECK_ALL_BLOCKS); + } + } +#endif } //------------------------------------------------------------------------ @@ -477,7 +492,8 @@ void ProfileSynthesis::BuildReversePostorder() printf("\nAfter doing a post order traversal of the BB graph, this is the ordering:\n"); for (unsigned i = 1; i <= m_comp->fgBBNumMax; ++i) { - printf("%02u -> " FMT_BB "\n", i, m_comp->fgBBReversePostorder[i]->bbNum); + BasicBlock* const block = m_comp->fgBBReversePostorder[i]; + printf("%02u -> " FMT_BB "[%u, %u]\n", i, block->bbNum, block->bbPreorderNum, block->bbPostorderNum); } printf("\n"); } @@ -489,9 +505,8 @@ void ProfileSynthesis::BuildReversePostorder() // void ProfileSynthesis::FindLoops() { - CompAllocator allocator = m_comp->getAllocator(CMK_Pgo); - m_loops = new (allocator) LoopVector(allocator); - unsigned improperLoopCount = 0; + CompAllocator allocator = m_comp->getAllocator(CMK_Pgo); + m_loops = new (allocator) LoopVector(allocator); // Identify loops // @@ -585,7 +600,7 @@ void ProfileSynthesis::FindLoops() loopBlock->bbNum); isNaturalLoop = false; - improperLoopCount++; + m_improperLoopHeaders++; break; } @@ -690,9 +705,9 @@ void ProfileSynthesis::FindLoops() JITDUMP("\nFound %d loops\n", m_loops->size()); } - if (improperLoopCount > 0) + if (m_improperLoopHeaders > 0) { - JITDUMP("Rejected %d loops\n", improperLoopCount); + JITDUMP("Rejected %d loop headers\n", m_improperLoopHeaders); } } @@ -766,12 +781,10 @@ void ProfileSynthesis::ComputeCyclicProbabilities(SimpleLoop* loop) for (FlowEdge* const edge : nestedLoop->m_entryEdges) { - if (!BasicBlock::sameHndRegion(block, edge->getSourceBlock())) + if (BasicBlock::sameHndRegion(block, edge->getSourceBlock())) { - continue; + newWeight += edge->getLikelyWeight(); 
} - - newWeight += edge->getLikelyWeight(); } newWeight *= nestedLoop->m_cyclicProbability; @@ -785,12 +798,10 @@ void ProfileSynthesis::ComputeCyclicProbabilities(SimpleLoop* loop) for (FlowEdge* const edge : block->PredEdges()) { - if (!BasicBlock::sameHndRegion(block, edge->getSourceBlock())) + if (BasicBlock::sameHndRegion(block, edge->getSourceBlock())) { - continue; + newWeight += edge->getLikelyWeight(); } - - newWeight += edge->getLikelyWeight(); } block->bbWeight = newWeight; @@ -822,6 +833,7 @@ void ProfileSynthesis::ComputeCyclicProbabilities(SimpleLoop* loop) { capped = true; cyclicWeight = 0.999; + m_cappedCyclicProbabilities++; } weight_t cyclicProbability = 1.0 / (1.0 - cyclicWeight); @@ -885,7 +897,10 @@ void ProfileSynthesis::ComputeBlockWeights() for (FlowEdge* const edge : loop->m_entryEdges) { - newWeight += edge->getLikelyWeight(); + if (BasicBlock::sameHndRegion(block, edge->getSourceBlock())) + { + newWeight += edge->getLikelyWeight(); + } } newWeight *= loop->m_cyclicProbability; @@ -901,7 +916,10 @@ void ProfileSynthesis::ComputeBlockWeights() for (FlowEdge* const edge : block->PredEdges()) { - newWeight += edge->getLikelyWeight(); + if (BasicBlock::sameHndRegion(block, edge->getSourceBlock())) + { + newWeight += edge->getLikelyWeight(); + } } block->bbWeight = newWeight; diff --git a/src/coreclr/jit/fgprofilesynthesis.h b/src/coreclr/jit/fgprofilesynthesis.h index 393f332089c7f0..82010f6d9ab43f 100644 --- a/src/coreclr/jit/fgprofilesynthesis.h +++ b/src/coreclr/jit/fgprofilesynthesis.h @@ -63,7 +63,12 @@ class ProfileSynthesis } private: - ProfileSynthesis(Compiler* compiler) : m_comp(compiler), m_loops(nullptr), m_bbNumToBlockMap(nullptr) + ProfileSynthesis(Compiler* compiler) + : m_comp(compiler) + , m_loops(nullptr) + , m_bbNumToBlockMap(nullptr) + , m_improperLoopHeaders(0) + , m_cappedCyclicProbabilities(0) { } @@ -97,6 +102,8 @@ class ProfileSynthesis Compiler* const m_comp; LoopVector* m_loops; BasicBlock** m_bbNumToBlockMap; + 
unsigned m_improperLoopHeaders; + unsigned m_cappedCyclicProbabilities; }; #endif // !_FGPROFILESYNTHESIS_H_ diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index a1305299854ee3..caad83a958e283 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -595,6 +595,11 @@ CONFIG_INTEGER(JitRandomlyCollect64BitCounts, W("JitRandomlyCollect64BitCounts") // 1: profile synthesis for root methods // 2: profile synthesis for root methods w/o pgo data CONFIG_INTEGER(JitSynthesizeCounts, W("JitSynthesizeCounts"), 0) +// Check if synthesis left consistent counts +CONFIG_INTEGER(JitCheckSynthesizedCounts, W("JitCheckSynthesizedCounts"), 0) +// If instrumenting the method, run synthesis and save the synthesis results +// as edge or block profile data. Do not actually instrument. +CONFIG_INTEGER(JitPropagateSynthesizedCountsToProfileData, W("JitPropagateSynthesizedCountsToProfileData"), 0) #endif // Devirtualize virtual calls with getExactClasses (NativeAOT only for now) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 07b1e91aee7cf4..8cb875e2548f1e 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -5240,10 +5240,11 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) // if ((activePhaseChecks & PhaseChecks::CHECK_PROFILE) == PhaseChecks::CHECK_PROFILE) { - const bool nextProfileOk = fgDebugCheckIncomingProfileData(bNewCond->bbNext); - const bool jumpProfileOk = fgDebugCheckIncomingProfileData(bNewCond->bbJumpDest); + const ProfileChecks checks = (ProfileChecks)JitConfig.JitProfileChecks(); + const bool nextProfileOk = fgDebugCheckIncomingProfileData(bNewCond->bbNext, checks); + const bool jumpProfileOk = fgDebugCheckIncomingProfileData(bNewCond->bbJumpDest, checks); - if ((JitConfig.JitProfileChecks() & 0x4) == 0x4) + if (hasFlag(checks, ProfileChecks::RAISE_ASSERT)) { assert(nextProfileOk); assert(jumpProfileOk); diff --git 
a/src/tests/Common/testenvironment.proj b/src/tests/Common/testenvironment.proj index e11acfa2653527..02c5923d0c3783 100644 --- a/src/tests/Common/testenvironment.proj +++ b/src/tests/Common/testenvironment.proj @@ -72,7 +72,9 @@ DOTNET_TieredPGO_InstrumentedTierAlwaysOptimized; DOTNET_JitForceControlFlowGuard; DOTNET_JitCFGUseDispatcher; - RunningIlasmRoundTrip + RunningIlasmRoundTrip; + DOTNET_JitSynthesizeCounts; + DOTNET_JitCheckSynthesizedCounts @@ -219,6 +221,7 @@ +