From ab2fc1bb9a47ad4a6e595dfff05f3eac8084cdec Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 5 Mar 2024 11:11:12 -0800 Subject: [PATCH] JIT: profile checking through loop opts Keep profile checks enabled until after we have finished running the loop optmizations (recall this is currently just checking for edge likelihood consistency). Fix various maintenance issues to make this possible. Most are straightforward, but a few are not: * Whenever we create a new BBJ_COND we have to figure out the right likelihoods. If we're copying an existing one (say loop inversion) we currently duplicate the likelihoods. This is a choice, and it may not accurately represent what happends, but we have no better information. * If we invent new branching structures we need to put in reasonable likelihoods. For cloning we assume flowing to the cold loop is unlikely but can happen. Block weights and edge likelihoods are not yet consistent. The plan is to get all the edge likelihoods "correct" and self-consistent, and then start rectifying edge likelihoods and block weights. Contributes to #93020. --- src/coreclr/jit/block.cpp | 36 +++++++++++++++- src/coreclr/jit/block.h | 1 + src/coreclr/jit/compiler.cpp | 10 ++--- src/coreclr/jit/fgflow.cpp | 1 + src/coreclr/jit/fgopt.cpp | 61 ++++++++++++++++++++++---- src/coreclr/jit/loopcloning.cpp | 22 ++++++++++ src/coreclr/jit/morph.cpp | 39 ++++++++++++----- src/coreclr/jit/optimizer.cpp | 76 ++++++++++++++++++++++----------- 8 files changed, 197 insertions(+), 49 deletions(-) diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp index bd9abbe7fef30d..ec81d028c43a52 100644 --- a/src/coreclr/jit/block.cpp +++ b/src/coreclr/jit/block.cpp @@ -69,7 +69,7 @@ unsigned SsaStressHashHelper() #endif //------------------------------------------------------------------------ -// setLikelihood: set the likelihood of aflow edge +// setLikelihood: set the likelihood of a flow edge // // Arguments: // likelihood -- value in range [0.0, 1.0] indicating how likely @@ -86,6 +86,40 @@ void FlowEdge::setLikelihood(weight_t likelihood) m_likelihood); } +//------------------------------------------------------------------------ +// addLikelihood: adjust the likelihood of a flow edge +// +// Arguments: +// addedLikelihood -- value in range [-likelihood, 1.0 - likelihood] +// to add to current likelihood. +// +void FlowEdge::addLikelihood(weight_t addedLikelihood) +{ + assert(m_likelihoodSet); + + weight_t newLikelihood = m_likelihood + addedLikelihood; + + // Tolerate slight overflow or underflow + // + const weight_t eps = 0.0001; + + if ((newLikelihood < 0) && (newLikelihood > -eps)) + { + newLikelihood = 0.0; + } + else if ((newLikelihood > 1) && (newLikelihood < 1 + eps)) + { + newLikelihood = 1.0; + } + + assert(newLikelihood >= 0.0); + assert(newLikelihood <= 1.0); + m_likelihood = newLikelihood; + + JITDUMP("updating likelihood of " FMT_BB " -> " FMT_BB " to " FMT_WT "\n", m_sourceBlock->bbNum, m_destBlock->bbNum, + m_likelihood); +} + //------------------------------------------------------------------------ // AllSuccessorEnumerator: Construct an instance of the enumerator. // diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h index 506f6aff702f54..9627cb6395c965 100644 --- a/src/coreclr/jit/block.h +++ b/src/coreclr/jit/block.h @@ -671,6 +671,7 @@ struct FlowEdge } void setLikelihood(weight_t likelihood); + void addLikelihood(weight_t addedLikelihod); void clearLikelihood() { diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 2bba7db9ae7ae5..ad9bd3f1ef9901 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4610,11 +4610,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_MORPH_ADD_INTERNAL, &Compiler::fgAddInternal); - // Disable profile checks now. - // Over time we will move this further and further back in the phase list, as we fix issues. - // - activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; - // Remove empty try regions // DoPhase(this, PHASE_EMPTY_TRY, &Compiler::fgRemoveEmptyTry); @@ -4855,6 +4850,11 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl DoPhase(this, PHASE_COMPUTE_DOMINATORS, &Compiler::fgComputeDominators); } + // Disable profile checks now. + // Over time we will move this further and further back in the phase list, as we fix issues. + // + activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; + #ifdef DEBUG fgDebugCheckLinks(); #endif diff --git a/src/coreclr/jit/fgflow.cpp b/src/coreclr/jit/fgflow.cpp index 79e9d41d7dc96b..deb87efd76c41d 100644 --- a/src/coreclr/jit/fgflow.cpp +++ b/src/coreclr/jit/fgflow.cpp @@ -157,6 +157,7 @@ FlowEdge* Compiler::fgAddRefPred(BasicBlock* block, BasicBlock* blockPred, FlowE if (flow != nullptr) { // The predecessor block already exists in the flow list; simply add to its duplicate count. + // noway_assert(flow->getDupCount()); flow->incrementDupCount(); } diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index dc492aeaf5924b..c6d54ec71af18e 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -1891,6 +1891,27 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block) FlowEdge* const newEdge = fgAddRefPred(bNewDest, block, oldEdge); *jmpTab = newEdge; + // Update edge likelihoods + // Note old edge may still be "in use" so we decrease its likelihood. + // + if (oldEdge->hasLikelihood()) + { + // We want to move this much likelihood from old->new + // + const weight_t likelihoodFraction = oldEdge->getLikelihood() / (oldEdge->getDupCount() + 1); + + if (newEdge->getDupCount() == 1) + { + newEdge->setLikelihood(likelihoodFraction); + } + else + { + newEdge->addLikelihood(likelihoodFraction); + } + + oldEdge->addLikelihood(-likelihoodFraction); + } + // we optimized a Switch label - goto REPEAT_SWITCH to follow this new jump modified = true; @@ -2903,11 +2924,24 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) // We need to update the following flags of the bJump block if they were set in the bDest block bJump->CopyFlags(bDest, BBF_COPY_PROPAGATE); - /* Update bbRefs and bbPreds */ + // Update bbRefs and bbPreds + // + // For now we set the likelihood of the new branch to match + // the likelihood of the old branch. + // + // This may or may not match the block weight adjustments we're + // making. All this becomes easier to reconcile once we rely on + // edge likelihoods more and have synthesis running. + // + // Until then we won't worry that edges and blocks are potentially + // out of sync. + // + FlowEdge* const destFalseEdge = bDest->GetFalseEdge(); + FlowEdge* const destTrueEdge = bDest->GetTrueEdge(); // bJump now falls through into the next block // - FlowEdge* const falseEdge = fgAddRefPred(bJump->Next(), bJump); + FlowEdge* const falseEdge = fgAddRefPred(bJump->Next(), bJump, destFalseEdge); // bJump no longer jumps to bDest // @@ -2915,7 +2949,7 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) // bJump now jumps to bDest's normal jump target // - FlowEdge* const trueEdge = fgAddRefPred(bDestNormalTarget, bJump); + FlowEdge* const trueEdge = fgAddRefPred(bDestNormalTarget, bJump, destTrueEdge); bJump->SetCond(trueEdge, falseEdge); @@ -3076,7 +3110,9 @@ bool Compiler::fgOptimizeSwitchJumps() newBlock->setBBProfileWeight(blockToNewBlockWeight); blockToTargetEdge->setEdgeWeights(blockToTargetWeight, blockToTargetWeight, dominantTarget); + blockToTargetEdge->setLikelihood(fraction); blockToNewBlockEdge->setEdgeWeights(blockToNewBlockWeight, blockToNewBlockWeight, block); + blockToNewBlockEdge->setLikelihood(max(0, 1.0 - fraction)); // There may be other switch cases that lead to this same block, but there's just // one edge in the flowgraph. So we need to subtract off the profile data that now flows @@ -5020,11 +5056,20 @@ bool Compiler::fgUpdateFlowGraph(bool doTailDuplication /* = false */, bool isPh } // Optimize the Conditional JUMP to go to the new target - fgRemoveRefPred(block->GetFalseEdge()); - fgRemoveRefPred(bNext->GetTargetEdge()); - block->SetFalseEdge(block->GetTrueEdge()); - FlowEdge* const newEdge = fgAddRefPred(bNext->GetTarget(), block, bNext->GetTargetEdge()); - block->SetTrueEdge(newEdge); + // + FlowEdge* const oldFalseEdge = block->GetFalseEdge(); + FlowEdge* const oldTrueEdge = block->GetTrueEdge(); + FlowEdge* const oldNextEdge = bNext->GetTargetEdge(); + + // bNext no longer flows to target + // + fgRemoveRefPred(oldNextEdge); + + // Rewire flow from block + // + block->SetFalseEdge(oldTrueEdge); + FlowEdge* const newTrueEdge = fgAddRefPred(bNext->GetTarget(), block, oldFalseEdge); + block->SetTrueEdge(newTrueEdge); /* Unlink bNext from the BasicBlock list; note that we can diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index c6d37dc507b95a..95854e35c6d776 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -853,11 +853,27 @@ BasicBlock* LoopCloneContext::CondToStmtInBlock(Compiler* noway_assert(conds.Size() > 0); assert(slowPreheader != nullptr); + // For now assume high likelihood for the fast path, + // uniformly spread across the gating branches. + // + // For "normal" cloning this is probably ok. For GDV cloning this + // may be inaccurate. We should key off the type test likelihood(s). + // + // TODO: this is a bit of out sync with what we do for block weights. + // Reconcile. + // + const weight_t fastLikelihood = 0.999; + // Choose how to generate the conditions const bool generateOneConditionPerBlock = true; if (generateOneConditionPerBlock) { + // N = conds.Size() branches must all be true to execute the fast loop. + // Use the N'th root.... + // + const weight_t fastLikelihoodPerBlock = exp(log(fastLikelihood) / (weight_t)conds.Size()); + for (unsigned i = 0; i < conds.Size(); ++i) { BasicBlock* newBlk = comp->fgNewBBafter(BBJ_COND, insertAfter, /*extendRegion*/ true); @@ -866,12 +882,14 @@ BasicBlock* LoopCloneContext::CondToStmtInBlock(Compiler* JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", newBlk->bbNum, slowPreheader->bbNum); FlowEdge* const trueEdge = comp->fgAddRefPred(slowPreheader, newBlk); newBlk->SetTrueEdge(trueEdge); + trueEdge->setLikelihood(1 - fastLikelihoodPerBlock); if (insertAfter->KindIs(BBJ_COND)) { JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", insertAfter->bbNum, newBlk->bbNum); FlowEdge* const falseEdge = comp->fgAddRefPred(newBlk, insertAfter); insertAfter->SetFalseEdge(falseEdge); + falseEdge->setLikelihood(fastLikelihoodPerBlock); } JITDUMP("Adding conditions %u to " FMT_BB "\n", i, newBlk->bbNum); @@ -901,12 +919,14 @@ BasicBlock* LoopCloneContext::CondToStmtInBlock(Compiler* JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", newBlk->bbNum, slowPreheader->bbNum); FlowEdge* const trueEdge = comp->fgAddRefPred(slowPreheader, newBlk); newBlk->SetTrueEdge(trueEdge); + trueEdge->setLikelihood(1.0 - fastLikelihood); if (insertAfter->KindIs(BBJ_COND)) { JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", insertAfter->bbNum, newBlk->bbNum); FlowEdge* const falseEdge = comp->fgAddRefPred(newBlk, insertAfter); insertAfter->SetFalseEdge(falseEdge); + falseEdge->setLikelihood(fastLikelihood); } JITDUMP("Adding conditions to " FMT_BB "\n", newBlk->bbNum); @@ -2069,6 +2089,8 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex assert(condLast->NextIs(fastPreheader)); FlowEdge* const falseEdge = fgAddRefPred(fastPreheader, condLast); condLast->SetFalseEdge(falseEdge); + FlowEdge* const trueEdge = condLast->GetTrueEdge(); + falseEdge->setLikelihood(max(0, 1.0 - trueEdge->getLikelihood())); } //------------------------------------------------------------------------- diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index bbe9d16aa2808c..f81f74e7161296 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -14679,15 +14679,20 @@ bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt) thenBlock->SetFlags(BBF_IMPORTED); } + const unsigned thenLikelihood = qmark->ThenNodeLikelihood(); + const unsigned elseLikelihood = qmark->ElseNodeLikelihood(); + FlowEdge* const newEdge = fgAddRefPred(remainderBlock, thenBlock); thenBlock->SetTargetEdge(newEdge); assert(condBlock->TargetIs(elseBlock)); - FlowEdge* const falseEdge = fgAddRefPred(thenBlock, condBlock); - condBlock->SetCond(condBlock->GetTargetEdge(), falseEdge); - - thenBlock->inheritWeightPercentage(condBlock, qmark->ThenNodeLikelihood()); - elseBlock->inheritWeightPercentage(condBlock, qmark->ElseNodeLikelihood()); + FlowEdge* const elseEdge = fgAddRefPred(thenBlock, condBlock); + FlowEdge* const thenEdge = condBlock->GetTargetEdge(); + condBlock->SetCond(thenEdge, elseEdge); + thenBlock->inheritWeightPercentage(condBlock, thenLikelihood); + elseBlock->inheritWeightPercentage(condBlock, elseLikelihood); + thenEdge->setLikelihood(thenLikelihood / 100.0); + elseEdge->setLikelihood(elseLikelihood / 100.0); } else if (hasTrueExpr) { @@ -14699,15 +14704,21 @@ bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt) // gtReverseCond(condExpr); + const unsigned thenLikelihood = qmark->ThenNodeLikelihood(); + const unsigned elseLikelihood = qmark->ElseNodeLikelihood(); + assert(condBlock->TargetIs(elseBlock)); - FlowEdge* const trueEdge = fgAddRefPred(remainderBlock, condBlock); - condBlock->SetCond(trueEdge, condBlock->GetTargetEdge()); + FlowEdge* const thenEdge = fgAddRefPred(remainderBlock, condBlock); + FlowEdge* const elseEdge = condBlock->GetTargetEdge(); + condBlock->SetCond(thenEdge, elseEdge); // Since we have no false expr, use the one we'd already created. thenBlock = elseBlock; elseBlock = nullptr; - thenBlock->inheritWeightPercentage(condBlock, qmark->ThenNodeLikelihood()); + thenBlock->inheritWeightPercentage(condBlock, thenLikelihood); + thenEdge->setLikelihood(thenLikelihood / 100.0); + elseEdge->setLikelihood(elseLikelihood / 100.0); } else if (hasFalseExpr) { @@ -14717,11 +14728,17 @@ bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt) // +-->------------+ // bbj_cond(true) // + const unsigned thenLikelihood = qmark->ThenNodeLikelihood(); + const unsigned elseLikelihood = qmark->ElseNodeLikelihood(); + assert(condBlock->TargetIs(elseBlock)); - FlowEdge* const trueEdge = fgAddRefPred(remainderBlock, condBlock); - condBlock->SetCond(trueEdge, condBlock->GetTargetEdge()); + FlowEdge* const thenEdge = fgAddRefPred(remainderBlock, condBlock); + FlowEdge* const elseEdge = condBlock->GetTargetEdge(); + condBlock->SetCond(thenEdge, elseEdge); - elseBlock->inheritWeightPercentage(condBlock, qmark->ElseNodeLikelihood()); + elseBlock->inheritWeightPercentage(condBlock, elseLikelihood); + thenEdge->setLikelihood(thenLikelihood / 100.0); + elseEdge->setLikelihood(elseLikelihood / 100.0); } assert(condBlock->KindIs(BBJ_COND)); diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 3fea8881bf160c..bfc79ef239ce6a 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -631,8 +631,10 @@ void Compiler::optSetMappedBlockTargets(BasicBlock* blk, BasicBlock* newBlk, Blo falseTarget = blk->GetFalseTarget(); } - FlowEdge* const trueEdge = fgAddRefPred(trueTarget, newBlk); - FlowEdge* const falseEdge = fgAddRefPred(falseTarget, newBlk); + FlowEdge* const oldTrueEdge = blk->GetTrueEdge(); + FlowEdge* const trueEdge = fgAddRefPred(trueTarget, newBlk, oldTrueEdge); + FlowEdge* const oldFalseEdge = blk->GetFalseEdge(); + FlowEdge* const falseEdge = fgAddRefPred(falseTarget, newBlk, oldFalseEdge); newBlk->SetCond(trueEdge, falseEdge); break; } @@ -682,15 +684,21 @@ void Compiler::optSetMappedBlockTargets(BasicBlock* blk, BasicBlock* newBlk, Blo // Determine if newBlk should target switchTarget, or be redirected if (redirectMap->Lookup(switchTarget, &newTarget)) { - // TODO: Set likelihood using inspiringEdge newEdge = fgAddRefPred(newTarget, newBlk); } else { - // TODO: Set likelihood using inspiringEdge newEdge = fgAddRefPred(switchTarget, newBlk); } + // Transfer likelihood... instead of doing this gradually + // for dup'd edges, we set it once, when we add the last dup. + // + if (newEdge->getDupCount() == inspiringEdge->getDupCount()) + { + newEdge->setLikelihood(inspiringEdge->getLikelihood()); + } + newSwtDesc->bbsDstTab[i] = newEdge; } @@ -1861,32 +1869,36 @@ Compiler::OptInvertCountTreeInfoType Compiler::optInvertCountTreeInfo(GenTree* t // // Specifically, we're looking for the following case: // +// block: // ... // jmp test // `block` argument -// loop: +// top: // ... // ... // test: // ..stmts.. // cond -// jtrue loop +// jtrue top // // If we find this, and the condition is simple enough, we change // the loop to the following: // +// block: // ... +// jmp bNewCond +// bNewCond: // ..stmts.. // duplicated cond block statements // cond // duplicated cond -// jfalse done +// jfalse join // // else fall-through -// loop: +// top: // ... // ... // test: // ..stmts.. // cond -// jtrue loop -// done: +// jtrue top +// join: // // Makes no changes if the flow pattern match fails. // @@ -2142,7 +2154,8 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) bool foundCondTree = false; // Create a new block after `block` to put the copied condition code. - BasicBlock* bNewCond = fgNewBBafter(BBJ_COND, block, /*extendRegion*/ true); + // + BasicBlock* const bNewCond = fgNewBBafter(BBJ_COND, block, /*extendRegion*/ true); // Clone each statement in bTest and append to bNewCond. for (Statement* const stmt : bTest->Statements()) @@ -2201,10 +2214,23 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) // Update pred info // - FlowEdge* const trueEdge = fgAddRefPred(bJoin, bNewCond); - FlowEdge* const falseEdge = fgAddRefPred(bTop, bNewCond); - bNewCond->SetTrueEdge(trueEdge); - bNewCond->SetFalseEdge(falseEdge); + // For now we set the likelihood of the newCond branch to match + // the likelihood of the test branch (though swapped, since we're + // currently reversing the condition). This may or may not match + // the block weight adjustments we're making. All this becomes + // easier to reconcile once we rely on edge likelihoods more and + // have synthesis running (so block weights ==> frequencies). + // + // Until then we won't worry that edges and blocks are potentially + // out of sync. + // + FlowEdge* const testTopEdge = bTest->GetTrueEdge(); + FlowEdge* const testJoinEdge = bTest->GetFalseEdge(); + FlowEdge* const newCondJoinEdge = fgAddRefPred(bJoin, bNewCond, testJoinEdge); + FlowEdge* const newCondTopEdge = fgAddRefPred(bTop, bNewCond, testTopEdge); + + bNewCond->SetTrueEdge(newCondJoinEdge); + bNewCond->SetFalseEdge(newCondTopEdge); fgRemoveRefPred(block->GetTargetEdge()); FlowEdge* const newEdge = fgAddRefPred(bNewCond, block); @@ -2222,7 +2248,6 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) // is maintained no matter which condition block we point to, but we'll lose optimization potential // (and create spaghetti code) if we get it wrong. // - unsigned const loopFirstNum = bTop->bbNum; unsigned const loopBottomNum = bTest->bbNum; for (BasicBlock* const predBlock : bTest->PredBlocks()) @@ -2323,18 +2348,21 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) edgeBlockToAfter->setEdgeWeights(blockToAfterWeight, blockToAfterWeight, bNewCond->GetTrueTarget()); #ifdef DEBUG - // If we're checkig profile data, see if profile for the two target blocks is consistent. + // If we're checking profile data, see if profile for the two target blocks is consistent. // if ((activePhaseChecks & PhaseChecks::CHECK_PROFILE) == PhaseChecks::CHECK_PROFILE) { - const ProfileChecks checks = (ProfileChecks)JitConfig.JitProfileChecks(); - const bool nextProfileOk = fgDebugCheckIncomingProfileData(bNewCond->GetFalseTarget(), checks); - const bool jumpProfileOk = fgDebugCheckIncomingProfileData(bNewCond->GetTrueTarget(), checks); - - if (hasFlag(checks, ProfileChecks::RAISE_ASSERT)) + if (JitConfig.JitProfileChecks() > 0) { - assert(nextProfileOk); - assert(jumpProfileOk); + const ProfileChecks checks = (ProfileChecks)JitConfig.JitProfileChecks(); + const bool nextProfileOk = fgDebugCheckIncomingProfileData(bNewCond->GetFalseTarget(), checks); + const bool jumpProfileOk = fgDebugCheckIncomingProfileData(bNewCond->GetTrueTarget(), checks); + + if (hasFlag(checks, ProfileChecks::RAISE_ASSERT)) + { + assert(nextProfileOk); + assert(jumpProfileOk); + } } } #endif // DEBUG