From 672b2c017c5730335e037b0f9c7fa95434bed839 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 29 May 2025 13:08:55 -0400 Subject: [PATCH 1/7] More precise loop iteration computation --- src/coreclr/jit/optimizer.cpp | 67 +++++------------------------------ 1 file changed, 8 insertions(+), 59 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 61e264a7df2ff5..abdfe49a753568 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2012,6 +2012,12 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) JITDUMP("Matched flow pattern for loop inversion: block " FMT_BB " bTop " FMT_BB " bTest " FMT_BB "\n", block->bbNum, bTop->bbNum, bTest->bbNum); + const weight_t loopIterations = bTest->bbWeight / BasicBlock::getCalledCount(this); + if (loopIterations == BB_ZERO_WEIGHT) + { + return false; + } + // Estimate the cost of cloning the entire test block. // // Note: it would help throughput to compute the maximum cost @@ -2035,65 +2041,6 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) estDupCostSz += tree->GetCostSz(); } - weight_t loopIterations = BB_LOOP_WEIGHT_SCALE; - bool haveProfileWeights = false; - weight_t const weightBlock = block->bbWeight; - weight_t const weightTest = bTest->bbWeight; - weight_t const weightTop = bTop->bbWeight; - - // If we have profile data then we calculate the number of times - // the loop will iterate into loopIterations - if (fgIsUsingProfileWeights()) - { - // Only rely upon the profile weight when all three of these blocks - // have good profile weights - if (block->hasProfileWeight() && bTest->hasProfileWeight() && bTop->hasProfileWeight()) - { - // If this while loop never iterates then don't bother transforming - // - if (weightTop == BB_ZERO_WEIGHT) - { - return true; - } - - haveProfileWeights = true; - - // We generally expect weightTest > weightTop - // - // Tolerate small inconsistencies... - // - if (!fgProfileWeightsConsistent(weightBlock + weightTop, weightTest)) - { - JITDUMP("Profile weights locally inconsistent: block " FMT_WT ", next " FMT_WT ", test " FMT_WT "\n", - weightBlock, weightTop, weightTest); - } - else - { - // Determine average iteration count - // - // weightTop is the number of time this loop executes - // weightTest is the number of times that we consider entering or remaining in the loop - // loopIterations is the average number of times that this loop iterates - // - weight_t loopEntries = weightTest - weightTop; - - // If profile is inaccurate, try and use other data to provide a credible estimate. - // The value should at least be >= weightBlock. - // - if (loopEntries < weightBlock) - { - loopEntries = weightBlock; - } - - loopIterations = weightTop / loopEntries; - } - } - else - { - JITDUMP("Missing profile data for loop!\n"); - } - } - unsigned maxDupCostSz = 34; if ((compCodeOpt() == FAST_CODE) || compStressCompile(STRESS_DO_WHILE_LOOPS, 30)) @@ -2158,6 +2105,8 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) } } + const bool haveProfileWeights = fgIsUsingProfileWeights(); + #ifdef DEBUG if (verbose) { From d8e96d6decb058fcd2fb84126dc374658a834476 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 29 May 2025 13:32:08 -0400 Subject: [PATCH 2/7] Skip loops with low iteration counts --- src/coreclr/jit/optimizer.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index abdfe49a753568..e188da058370e9 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2012,10 +2012,15 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) JITDUMP("Matched flow pattern for loop inversion: block " FMT_BB " bTop " FMT_BB " bTest " FMT_BB "\n", block->bbNum, bTop->bbNum, bTest->bbNum); - const weight_t loopIterations = bTest->bbWeight / BasicBlock::getCalledCount(this); - if (loopIterations == BB_ZERO_WEIGHT) + weight_t loopIterations = BB_LOOP_WEIGHT_SCALE; + const bool haveProfileWeights = fgIsUsingProfileWeights(); + if (haveProfileWeights) { - return false; + loopIterations = bTest->GetTrueEdge()->getLikelyWeight() / BasicBlock::getCalledCount(this); + if (loopIterations < BB_LOOP_WEIGHT_SCALE) + { + return false; + } } // Estimate the cost of cloning the entire test block. @@ -2105,8 +2110,6 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) } } - const bool haveProfileWeights = fgIsUsingProfileWeights(); - #ifdef DEBUG if (verbose) { From 464d8e43e90addeee4b0d67c78e38e70a7e62501 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 29 May 2025 13:40:25 -0400 Subject: [PATCH 3/7] Comments --- src/coreclr/jit/optimizer.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index e188da058370e9..f13170b41ea63a 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2012,6 +2012,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) JITDUMP("Matched flow pattern for loop inversion: block " FMT_BB " bTop " FMT_BB " bTest " FMT_BB "\n", block->bbNum, bTop->bbNum, bTest->bbNum); + // Don't bother inverting the loop if PGO data suggests it doesn't iterate more than a few times. weight_t loopIterations = BB_LOOP_WEIGHT_SCALE; const bool haveProfileWeights = fgIsUsingProfileWeights(); if (haveProfileWeights) @@ -2019,6 +2020,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) loopIterations = bTest->GetTrueEdge()->getLikelyWeight() / BasicBlock::getCalledCount(this); if (loopIterations < BB_LOOP_WEIGHT_SCALE) { + JITDUMP("Loop iterates only " FMT_WT " times. Skipping loop inversion.\n", loopIterations); return false; } } From 482aec8cbc1dd17d06601c9ea0c525c0b13f4f0a Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Fri, 30 May 2025 15:50:42 -0400 Subject: [PATCH 4/7] Remove heuristic --- src/coreclr/jit/optimizer.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index f13170b41ea63a..ea39b957a166c4 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2012,17 +2012,14 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) JITDUMP("Matched flow pattern for loop inversion: block " FMT_BB " bTop " FMT_BB " bTest " FMT_BB "\n", block->bbNum, bTop->bbNum, bTest->bbNum); - // Don't bother inverting the loop if PGO data suggests it doesn't iterate more than a few times. weight_t loopIterations = BB_LOOP_WEIGHT_SCALE; const bool haveProfileWeights = fgIsUsingProfileWeights(); + + // If we have PGO data, we can estimate the loop iteration count + // by computing how many times the loop entry branch is taken per method invoke, on average. if (haveProfileWeights) { loopIterations = bTest->GetTrueEdge()->getLikelyWeight() / BasicBlock::getCalledCount(this); - if (loopIterations < BB_LOOP_WEIGHT_SCALE) - { - JITDUMP("Loop iterates only " FMT_WT " times. Skipping loop inversion.\n", loopIterations); - return false; - } } // Estimate the cost of cloning the entire test block. From 0e0ba9067cc3577a3528a94bc19390537cc7bfde Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 2 Jun 2025 15:14:14 -0400 Subject: [PATCH 5/7] Use loop entry-relative iteration computation --- src/coreclr/jit/optimizer.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index cb075fa4b812d3..501420804e9447 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -1948,10 +1948,22 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) const bool haveProfileWeights = fgIsUsingProfileWeights(); // If we have PGO data, we can estimate the loop iteration count - // by computing how many times the loop entry branch is taken per method invoke, on average. + // by computing the loop body's weight relative to its entry weight. if (haveProfileWeights) { - loopIterations = bTest->GetTrueEdge()->getLikelyWeight() / BasicBlock::getCalledCount(this); + // If the loop never iterates, don't invert it. + // + if (bTop->isRunRarely()) + { + return false; + } + + // If the profile is inaccurate such that bTest->bbWeight < bTop->bbWeight, + // try to provide a credible estimate using the entering block's weight. + // The value should be at least the weight of 'block'. + // + const weight_t loopEntries = max(block->bbWeight, bTest->bbWeight - bTop->bbWeight); + loopIterations = (loopEntries == BB_ZERO_WEIGHT) ? BB_ZERO_WEIGHT : bTop->bbWeight / loopEntries; } // Estimate the cost of cloning the entire test block. From c7083b63a396227cd8994586f5482f0a1680d3ae Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 4 Aug 2025 15:17:13 -0400 Subject: [PATCH 6/7] Cleanup --- src/coreclr/jit/optimizer.cpp | 98 +++++++++++++---------------------- 1 file changed, 36 insertions(+), 62 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 235b3c62741116..2c0effe0e6b73d 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -1901,6 +1901,42 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) JITDUMP("Condition in block " FMT_BB " of loop " FMT_LP " is a candidate for duplication to invert the loop\n", condBlock->bbNum, loop->GetIndex()); + weight_t loopIterations = BB_LOOP_WEIGHT_SCALE; + const bool haveProfileWeights = fgIsUsingProfileWeights(); + const weight_t weightPreheader = preheader->bbWeight; + const weight_t weightCond = condBlock->bbWeight; + const weight_t weightStayInLoopSucc = stayInLoopSucc->bbWeight; + + // If we have profile data then we calculate the number of times + // the loop will iterate into loopIterations + if (haveProfileWeights) + { + assert(preheader->hasProfileWeight()); + assert(condBlock->hasProfileWeight()); + assert(stayInLoopSucc->hasProfileWeight()); + + // If this while loop never iterates then don't bother transforming + // + if (weightStayInLoopSucc == BB_ZERO_WEIGHT) + { + JITDUMP("No loop-inversion for " FMT_LP " since the in-loop successor " FMT_BB " has 0 weight\n", + loop->GetIndex(), preheader->bbNum); + return false; + } + + // Determine average iteration count + // + // weightTop is the number of time this loop executes + // weightTest is the number of times that we consider entering or remaining in the loop + // loopIterations is the average number of times that this loop iterates + // + // If profile is inaccurate, use other data to provide a credible estimate. + // The value should at least be >= weightPreheader. + // + const weight_t loopEntries = max(weightPreheader, weightCond - weightStayInLoopSucc); + loopIterations = weightStayInLoopSucc / loopEntries; + } + // Check if loop is small enough to consider for inversion. // Large loops are less likely to benefit from inversion. const int invertSizeLimit = JitConfig.JitLoopInversionSizeLimit(); @@ -1957,68 +1993,6 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) } } - weight_t loopIterations = BB_LOOP_WEIGHT_SCALE; - bool haveProfileWeights = false; - weight_t const weightPreheader = preheader->bbWeight; - weight_t const weightCond = condBlock->bbWeight; - weight_t const weightStayInLoopSucc = stayInLoopSucc->bbWeight; - - // If we have profile data then we calculate the number of times - // the loop will iterate into loopIterations - if (fgIsUsingProfileWeights()) - { - // Only rely upon the profile weight when all three of these blocks - // have good profile weights - if (preheader->hasProfileWeight() && condBlock->hasProfileWeight() && stayInLoopSucc->hasProfileWeight()) - { - // If this while loop never iterates then don't bother transforming - // - if (weightStayInLoopSucc == BB_ZERO_WEIGHT) - { - JITDUMP("No loop-inversion for " FMT_LP " since the in-loop successor " FMT_BB " has 0 weight\n", - loop->GetIndex(), preheader->bbNum); - return false; - } - - haveProfileWeights = true; - - // We generally expect weightCond > weightStayInLoopSucc - // - // Tolerate small inconsistencies... - // - if (!fgProfileWeightsConsistent(weightPreheader + weightStayInLoopSucc, weightCond)) - { - JITDUMP("Profile weights locally inconsistent: preheader " FMT_WT ", stayInLoopSucc " FMT_WT - ", cond " FMT_WT "\n", - weightPreheader, weightStayInLoopSucc, weightCond); - } - else - { - // Determine average iteration count - // - // weightTop is the number of time this loop executes - // weightTest is the number of times that we consider entering or remaining in the loop - // loopIterations is the average number of times that this loop iterates - // - weight_t loopEntries = weightCond - weightStayInLoopSucc; - - // If profile is inaccurate, try and use other data to provide a credible estimate. - // The value should at least be >= weightBlock. - // - if (loopEntries < weightPreheader) - { - loopEntries = weightPreheader; - } - - loopIterations = weightStayInLoopSucc / loopEntries; - } - } - else - { - JITDUMP("Missing profile data for loop!\n"); - } - } - unsigned maxDupCostSz = 34; if ((compCodeOpt() == FAST_CODE) || compStressCompile(STRESS_DO_WHILE_LOOPS, 30)) From 80cb61f2904f6e6074c4177e76710eeb54ffb20f Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 4 Aug 2025 15:17:44 -0400 Subject: [PATCH 7/7] Remove dead code --- src/coreclr/jit/compiler.h | 2 -- src/coreclr/jit/fgbasic.cpp | 33 --------------------------------- 2 files changed, 35 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 8707af8c967e0f..096a9e4fb1120d 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6195,8 +6195,6 @@ class Compiler bool fgDedupReturnComparison(BasicBlock* block); - bool fgIsForwardBranch(BasicBlock* bJump, BasicBlock* bDest, BasicBlock* bSrc = nullptr); - bool fgUpdateFlowGraph(bool doTailDup = false, bool isPhase = false); PhaseStatus fgUpdateFlowGraphPhase(); diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index b5ffa049a8562d..02bb832c24a4f2 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -5218,39 +5218,6 @@ void Compiler::fgPrepareCallFinallyRetForRemoval(BasicBlock* block) block->SetKind(BBJ_ALWAYS); } -/***************************************************************************** - * - * Is the BasicBlock bJump a forward branch? - * Optionally bSrc can be supplied to indicate that - * bJump must be forward with respect to bSrc - */ -bool Compiler::fgIsForwardBranch(BasicBlock* bJump, BasicBlock* bDest, BasicBlock* bSrc /* = NULL */) -{ - assert((bJump->KindIs(BBJ_ALWAYS, BBJ_CALLFINALLYRET) && bJump->TargetIs(bDest)) || - (bJump->KindIs(BBJ_COND) && bJump->TrueTargetIs(bDest))); - - bool result = false; - BasicBlock* bTemp = (bSrc == nullptr) ? bJump : bSrc; - - while (true) - { - bTemp = bTemp->Next(); - - if (bTemp == nullptr) - { - break; - } - - if (bTemp == bDest) - { - result = true; - break; - } - } - - return result; -} - /***************************************************************************** * * Function called to move the range of blocks [bStart .. bEnd].