From a0e3e4df3746fda4d4955f056a94236538518707 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 29 Oct 2024 17:09:46 +0100 Subject: [PATCH 01/19] JIT: Make loop inversion graph based Rewrite loop inversion to be graph based and to use the new loop representation. --- src/coreclr/jit/compiler.cpp | 8 +- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/fgdiagnostic.cpp | 7 +- src/coreclr/jit/optimizer.cpp | 524 +++++++++---------------------- 4 files changed, 166 insertions(+), 375 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index bdb8b686a176fd..3059cf00145c53 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4869,10 +4869,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl if (opts.OptimizationEnabled()) { - // Invert loops - // - DoPhase(this, PHASE_INVERT_LOOPS, &Compiler::optInvertLoops); - // Run some flow graph optimizations (but don't reorder) // DoPhase(this, PHASE_OPTIMIZE_FLOW, &Compiler::optOptimizeFlow); @@ -4900,6 +4896,10 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_SET_BLOCK_WEIGHTS, &Compiler::optSetBlockWeights); + // Invert loops + // + DoPhase(this, PHASE_INVERT_LOOPS, &Compiler::optInvertLoops); + // Clone loops with optimization opportunities, and choose one based on dynamic condition evaluation. 
// DoPhase(this, PHASE_CLONE_LOOPS, &Compiler::optCloneLoops); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 56410dfd3035a9..7989ef632bdefa 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7145,7 +7145,7 @@ class Compiler OptInvertCountTreeInfoType optInvertCountTreeInfo(GenTree* tree); - bool optInvertWhileLoop(BasicBlock* block); + bool optTryInvertWhileLoop(FlowGraphNaturalLoop* loop); bool optIfConvert(BasicBlock* block); private: diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index c355d02c558c30..745c7d987780c4 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -4714,7 +4714,12 @@ void Compiler::fgDebugCheckLoops() loop->VisitRegularExitBlocks([=](BasicBlock* exit) { for (BasicBlock* pred : exit->PredBlocks()) { - assert(loop->ContainsBlock(pred)); + if (!loop->ContainsBlock(pred)) + { + JITDUMP("Loop " FMT_LP " exit " FMT_BB " has non-loop predecessor " FMT_BB "\n", + loop->GetIndex(), exit->bbNum, pred->bbNum); + assert(!"Loop exit has non-loop predecessor"); + } } return BasicBlockVisit::Continue; }); diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 161478d63f0c7a..4d625eb3006371 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -1850,13 +1850,11 @@ Compiler::OptInvertCountTreeInfoType Compiler::optInvertCountTreeInfo(GenTree* t } //----------------------------------------------------------------------------- -// optInvertWhileLoop: modify flow and duplicate code so that for/while loops are +// optTryInvertWhileLoop: modify flow and duplicate code so that for/while loops are // entered at top and tested at bottom (aka loop rotation or bottom testing). // Creates a "zero trip test" condition which guards entry to the loop. // Enables loop invariant hoisting and loop cloning, which depend on -// `do {} while` format loops. 
Enables creation of a pre-header block after the -// zero trip test to place code that only runs if the loop is guaranteed to -// run at least once. +// `do {} while` format loops. // // Arguments: // block -- block that may be the predecessor of the un-rotated loop's test block. @@ -1864,156 +1862,95 @@ Compiler::OptInvertCountTreeInfoType Compiler::optInvertCountTreeInfo(GenTree* t // Returns: // true if any IR changes possibly made (used to determine phase return status) // -// Notes: -// Uses a simple lexical screen to detect likely loops. -// -// Specifically, we're looking for the following case: -// -// block: -// ... -// jmp test // `block` argument -// top: -// ... -// ... -// test: -// ..stmts.. -// cond -// jtrue top -// -// If we find this, and the condition is simple enough, we change -// the loop to the following: -// -// block: -// ... -// jmp bNewCond -// bNewCond: -// ..stmts.. // duplicated cond block statements -// cond // duplicated cond -// jfalse join -// // else fall-through -// top: -// ... -// ... -// test: -// ..stmts.. -// cond -// jtrue top -// join: -// -// Makes no changes if the flow pattern match fails. -// -// May not modify a loop if profile is unfavorable, if the cost of duplicating -// code is large (factoring in potential CSEs). -// -bool Compiler::optInvertWhileLoop(BasicBlock* block) +bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) { - assert(opts.OptimizationEnabled()); - assert(compCodeOpt() != SMALL_CODE); - - // Does the BB end with an unconditional jump? 
+ // Should have preheaders at this point + assert(loop->EntryEdges().size() == 1); + BasicBlock* preheader = loop->EntryEdge(0)->getSourceBlock(); - if (!block->KindIs(BBJ_ALWAYS) || block->JumpsToNext()) - { - return false; - } + ArrayStack duplicatedBlocks(getAllocator(CMK_LoopOpt)); - if (block->HasFlag(BBF_KEEP_BBJ_ALWAYS)) + BasicBlock* condBlock = loop->GetHeader(); + while (true) { - // It can't be one of the ones we use for our exception magic - return false; - } + if (!BasicBlock::sameEHRegion(preheader, condBlock)) + { + JITDUMP("No loop-inversion for " FMT_LP + " since we could not find a condition block in the same EH region as the preheader\n", + loop->GetIndex()); + return false; + } - // Get hold of the jump target - BasicBlock* const bTest = block->GetTarget(); + duplicatedBlocks.Push(condBlock); - // Does the bTest consist of 'jtrue(cond) block' ? - if (!bTest->KindIs(BBJ_COND)) - { - return false; - } + if (condBlock->KindIs(BBJ_ALWAYS)) + { + condBlock = condBlock->GetTarget(); - // bTest must be a backwards jump to block->bbNext - // This will be the top of the loop. - // - BasicBlock* const bTop = bTest->GetTrueTarget(); + if (!loop->ContainsBlock(condBlock) || (condBlock == loop->GetHeader())) + { + JITDUMP("No loop-inversion for " FMT_LP "; ran out of blocks following BBJ_ALWAYS blocks\n", + loop->GetIndex()); + return false; + } - if (!block->NextIs(bTop)) - { - return false; - } + continue; + } - // Since bTest is a BBJ_COND it will have a false target - // - BasicBlock* const bJoin = bTest->GetFalseTarget(); - noway_assert(bJoin != nullptr); + if (!condBlock->KindIs(BBJ_COND)) + { + JITDUMP("No loop-inversion for " FMT_LP " since we could not find any BBJ_COND block\n", loop->GetIndex()); + return false; + } - // 'block' must be in the same try region as the condition, since we're going to insert a duplicated condition - // in a new block after 'block', and the condition might include exception throwing code. 
- // On non-funclet platforms (x86), the catch exit is a BBJ_ALWAYS, but we don't want that to - // be considered as the head of a loop, so also disallow different handler regions. - if (!BasicBlock::sameEHRegion(block, bTest)) - { - return false; + break; } - // The duplicated condition block will branch to bTest->GetFalseTarget(), so that also better be in the - // same try region (or no try region) to avoid generating illegal flow. - if (bJoin->hasTryIndex() && !BasicBlock::sameTryRegion(block, bJoin)) - { - return false; - } + bool trueExits = !loop->ContainsBlock(condBlock->GetTrueTarget()); + bool falseExits = !loop->ContainsBlock(condBlock->GetFalseTarget()); - // It has to be a forward jump. Defer this check until after all the cheap checks - // are done, since it iterates forward in the block list looking for block's target. - // TODO-CQ: Check if we can also optimize the backwards jump as well. - // - if (!fgIsForwardBranch(block, block->GetTarget())) + if (trueExits == falseExits) { + JITDUMP("No loop-inversion for " FMT_LP " since we could not find any exiting BBJ_COND block\n", + loop->GetIndex()); return false; } - // Find the loop termination test at the bottom of the loop. - Statement* const condStmt = bTest->lastStmt(); + BasicBlock* exit = trueExits ? condBlock->GetTrueTarget() : condBlock->GetFalseTarget(); - // Verify the test block ends with a conditional that we can manipulate. - GenTree* const condTree = condStmt->GetRootNode(); - noway_assert(condTree->gtOper == GT_JTRUE); - if (!condTree->AsOp()->gtOp1->OperIsCompare()) + // Exiting the loop may enter a new try-region. However, to keep exits canonical, we will + // have to split the exit such that old loop edges exit to one half, while the duplicated condition + // exits to the other half. This will result in jump into the middle of a try-region, which is illegal. + // TODO: We can fix this by placing the first half of the split (which will be an empty block) outside + // the try region. 
+ if (!BasicBlock::sameEHRegion(preheader, exit)) { + JITDUMP("No loop-inversion for " FMT_LP " since the preheader " FMT_BB " and exit " FMT_BB + " are in different EH regions\n", + loop->GetIndex(), preheader->bbNum, exit->bbNum); return false; } - JITDUMP("Matched flow pattern for loop inversion: block " FMT_BB " bTop " FMT_BB " bTest " FMT_BB "\n", - block->bbNum, bTop->bbNum, bTest->bbNum); - - // Estimate the cost of cloning the entire test block. - // - // Note: it would help throughput to compute the maximum cost - // first and early out for large bTest blocks, as we are doing two - // tree walks per tree. But because of this helper call scan, the - // maximum cost depends on the trees in the block. - // - // We might consider flagging blocks with hoistable helper calls - // during importation, so we can avoid the helper search and - // implement an early bail out for large blocks with no helper calls. - // - // Note that gtPrepareCost can cause operand swapping, so we must - // return `true` (possible IR change) from here on. 
+ JITDUMP("Condition in block " FMT_BB " of loop " FMT_LP " is a candidate for duplication to invert the loop\n", + condBlock->bbNum, loop->GetIndex()); unsigned estDupCostSz = 0; - for (Statement* const stmt : bTest->Statements()) + for (int i = 0; i < duplicatedBlocks.Height(); i++) { - GenTree* tree = stmt->GetRootNode(); - gtPrepareCost(tree); - estDupCostSz += tree->GetCostSz(); + BasicBlock* block = duplicatedBlocks.Bottom(i); + for (Statement* stmt : block->Statements()) + { + GenTree* tree = stmt->GetRootNode(); + gtPrepareCost(tree); + estDupCostSz += tree->GetCostSz(); + } } weight_t loopIterations = BB_LOOP_WEIGHT_SCALE; bool allProfileWeightsAreValid = false; - weight_t const weightBlock = block->bbWeight; - weight_t const weightTest = bTest->bbWeight; - weight_t const weightTop = bTop->bbWeight; + weight_t const weightPreheader = preheader->bbWeight; + weight_t const weightCond = condBlock->bbWeight; // If we have profile data then we calculate the number of times // the loop will iterate into loopIterations @@ -2021,46 +1958,19 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) { // Only rely upon the profile weight when all three of these blocks // have good profile weights - if (block->hasProfileWeight() && bTest->hasProfileWeight() && bTop->hasProfileWeight()) + if (preheader->hasProfileWeight() && condBlock->hasProfileWeight()) { // If this while loop never iterates then don't bother transforming // - if (weightTop == BB_ZERO_WEIGHT) + if (weightPreheader == BB_ZERO_WEIGHT) { - return true; - } - - // We generally expect weightTest > weightTop - // - // Tolerate small inconsistencies... 
- // - if (!fgProfileWeightsConsistent(weightBlock + weightTop, weightTest)) - { - JITDUMP("Profile weights locally inconsistent: block " FMT_WT ", next " FMT_WT ", test " FMT_WT "\n", - weightBlock, weightTop, weightTest); + JITDUMP("No loop-inversion for " FMT_LP " since the the preheader " FMT_BB " has 0 weight\n", + loop->GetIndex(), preheader->bbNum); + return false; } - else - { - allProfileWeightsAreValid = true; - - // Determine average iteration count - // - // weightTop is the number of time this loop executes - // weightTest is the number of times that we consider entering or remaining in the loop - // loopIterations is the average number of times that this loop iterates - // - weight_t loopEntries = weightTest - weightTop; - - // If profile is inaccurate, try and use other data to provide a credible estimate. - // The value should at least be >= weightBlock. - // - if (loopEntries < weightBlock) - { - loopEntries = weightBlock; - } - loopIterations = weightTop / loopEntries; - } + loopIterations = weightCond / weightPreheader; + allProfileWeightsAreValid = true; } else { @@ -2085,9 +1995,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) } } - // If the compare has too high cost then we don't want to dup. - - bool costIsTooHigh = (estDupCostSz > maxDupCostSz); + bool costIsTooHigh = estDupCostSz > maxDupCostSz; OptInvertCountTreeInfoType optInvertTotalInfo = {}; if (costIsTooHigh) @@ -2101,32 +2009,37 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) // // If the condition has array.Length operations, also boost, as they are likely to be CSE'd. 
- for (Statement* const stmt : bTest->Statements()) + for (int i = 0; i < duplicatedBlocks.Height() && costIsTooHigh; i++) { - GenTree* tree = stmt->GetRootNode(); - - OptInvertCountTreeInfoType optInvertInfo = optInvertCountTreeInfo(tree); - optInvertTotalInfo.sharedStaticHelperCount += optInvertInfo.sharedStaticHelperCount; - optInvertTotalInfo.arrayLengthCount += optInvertInfo.arrayLengthCount; - - if ((optInvertInfo.sharedStaticHelperCount > 0) || (optInvertInfo.arrayLengthCount > 0)) + BasicBlock* block = duplicatedBlocks.Bottom(i); + for (Statement* const stmt : block->Statements()) { - // Calculate a new maximum cost. We might be able to early exit. + GenTree* tree = stmt->GetRootNode(); - unsigned newMaxDupCostSz = - maxDupCostSz + 24 * min(optInvertTotalInfo.sharedStaticHelperCount, (int)(loopIterations + 1.5)) + - 8 * optInvertTotalInfo.arrayLengthCount; + OptInvertCountTreeInfoType optInvertInfo = optInvertCountTreeInfo(tree); + optInvertTotalInfo.sharedStaticHelperCount += optInvertInfo.sharedStaticHelperCount; + optInvertTotalInfo.arrayLengthCount += optInvertInfo.arrayLengthCount; - // Is the cost too high now? - costIsTooHigh = (estDupCostSz > newMaxDupCostSz); - if (!costIsTooHigh) + if ((optInvertInfo.sharedStaticHelperCount > 0) || (optInvertInfo.arrayLengthCount > 0)) { - // No need counting any more trees; we're going to do the transformation. - JITDUMP("Decided to duplicate loop condition block after counting helpers in tree [%06u] in " - "block " FMT_BB, - dspTreeID(tree), bTest->bbNum); - maxDupCostSz = newMaxDupCostSz; // for the JitDump output below - break; + // Calculate a new maximum cost. We might be able to early exit. + + unsigned newMaxDupCostSz = + maxDupCostSz + + 24 * min(optInvertTotalInfo.sharedStaticHelperCount, (int)(loopIterations + 1.5)) + + 8 * optInvertTotalInfo.arrayLengthCount; + + // Is the cost too high now? 
+ costIsTooHigh = (estDupCostSz > newMaxDupCostSz); + if (!costIsTooHigh) + { + // No need counting any more trees; we're going to do the transformation. + JITDUMP("Decided to duplicate loop condition block after counting helpers in tree [%06u] in " + "block " FMT_BB, + dspTreeID(tree), block->bbNum); + maxDupCostSz = newMaxDupCostSz; // for the JitDump output below + break; + } } } } @@ -2140,7 +2053,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) printf( "\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i," "\n loopIterations = %7.3f, optInvertTotalInfo.sharedStaticHelperCount >= %d, validProfileWeights = %s\n", - dspTreeID(condTree), costIsTooHigh ? "not done" : "performed", estDupCostSz, + dspTreeID(condBlock->lastStmt()->GetRootNode()), costIsTooHigh ? "not done" : "performed", estDupCostSz, costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, loopIterations, optInvertTotalInfo.sharedStaticHelperCount, dspBool(allProfileWeightsAreValid)); } @@ -2151,182 +2064,56 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) return true; } - bool foundCondTree = false; - - // Create a new block after `block` to put the copied condition code. - // - BasicBlock* const bNewCond = fgNewBBafter(BBJ_COND, block, /*extendRegion*/ true); - - // Clone each statement in bTest and append to bNewCond. - for (Statement* const stmt : bTest->Statements()) - { - GenTree* originalTree = stmt->GetRootNode(); - GenTree* clonedTree = gtCloneExpr(originalTree); - - // Special case handling needed for the conditional jump tree - if (originalTree == condTree) - { - foundCondTree = true; - - // Get the compare subtrees - GenTree* originalCompareTree = originalTree->AsOp()->gtOp1; - GenTree* clonedCompareTree = clonedTree->AsOp()->gtOp1; - assert(originalCompareTree->OperIsCompare()); - assert(clonedCompareTree->OperIsCompare()); - - // The original test branches to remain in the loop. 
The - // new cloned test will branch to avoid the loop. So the - // cloned compare needs to reverse the branch condition. - gtReverseCond(clonedCompareTree); - } + // Split the preheader so we can duplicate the statements into it. The new + // block will be a BBJ_COND node. + BasicBlock* newCond = fgSplitBlockAtEnd(preheader); - Statement* clonedStmt = fgNewStmtAtEnd(bNewCond, clonedTree); + // Split the new block once more to create a proper preheader, so we end up + // with preheader (always) -> newCond (cond) -> newPreheader (always) -> header + BasicBlock* newPreheader = fgSplitBlockAtEnd(newCond); - if (opts.compDbgInfo) - { - clonedStmt->SetDebugInfo(stmt->GetDebugInfo()); - } - } + // Make sure exit stays canonical + BasicBlock* nonEnterBlock = fgSplitBlockAtBeginning(exit); - assert(foundCondTree); + JITDUMP("New preheader is " FMT_BB "\n", newPreheader->bbNum); + JITDUMP("Duplicated condition block is " FMT_BB "\n", newCond->bbNum); + JITDUMP("Old exit is " FMT_BB ", new non-enter block is " FMT_BB "\n", exit->bbNum, nonEnterBlock->bbNum); - // Flag the block that received the copy as potentially having various constructs. - bNewCond->CopyFlags(bTest, BBF_COPY_PROPAGATE); + // Get the newCond -> newPreheader edge + FlowEdge* newCondToNewPreheader = newCond->GetTargetEdge(); - // Fix flow and profile - // - bNewCond->inheritWeight(block); + // Add newCond -> nonEnterBlock + FlowEdge* newCondToNewExit = fgAddRefPred(nonEnterBlock, newCond); - if (allProfileWeightsAreValid) - { - weight_t const delta = weightTest - weightTop; + newCond->SetCond(trueExits ? newCondToNewExit : newCondToNewPreheader, + trueExits ? newCondToNewPreheader : newCondToNewExit); - // If there is just one outside edge incident on bTest, then ideally delta == block->bbWeight. - // But this might not be the case if profile data is inconsistent. - // - // And if bTest has multiple outside edges we want to account for the weight of them all. 
- // - if (delta > block->bbWeight) - { - bNewCond->setBBProfileWeight(delta); - } - } - - // Update pred info - // - // For now we set the likelihood of the newCond branch to match - // the likelihood of the test branch (though swapped, since we're - // currently reversing the condition). This may or may not match - // the block weight adjustments we're making. All this becomes - // easier to reconcile once we rely on edge likelihoods more and - // have synthesis running (so block weights ==> frequencies). - // - // Until then we won't worry that edges and blocks are potentially - // out of sync. - // - FlowEdge* const testTopEdge = bTest->GetTrueEdge(); - FlowEdge* const testJoinEdge = bTest->GetFalseEdge(); - FlowEdge* const newCondJoinEdge = fgAddRefPred(bJoin, bNewCond, testJoinEdge); - FlowEdge* const newCondTopEdge = fgAddRefPred(bTop, bNewCond, testTopEdge); - - bNewCond->SetTrueEdge(newCondJoinEdge); - bNewCond->SetFalseEdge(newCondTopEdge); - - fgRedirectTargetEdge(block, bNewCond); - assert(block->JumpsToNext()); - - // Move all predecessor edges that look like loop entry edges to point to the new cloned condition - // block, not the existing condition block. The idea is that if we only move `block` to point to - // `bNewCond`, but leave other `bTest` predecessors still pointing to `bTest`, when we eventually - // recognize loops, the loop will appear to have multiple entries, which will prevent optimization. - // We don't have loops yet, but blocks should be in increasing lexical numbered order, so use that - // as the proxy for predecessors that are "in" versus "out" of the potential loop. Note that correctness - // is maintained no matter which condition block we point to, but we'll lose optimization potential - // (and create spaghetti code) if we get it wrong. 
- // - unsigned const loopFirstNum = bTop->bbNum; - unsigned const loopBottomNum = bTest->bbNum; - for (BasicBlock* const predBlock : bTest->PredBlocksEditing()) - { - unsigned const bNum = predBlock->bbNum; - if ((loopFirstNum <= bNum) && (bNum <= loopBottomNum)) - { - // Looks like the predecessor is from within the potential loop; skip it. - continue; - } + newCond->GetTrueEdge()->setLikelihood(condBlock->GetTrueEdge()->getLikelihood()); + newCond->GetFalseEdge()->setLikelihood(condBlock->GetFalseEdge()->getLikelihood()); - // Redirect the predecessor to the new block. - JITDUMP("Redirecting non-loop " FMT_BB " -> " FMT_BB " to " FMT_BB " -> " FMT_BB "\n", predBlock->bbNum, - bTest->bbNum, predBlock->bbNum, bNewCond->bbNum); + BasicBlock* newHeader = trueExits ? condBlock->GetFalseTarget() : condBlock->GetTrueTarget(); - switch (predBlock->GetKind()) - { - case BBJ_ALWAYS: - case BBJ_CALLFINALLY: - case BBJ_CALLFINALLYRET: - case BBJ_COND: - case BBJ_SWITCH: - case BBJ_EHFINALLYRET: - fgReplaceJumpTarget(predBlock, bTest, bNewCond); - break; + // Add newPreheader -> newHeader + FlowEdge* newPreheaderToNewHeader = fgAddRefPred(newHeader, newPreheader, newPreheader->GetTargetEdge()); - case BBJ_EHCATCHRET: - case BBJ_EHFILTERRET: - // These block types should not need redirecting - break; + // Remove newPreheader -> header + fgRemoveRefPred(newPreheader->GetTargetEdge()); - default: - assert(!"Unexpected bbKind for predecessor block"); - break; - } - } + // Update newPreheader to point to newHeader + newPreheader->SetTargetEdge(newPreheaderToNewHeader); - // If we have profile data for all blocks and we know that we are cloning the - // `bTest` block into `bNewCond` and thus changing the control flow from `block` so - // that it no longer goes directly to `bTest` anymore, we have to adjust - // various weights. 
- // - if (allProfileWeightsAreValid) + // Duplicate all the code now + for (int i = 0; i < duplicatedBlocks.Height(); i++) { - // Update the weight for bTest. Normally, this reduces the weight of the bTest, except in odd - // cases of stress modes with inconsistent weights. - // - JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", bTest->bbNum, weightTest, - weightTop); - bTest->inheritWeight(bTop); - -#ifdef DEBUG - // If we're checking profile data, see if profile for the two target blocks is consistent. - // - if ((activePhaseChecks & PhaseChecks::CHECK_PROFILE) == PhaseChecks::CHECK_PROFILE) + BasicBlock* block = duplicatedBlocks.Bottom(i); + for (Statement* stmt : block->Statements()) { - if (JitConfig.JitProfileChecks() > 0) - { - const ProfileChecks checks = (ProfileChecks)JitConfig.JitProfileChecks(); - const bool nextProfileOk = fgDebugCheckIncomingProfileData(bNewCond->GetFalseTarget(), checks); - const bool jumpProfileOk = fgDebugCheckIncomingProfileData(bNewCond->GetTrueTarget(), checks); - - if (hasFlag(checks, ProfileChecks::RAISE_ASSERT)) - { - assert(nextProfileOk); - assert(jumpProfileOk); - } - } + GenTree* clonedTree = gtCloneExpr(stmt->GetRootNode()); + fgNewStmtAtEnd(newCond, clonedTree, stmt->GetDebugInfo()); } -#endif // DEBUG - } - -#ifdef DEBUG - if (verbose) - { - printf("\nDuplicated loop exit block at " FMT_BB " for loop (" FMT_BB " - " FMT_BB ")\n", bNewCond->bbNum, - bNewCond->GetFalseTarget()->bbNum, bTest->bbNum); - printf("Estimated code size expansion is %d\n", estDupCostSz); - fgDumpBlock(bNewCond); - fgDumpBlock(bTest); + newCond->CopyFlags(block, BBF_COPY_PROPAGATE); } -#endif // DEBUG return true; } @@ -2339,9 +2126,6 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) // PhaseStatus Compiler::optInvertLoops() { - noway_assert(opts.OptimizationEnabled()); - noway_assert(fgModified == false); - #if defined(OPT_CONFIG) if (!JitConfig.JitDoLoopInversion()) { @@ -2350,37 +2134,39 @@ PhaseStatus 
Compiler::optInvertLoops() } #endif // OPT_CONFIG - bool madeChanges = fgRenumberBlocks(); + bool madeChanges = false; - if (compCodeOpt() == SMALL_CODE) - { - // do not invert any loops - } - else + if (compCodeOpt() != SMALL_CODE) { - for (BasicBlock* const block : Blocks()) - { - // Make sure the appropriate fields are initialized - // - if (block->bbWeight == BB_ZERO_WEIGHT) - { - // Zero weighted block can't have a LOOP_HEAD flag - noway_assert(block->isLoopHead() == false); - continue; - } + fgDumpFlowGraph(PHASE_INVERT_LOOPS, PhasePosition::PostPhase); - if (optInvertWhileLoop(block)) - { - madeChanges = true; - } + for (FlowGraphNaturalLoop* loop : m_loops->InPostOrder()) + { + madeChanges |= optTryInvertWhileLoop(loop); } } - if (fgModified) + if (madeChanges) { - // Reset fgModified here as we've done a consistent set of edits. - // - fgModified = false; + fgInvalidateDfsTree(); + m_dfsTree = fgComputeDfs(); + m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + + // In normal cases no further canonicalization will be required as we + // try to ensure loops stay canonicalized during inversion. However, + // duplicating the condition outside an inner loop can introduce new + // exits in the parent loop, so we may rarely need to recanonicalize + // exit blocks. + if (optCanonicalizeLoops()) + { + fgInvalidateDfsTree(); + m_dfsTree = fgComputeDfs(); + m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + } + + fgDebugCheckLoops(); + + fgRenumberBlocks(); } return madeChanges ? 
PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; From 5df0255da6cec94cae87c13bc55b7e3078389013 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 29 Oct 2024 17:14:00 +0100 Subject: [PATCH 02/19] Remove debug code --- src/coreclr/jit/optimizer.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 4d625eb3006371..504bd20c781495 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2138,8 +2138,6 @@ PhaseStatus Compiler::optInvertLoops() if (compCodeOpt() != SMALL_CODE) { - fgDumpFlowGraph(PHASE_INVERT_LOOPS, PhasePosition::PostPhase); - for (FlowGraphNaturalLoop* loop : m_loops->InPostOrder()) { madeChanges |= optTryInvertWhileLoop(loop); From b75aea72359f31d151f25aa6396ccd7375cf9cbd Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 29 Oct 2024 17:34:45 +0100 Subject: [PATCH 03/19] Fix release build --- src/coreclr/jit/optimizer.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 504bd20c781495..a2a15bbf88c749 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2109,7 +2109,17 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) for (Statement* stmt : block->Statements()) { GenTree* clonedTree = gtCloneExpr(stmt->GetRootNode()); - fgNewStmtAtEnd(newCond, clonedTree, stmt->GetDebugInfo()); + Statement* clonedStmt = fgNewStmtAtEnd(newCond, clonedTree, stmt->GetDebugInfo()); + + if (stmt == condBlock->lastStmt()) + { + // TODO: This ought not to be necessary, but has large negative diffs if we don't do it + assert(clonedStmt->GetRootNode()->OperIs(GT_JTRUE)); + clonedStmt->GetRootNode()->AsUnOp()->gtOp1 = gtReverseCond(clonedStmt->GetRootNode()->gtGetOp1()); + newCond->SetCond(newCond->GetFalseEdge(), newCond->GetTrueEdge()); + } + + DISPSTMT(clonedStmt); } newCond->CopyFlags(block, 
BBF_COPY_PROPAGATE); @@ -2162,8 +2172,6 @@ PhaseStatus Compiler::optInvertLoops() m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); } - fgDebugCheckLoops(); - fgRenumberBlocks(); } From d75f60e4e770912a63b6773362ea30d4bf6f29a7 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 30 Oct 2024 15:41:55 +0100 Subject: [PATCH 04/19] Avoid inverting already-inverted loops, duplicate weight manipulation from old inversion --- src/coreclr/jit/optimizer.cpp | 124 +++++++++++++++++++++++++++++++--- 1 file changed, 114 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index a2a15bbf88c749..0c718e991a2ffa 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -1917,6 +1917,14 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) } BasicBlock* exit = trueExits ? condBlock->GetTrueTarget() : condBlock->GetFalseTarget(); + BasicBlock* stayInLoopSucc = trueExits ? condBlock->GetFalseTarget() : condBlock->GetTrueTarget(); + + // If the condition is already a latch, then the loop is already inverted + if (stayInLoopSucc == loop->GetHeader()) + { + JITDUMP("No loop-inversion for " FMT_LP " since it is already inverted\n", loop->GetIndex()); + return false; + } // Exiting the loop may enter a new try-region. 
However, to keep exits canonical, we will // have to split the exit such that old loop edges exit to one half, while the duplicated condition @@ -1951,6 +1959,7 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) bool allProfileWeightsAreValid = false; weight_t const weightPreheader = preheader->bbWeight; weight_t const weightCond = condBlock->bbWeight; + weight_t const weightStayInLoopSucc = stayInLoopSucc->bbWeight; // If we have profile data then we calculate the number of times // the loop will iterate into loopIterations @@ -1958,19 +1967,48 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) { // Only rely upon the profile weight when all three of these blocks // have good profile weights - if (preheader->hasProfileWeight() && condBlock->hasProfileWeight()) + if (preheader->hasProfileWeight() && condBlock->hasProfileWeight() && stayInLoopSucc->hasProfileWeight()) { // If this while loop never iterates then don't bother transforming // - if (weightPreheader == BB_ZERO_WEIGHT) + if (weightStayInLoopSucc == BB_ZERO_WEIGHT) { - JITDUMP("No loop-inversion for " FMT_LP " since the the preheader " FMT_BB " has 0 weight\n", + JITDUMP("No loop-inversion for " FMT_LP " since the in-loop successor " FMT_BB " has 0 weight\n", loop->GetIndex(), preheader->bbNum); return false; } - loopIterations = weightCond / weightPreheader; - allProfileWeightsAreValid = true; + // We generally expect weightCond > weightStayInLoopSucc + // + // Tolerate small inconsistencies... 
+ // + if (!fgProfileWeightsConsistent(weightPreheader + weightStayInLoopSucc, weightCond)) + { + JITDUMP("Profile weights locally inconsistent: preheader " FMT_WT ", stayInLoopSucc " FMT_WT ", cond " FMT_WT "\n", + weightPreheader, weightStayInLoopSucc, weightCond); + } + else + { + allProfileWeightsAreValid = true; + + // Determine average iteration count + // + // weightTop is the number of time this loop executes + // weightTest is the number of times that we consider entering or remaining in the loop + // loopIterations is the average number of times that this loop iterates + // + weight_t loopEntries = weightCond - weightStayInLoopSucc; + + // If profile is inaccurate, try and use other data to provide a credible estimate. + // The value should at least be >= weightBlock. + // + if (loopEntries < weightPreheader) + { + loopEntries = weightPreheader; + } + + loopIterations = weightStayInLoopSucc / loopEntries; + } } else { @@ -2068,6 +2106,21 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) // block will be a BBJ_COND node. BasicBlock* newCond = fgSplitBlockAtEnd(preheader); + if (allProfileWeightsAreValid) + { + weight_t const delta = weightCond - weightStayInLoopSucc; + + // If there is just one outside edge incident on bTest, then ideally delta == block->bbWeight. + // But this might not be the case if profile data is inconsistent. + // + // And if bTest has multiple outside edges we want to account for the weight of them all. 
+ // + if (delta > preheader->bbWeight) + { + newCond->setBBProfileWeight(delta); + } + } + // Split the new block once more to create a proper preheader, so we end up // with preheader (always) -> newCond (cond) -> newPreheader (always) -> header BasicBlock* newPreheader = fgSplitBlockAtEnd(newCond); @@ -2091,16 +2144,14 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) newCond->GetTrueEdge()->setLikelihood(condBlock->GetTrueEdge()->getLikelihood()); newCond->GetFalseEdge()->setLikelihood(condBlock->GetFalseEdge()->getLikelihood()); - BasicBlock* newHeader = trueExits ? condBlock->GetFalseTarget() : condBlock->GetTrueTarget(); - - // Add newPreheader -> newHeader - FlowEdge* newPreheaderToNewHeader = fgAddRefPred(newHeader, newPreheader, newPreheader->GetTargetEdge()); + // Add newPreheader -> stayInLoopSucc + FlowEdge* newPreheaderToInLoopSucc = fgAddRefPred(stayInLoopSucc, newPreheader, newPreheader->GetTargetEdge()); // Remove newPreheader -> header fgRemoveRefPred(newPreheader->GetTargetEdge()); // Update newPreheader to point to newHeader - newPreheader->SetTargetEdge(newPreheaderToNewHeader); + newPreheader->SetTargetEdge(newPreheaderToInLoopSucc); // Duplicate all the code now for (int i = 0; i < duplicatedBlocks.Height(); i++) @@ -2125,6 +2176,59 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) newCond->CopyFlags(block, BBF_COPY_PROPAGATE); } + // If we have profile data for all blocks and we know that we are cloning the + // `bTest` block into `bNewCond` and thus changing the control flow from `block` so + // that it no longer goes directly to `bTest` anymore, we have to adjust + // various weights. + // + if (allProfileWeightsAreValid) + { + // Update the weight for the duplicated blocks. Normally, this reduces + // the weight of condBlock, except in odd cases of stress modes with + // inconsistent weights. 
+ // + for (int i = 0; i < duplicatedBlocks.Height(); i++) + { + BasicBlock* block = duplicatedBlocks.Bottom(i); + JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", block->bbNum, weightCond, + weightStayInLoopSucc); + block->inheritWeight(stayInLoopSucc); + } + +#ifdef DEBUG + // If we're checking profile data, see if profile for the two target blocks is consistent. + // + if ((activePhaseChecks & PhaseChecks::CHECK_PROFILE) == PhaseChecks::CHECK_PROFILE) + { + if (JitConfig.JitProfileChecks() > 0) + { + const ProfileChecks checks = (ProfileChecks)JitConfig.JitProfileChecks(); + const bool nextProfileOk = fgDebugCheckIncomingProfileData(newCond->GetFalseTarget(), checks); + const bool jumpProfileOk = fgDebugCheckIncomingProfileData(newCond->GetTrueTarget(), checks); + + if (hasFlag(checks, ProfileChecks::RAISE_ASSERT)) + { + assert(nextProfileOk); + assert(jumpProfileOk); + } + } + } +#endif // DEBUG + } + +#ifdef DEBUG + if (verbose) + { + printf("\nDuplicated loop exit block at " FMT_BB " for loop " FMT_LP "\n", newCond->bbNum, + loop->GetIndex()); + printf("Estimated code size expansion is %d\n", estDupCostSz); + + fgDumpBlock(newCond); + fgDumpBlock(condBlock); + } +#endif // DEBUG + + return true; } From ddc0ea2f449ec303a80ef685f059ee6ce3c508fb Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 30 Oct 2024 16:00:31 +0100 Subject: [PATCH 05/19] Add a couple of quirks --- src/coreclr/jit/optimizer.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 0c718e991a2ffa..3198e2ea4cd8a9 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -1926,6 +1926,20 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) return false; } + // TODO-Quirk: Remove this + if (preheader->JumpsToNext()) + { + JITDUMP("No loop inversion for " FMT_LP " since its preheader is already fallthrough\n", loop->GetIndex()); + 
return false; + } + + // TODO-Quirk: Remove this + if (!preheader->NextIs(stayInLoopSucc)) + { + JITDUMP("No loop inversion for " FMT_LP " since its preheader does not fall through to \"stayInLoopSucc\"\n", loop->GetIndex()); + return false; + } + // Exiting the loop may enter a new try-region. However, to keep exits canonical, we will // have to split the exit such that old loop edges exit to one half, while the duplicated condition // exits to the other half. This will result in jump into the middle of a try-region, which is illegal. From 99a432151c6173c41c118aa28716b64e0dc34c24 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 30 Oct 2024 16:23:20 +0100 Subject: [PATCH 06/19] Run jit-format --- src/coreclr/jit/optimizer.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 3198e2ea4cd8a9..2d55c6c05abdb5 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -1916,7 +1916,7 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) return false; } - BasicBlock* exit = trueExits ? condBlock->GetTrueTarget() : condBlock->GetFalseTarget(); + BasicBlock* exit = trueExits ? condBlock->GetTrueTarget() : condBlock->GetFalseTarget(); BasicBlock* stayInLoopSucc = trueExits ? 
condBlock->GetFalseTarget() : condBlock->GetTrueTarget(); // If the condition is already a latch, then the loop is already inverted @@ -1936,7 +1936,8 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) // TODO-Quirk: Remove this if (!preheader->NextIs(stayInLoopSucc)) { - JITDUMP("No loop inversion for " FMT_LP " since its preheader does not fall through to \"stayInLoopSucc\"\n", loop->GetIndex()); + JITDUMP("No loop inversion for " FMT_LP " since its preheader does not fall through to \"stayInLoopSucc\"\n", + loop->GetIndex()); return false; } @@ -1973,7 +1974,7 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) bool allProfileWeightsAreValid = false; weight_t const weightPreheader = preheader->bbWeight; weight_t const weightCond = condBlock->bbWeight; - weight_t const weightStayInLoopSucc = stayInLoopSucc->bbWeight; + weight_t const weightStayInLoopSucc = stayInLoopSucc->bbWeight; // If we have profile data then we calculate the number of times // the loop will iterate into loopIterations @@ -1998,7 +1999,8 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) // if (!fgProfileWeightsConsistent(weightPreheader + weightStayInLoopSucc, weightCond)) { - JITDUMP("Profile weights locally inconsistent: preheader " FMT_WT ", stayInLoopSucc " FMT_WT ", cond " FMT_WT "\n", + JITDUMP("Profile weights locally inconsistent: preheader " FMT_WT ", stayInLoopSucc " FMT_WT + ", cond " FMT_WT "\n", weightPreheader, weightStayInLoopSucc, weightCond); } else @@ -2173,7 +2175,7 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) BasicBlock* block = duplicatedBlocks.Bottom(i); for (Statement* stmt : block->Statements()) { - GenTree* clonedTree = gtCloneExpr(stmt->GetRootNode()); + GenTree* clonedTree = gtCloneExpr(stmt->GetRootNode()); Statement* clonedStmt = fgNewStmtAtEnd(newCond, clonedTree, stmt->GetDebugInfo()); if (stmt == condBlock->lastStmt()) @@ -2233,8 +2235,7 @@ bool 
Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) #ifdef DEBUG if (verbose) { - printf("\nDuplicated loop exit block at " FMT_BB " for loop " FMT_LP "\n", newCond->bbNum, - loop->GetIndex()); + printf("\nDuplicated loop exit block at " FMT_BB " for loop " FMT_LP "\n", newCond->bbNum, loop->GetIndex()); printf("Estimated code size expansion is %d\n", estDupCostSz); fgDumpBlock(newCond); @@ -2242,7 +2243,6 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) } #endif // DEBUG - return true; } From 757d14c162a5ef1349183c37b935574051ee739b Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 30 Oct 2024 16:25:46 +0100 Subject: [PATCH 07/19] Add a metric for loops inverted --- src/coreclr/jit/jitmetadatalist.h | 1 + src/coreclr/jit/optimizer.cpp | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/coreclr/jit/jitmetadatalist.h b/src/coreclr/jit/jitmetadatalist.h index 4642ca14d7b7c9..452793cd81af0d 100644 --- a/src/coreclr/jit/jitmetadatalist.h +++ b/src/coreclr/jit/jitmetadatalist.h @@ -35,6 +35,7 @@ JITMETADATAMETRIC(GCInfoBytes, int, JIT_M JITMETADATAMETRIC(EHClauseCount, int, 0) JITMETADATAMETRIC(PhysicallyPromotedFields, int, 0) JITMETADATAMETRIC(LoopsFoundDuringOpts, int, 0) +JITMETADATAMETRIC(LoopsInverted, int, 0) JITMETADATAMETRIC(LoopsCloned, int, 0) JITMETADATAMETRIC(LoopsUnrolled, int, 0) JITMETADATAMETRIC(LoopAlignmentCandidates, int, 0) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 2d55c6c05abdb5..3122f4e296cb6c 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2243,6 +2243,8 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) } #endif // DEBUG + Metrics.LoopsInverted++; + return true; } From e0074ffb6099bb49fe563cd48e9c91c9feab7657 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 6 Jan 2025 14:56:23 +0100 Subject: [PATCH 08/19] Reuse preheader for zero-trip test --- src/coreclr/jit/optimizer.cpp | 36 
++++++++++++++++------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index ab6c77aaa3d9e3..420c09994e7522 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2129,8 +2129,8 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) } // Split the preheader so we can duplicate the statements into it. The new - // block will be a BBJ_COND node. - BasicBlock* newCond = fgSplitBlockAtEnd(preheader); + // block will be the new preheader. + BasicBlock* newPreheader = fgSplitBlockAtEnd(preheader); if (allProfileWeightsAreValid) { @@ -2143,32 +2143,28 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) // if (delta > preheader->bbWeight) { - newCond->setBBProfileWeight(delta); + newPreheader->setBBProfileWeight(delta); } } - // Split the new block once more to create a proper preheader, so we end up - // with preheader (always) -> newCond (cond) -> newPreheader (always) -> header - BasicBlock* newPreheader = fgSplitBlockAtEnd(newCond); - // Make sure exit stays canonical BasicBlock* nonEnterBlock = fgSplitBlockAtBeginning(exit); JITDUMP("New preheader is " FMT_BB "\n", newPreheader->bbNum); - JITDUMP("Duplicated condition block is " FMT_BB "\n", newCond->bbNum); + JITDUMP("Duplicated condition block is " FMT_BB "\n", preheader->bbNum); JITDUMP("Old exit is " FMT_BB ", new non-enter block is " FMT_BB "\n", exit->bbNum, nonEnterBlock->bbNum); // Get the newCond -> newPreheader edge - FlowEdge* newCondToNewPreheader = newCond->GetTargetEdge(); + FlowEdge* newCondToNewPreheader = preheader->GetTargetEdge(); // Add newCond -> nonEnterBlock - FlowEdge* newCondToNewExit = fgAddRefPred(nonEnterBlock, newCond); + FlowEdge* newCondToNewExit = fgAddRefPred(nonEnterBlock, preheader); - newCond->SetCond(trueExits ? newCondToNewExit : newCondToNewPreheader, + preheader->SetCond(trueExits ? 
newCondToNewExit : newCondToNewPreheader, trueExits ? newCondToNewPreheader : newCondToNewExit); - newCond->GetTrueEdge()->setLikelihood(condBlock->GetTrueEdge()->getLikelihood()); - newCond->GetFalseEdge()->setLikelihood(condBlock->GetFalseEdge()->getLikelihood()); + preheader->GetTrueEdge()->setLikelihood(condBlock->GetTrueEdge()->getLikelihood()); + preheader->GetFalseEdge()->setLikelihood(condBlock->GetFalseEdge()->getLikelihood()); // Add newPreheader -> stayInLoopSucc FlowEdge* newPreheaderToInLoopSucc = fgAddRefPred(stayInLoopSucc, newPreheader, newPreheader->GetTargetEdge()); @@ -2186,20 +2182,20 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) for (Statement* stmt : block->Statements()) { GenTree* clonedTree = gtCloneExpr(stmt->GetRootNode()); - Statement* clonedStmt = fgNewStmtAtEnd(newCond, clonedTree, stmt->GetDebugInfo()); + Statement* clonedStmt = fgNewStmtAtEnd(preheader, clonedTree, stmt->GetDebugInfo()); if (stmt == condBlock->lastStmt()) { // TODO: This ought not to be necessary, but has large negative diffs if we don't do it assert(clonedStmt->GetRootNode()->OperIs(GT_JTRUE)); clonedStmt->GetRootNode()->AsUnOp()->gtOp1 = gtReverseCond(clonedStmt->GetRootNode()->gtGetOp1()); - newCond->SetCond(newCond->GetFalseEdge(), newCond->GetTrueEdge()); + preheader->SetCond(preheader->GetFalseEdge(), preheader->GetTrueEdge()); } DISPSTMT(clonedStmt); } - newCond->CopyFlags(block, BBF_COPY_PROPAGATE); + preheader->CopyFlags(block, BBF_COPY_PROPAGATE); } // If we have profile data for all blocks and we know that we are cloning the @@ -2229,8 +2225,8 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) if (JitConfig.JitProfileChecks() > 0) { const ProfileChecks checks = (ProfileChecks)JitConfig.JitProfileChecks(); - const bool nextProfileOk = fgDebugCheckIncomingProfileData(newCond->GetFalseTarget(), checks); - const bool jumpProfileOk = fgDebugCheckIncomingProfileData(newCond->GetTrueTarget(), checks); + const bool 
nextProfileOk = fgDebugCheckIncomingProfileData(preheader->GetFalseTarget(), checks); + const bool jumpProfileOk = fgDebugCheckIncomingProfileData(preheader->GetTrueTarget(), checks); if (hasFlag(checks, ProfileChecks::RAISE_ASSERT)) { @@ -2245,10 +2241,10 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) #ifdef DEBUG if (verbose) { - printf("\nDuplicated loop exit block at " FMT_BB " for loop " FMT_LP "\n", newCond->bbNum, loop->GetIndex()); + printf("\nDuplicated loop exit block at " FMT_BB " for loop " FMT_LP "\n", preheader->bbNum, loop->GetIndex()); printf("Estimated code size expansion is %d\n", estDupCostSz); - fgDumpBlock(newCond); + fgDumpBlock(preheader); fgDumpBlock(condBlock); } #endif // DEBUG From 1931c5a18dcfae57fb18e8a970caa21b4069ea34 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 6 Jan 2025 15:27:37 +0100 Subject: [PATCH 09/19] Compact latch block if possible --- src/coreclr/jit/optimizer.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 420c09994e7522..c97b0c06121a52 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2238,6 +2238,25 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) #endif // DEBUG } + // Finally compact the condition with its pred if that is possible now. + // TODO-Cleanup: This compensates for limitations in analysis of downstream + // phases, particularly the pattern-based IV analysis. 
+ BasicBlock* condPred = condBlock->GetUniquePred(this); + if (condPred != nullptr) + { + JITDUMP("Cond block " FMT_BB " has a unique pred now, seeing if we can compact...\n", condBlock->bbNum); + if (fgCanCompactBlock(condPred)) + { + JITDUMP(" ..we can!\n"); + fgCompactBlock(condPred); + condBlock = condPred; + } + else + { + JITDUMP(" ..we cannot\n"); + } + } + #ifdef DEBUG if (verbose) { From 06854ab78c3ace3afa458ac05a85b064e91a771e Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 6 Jan 2025 15:28:00 +0100 Subject: [PATCH 10/19] Run jit-format --- src/coreclr/jit/optimizer.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index c97b0c06121a52..a4ec42c2c6a706 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2161,7 +2161,7 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) FlowEdge* newCondToNewExit = fgAddRefPred(nonEnterBlock, preheader); preheader->SetCond(trueExits ? newCondToNewExit : newCondToNewPreheader, - trueExits ? newCondToNewPreheader : newCondToNewExit); + trueExits ? 
newCondToNewPreheader : newCondToNewExit); preheader->GetTrueEdge()->setLikelihood(condBlock->GetTrueEdge()->getLikelihood()); preheader->GetFalseEdge()->setLikelihood(condBlock->GetFalseEdge()->getLikelihood()); @@ -2224,9 +2224,9 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) { if (JitConfig.JitProfileChecks() > 0) { - const ProfileChecks checks = (ProfileChecks)JitConfig.JitProfileChecks(); - const bool nextProfileOk = fgDebugCheckIncomingProfileData(preheader->GetFalseTarget(), checks); - const bool jumpProfileOk = fgDebugCheckIncomingProfileData(preheader->GetTrueTarget(), checks); + const ProfileChecks checks = (ProfileChecks)JitConfig.JitProfileChecks(); + const bool nextProfileOk = fgDebugCheckIncomingProfileData(preheader->GetFalseTarget(), checks); + const bool jumpProfileOk = fgDebugCheckIncomingProfileData(preheader->GetTrueTarget(), checks); if (hasFlag(checks, ProfileChecks::RAISE_ASSERT)) { From 6c24c5356cf62800fa99ea1305232d885ab2b83e Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 6 Jan 2025 19:38:50 +0100 Subject: [PATCH 11/19] Remove quirks --- src/coreclr/jit/optimizer.cpp | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index a4ec42c2c6a706..c53a785fbbcfa4 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -1936,21 +1936,6 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) return false; } - // TODO-Quirk: Remove this - if (preheader->JumpsToNext()) - { - JITDUMP("No loop inversion for " FMT_LP " since its preheader is already fallthrough\n", loop->GetIndex()); - return false; - } - - // TODO-Quirk: Remove this - if (!preheader->NextIs(stayInLoopSucc)) - { - JITDUMP("No loop inversion for " FMT_LP " since its preheader does not fall through to \"stayInLoopSucc\"\n", - loop->GetIndex()); - return false; - } - // Exiting the loop may enter a new try-region. 
However, to keep exits canonical, we will // have to split the exit such that old loop edges exit to one half, while the duplicated condition // exits to the other half. This will result in jump into the middle of a try-region, which is illegal. From b79fc7a6fffe0dc00689d1bd44cbd686da8ccd34 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Tue, 18 Mar 2025 19:56:25 -0400 Subject: [PATCH 12/19] Leave fgRenumberBlocks in for now --- src/coreclr/jit/optimizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index d35258113be0b5..16a23180f86780 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2270,7 +2270,7 @@ PhaseStatus Compiler::optInvertLoops() m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); } - // fgRenumberBlocks(); + fgRenumberBlocks(); } return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; From 8a9a547f07df61335716669516d965d56eb9f26c Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Wed, 19 Mar 2025 16:04:46 -0400 Subject: [PATCH 13/19] Simplify profile maintenance --- src/coreclr/jit/optimizer.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 16a23180f86780..48d59638569147 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -1975,11 +1975,11 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) } } - weight_t loopIterations = BB_LOOP_WEIGHT_SCALE; - bool haveProfileWeights = false; - weight_t const weightPreheader = preheader->bbWeight; - weight_t const weightCond = condBlock->bbWeight; - weight_t const weightStayInLoopSucc = stayInLoopSucc->bbWeight; + weight_t loopIterations = BB_LOOP_WEIGHT_SCALE; + bool haveProfileWeights = false; + weight_t const weightPreheader = preheader->bbWeight; + weight_t const weightCond = 
condBlock->bbWeight; + weight_t const weightStayInLoopSucc = stayInLoopSucc->bbWeight; // If we have profile data then we calculate the number of times // the loop will iterate into loopIterations @@ -2174,22 +2174,23 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) if (haveProfileWeights) { - newPreheader->setBBProfileWeight(newPreheader->computeIncomingWeight()); + // Reduce flow into the new loop entry/exit blocks + newPreheader->setBBProfileWeight(newCondToNewPreheader->getLikelyWeight()); + exit->decreaseBBProfileWeight(newCondToNewExit->getLikelyWeight()); // Update the weight for the duplicated blocks. Normally, this reduces // the weight of condBlock, except in odd cases of stress modes with // inconsistent weights. - + for (int i = 0; i < duplicatedBlocks.Height(); i++) { BasicBlock* block = duplicatedBlocks.Bottom(i); JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", block->bbNum, weightCond, weightStayInLoopSucc); - block->inheritWeight(stayInLoopSucc); + block->setBBProfileWeight(weightStayInLoopSucc); } condBlock->setBBProfileWeight(condBlock->computeIncomingWeight()); - exit->setBBProfileWeight(exit->computeIncomingWeight()); } // Finally compact the condition with its pred if that is possible now. From fe411d2f4135d801d18623ab8446168cf63b52e1 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 20 Mar 2025 11:22:18 -0400 Subject: [PATCH 14/19] Remove fgRenumberBlocks call --- src/coreclr/jit/optimizer.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 48d59638569147..920bd101426fda 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2182,7 +2182,7 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) // the weight of condBlock, except in odd cases of stress modes with // inconsistent weights. 
- for (int i = 0; i < duplicatedBlocks.Height(); i++) + for (int i = 0; i < (duplicatedBlocks.Height() - 1); i++) { BasicBlock* block = duplicatedBlocks.Bottom(i); JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", block->bbNum, weightCond, @@ -2270,8 +2270,6 @@ PhaseStatus Compiler::optInvertLoops() m_dfsTree = fgComputeDfs(); m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); } - - fgRenumberBlocks(); } return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; From 7d1e7c84f41183ad4a257b1f313db25109c260cb Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 20 Mar 2025 13:09:40 -0400 Subject: [PATCH 15/19] Fix fgOptimizeBranch --- src/coreclr/jit/fgopt.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 35ed6ab2a6b0f0..23ca675e88cac8 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -2680,12 +2680,7 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) { // Clone/substitute the expression. Statement* stmt = gtCloneStmt(curStmt); - - // cloneExpr doesn't handle everything. - if (stmt == nullptr) - { - return false; - } + assert(stmt != nullptr); if (fgNodeThreading == NodeThreading::AllTrees) { @@ -2716,9 +2711,10 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) condTree = condTree->gtGetOp1(); // This condTree has to be a RelOp comparison. - if (condTree->OperIsCompare() == false) + // If not, return true since we created new nodes. + if (!condTree->OperIsCompare()) { - return false; + return true; } // Join the two linked lists. 
From 0f12f5d58ea3380c8f64fb9b4b76f5404b839485 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 20 Mar 2025 14:52:51 -0400 Subject: [PATCH 16/19] Fix another spot in fgOptimizeBranch --- src/coreclr/jit/fgopt.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 23ca675e88cac8..1171b0c0e89037 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -2664,9 +2664,10 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) } #endif // DEBUG + // Computing the duplication cost may have triggered node reordering, so return true to indicate we modified IR if (costIsTooHigh) { - return false; + return true; } /* Looks good - duplicate the conditional block */ From d7ec36dec6a5fa960587f140da0d670d4fc3c371 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Tue, 27 May 2025 11:49:29 -0400 Subject: [PATCH 17/19] Move phase back --- src/coreclr/jit/compiler.cpp | 8 ++++---- src/coreclr/jit/optimizer.cpp | 26 +++++--------------------- 2 files changed, 9 insertions(+), 25 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 0beff88b313a63..d085ef712a40fb 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4641,6 +4641,10 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_EMPTY_TRY_CATCH_FAULT_2, &Compiler::fgRemoveEmptyTryCatchOrTryFault); + // Invert loops + // + DoPhase(this, PHASE_INVERT_LOOPS, &Compiler::optInvertLoops); + // Run some flow graph optimizations (but don't reorder) // DoPhase(this, PHASE_OPTIMIZE_FLOW, &Compiler::optOptimizeFlow); @@ -4663,10 +4667,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_REPAIR_PROFILE_POST_MORPH, &Compiler::fgRepairProfile); - // Invert loops - // - DoPhase(this, PHASE_INVERT_LOOPS, &Compiler::optInvertLoops); - // Scale block 
weights and mark run rarely blocks. // DoPhase(this, PHASE_SET_BLOCK_WEIGHTS, &Compiler::optSetBlockWeights); diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 3b805b76bfc9d7..83dc6767e58398 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2249,36 +2249,20 @@ PhaseStatus Compiler::optInvertLoops() } #endif // OPT_CONFIG - bool madeChanges = false; - if (compCodeOpt() != SMALL_CODE) { + fgDfsBlocksAndRemove(); + optFindLoops(); for (FlowGraphNaturalLoop* loop : m_loops->InPostOrder()) { - madeChanges |= optTryInvertWhileLoop(loop); + optTryInvertWhileLoop(loop); } - } - if (madeChanges) - { fgInvalidateDfsTree(); - m_dfsTree = fgComputeDfs(); - m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); - - // In normal cases no further canonicalization will be required as we - // try to ensure loops stay canonicalized during inversion. However, - // duplicating the condition outside an inner loop can introduce new - // exits in the parent loop, so we may rarely need to recanonicalize - // exit blocks. - if (optCanonicalizeLoops()) - { - fgInvalidateDfsTree(); - m_dfsTree = fgComputeDfs(); - m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); - } + return PhaseStatus::MODIFIED_EVERYTHING; } - return madeChanges ? 
PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; + return PhaseStatus::MODIFIED_NOTHING; } //----------------------------------------------------------------------------- From 7becde0e1bb2af06ceab99f2cb9970735a5be7a0 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 29 May 2025 18:44:41 -0400 Subject: [PATCH 18/19] Add size restriction --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/loopcloning.cpp | 12 +++++------- src/coreclr/jit/optimizer.cpp | 6 ++++++ 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 490c5f3f003591..12efe25dbeefd9 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7061,7 +7061,7 @@ class Compiler PhaseStatus optCloneLoops(); PhaseStatus optRangeCheckCloning(); - bool optShouldCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context); + bool optShouldCloneLoop(FlowGraphNaturalLoop* loop); void optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context); PhaseStatus optUnrollLoops(); // Unrolls loops (needs to have cost info) bool optTryUnrollLoop(FlowGraphNaturalLoop* loop, bool* changedIR); diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index f54f5d0f0d5678..b567f03e8bb50f 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -1819,8 +1819,7 @@ void Compiler::optPerformStaticOptimizations(FlowGraphNaturalLoop* loop, // optShouldCloneLoop: Decide if a loop that can be cloned should be cloned. // // Arguments: -// loop - the current loop for which the optimizations are performed. -// context - data structure where all loop cloning info is kept. +// loop - the current loop for which the optimizations are performed. 
// // Returns: // true if expected performance gain from cloning is worth the potential @@ -1836,7 +1835,7 @@ void Compiler::optPerformStaticOptimizations(FlowGraphNaturalLoop* loop, // This value is compared to a hard-coded threshold, and if bigger, // then the method returns false. // -bool Compiler::optShouldCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context) +bool Compiler::optShouldCloneLoop(FlowGraphNaturalLoop* loop) { // See if loop size exceeds the limit. // @@ -1859,12 +1858,11 @@ bool Compiler::optShouldCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* if (result == BasicBlockVisit::Abort) { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ": exceeds size limit %u\n", loop->GetIndex(), sizeLimit); + JITDUMP("Rejecting loop " FMT_LP ": exceeds size limit %u\n", loop->GetIndex(), sizeLimit); return false; } - JITDUMP("Loop cloning: loop " FMT_LP ": size %u does not exceed size limit %u\n", loop->GetIndex(), size, - sizeLimit); + JITDUMP("Loop " FMT_LP ": size %u does not exceed size limit %u\n", loop->GetIndex(), size, sizeLimit); return true; } @@ -3149,7 +3147,7 @@ PhaseStatus Compiler::optCloneLoops() // No need to clone. context.CancelLoopOptInfo(loop->GetIndex()); } - else if (!optShouldCloneLoop(loop, &context)) + else if (!optShouldCloneLoop(loop)) { context.CancelLoopOptInfo(loop->GetIndex()); } diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 83dc6767e58398..c4000f320cae0a 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -1968,6 +1968,12 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) JITDUMP("Condition in block " FMT_BB " of loop " FMT_LP " is a candidate for duplication to invert the loop\n", condBlock->bbNum, loop->GetIndex()); + // Loops that are too large for cloning probably won't benefit from inversion. 
+ if (!optShouldCloneLoop(loop)) + { + return false; + } + unsigned estDupCostSz = 0; for (int i = 0; i < duplicatedBlocks.Height(); i++) From 9c2b94509c6dfcbcf00a94e73e257fcd09608c8b Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Tue, 3 Jun 2025 10:31:03 -0400 Subject: [PATCH 19/19] Refactor loop size calculation, and add inversion size limit --- src/coreclr/jit/compiler.h | 3 +- src/coreclr/jit/jitconfigvalues.h | 4 +- src/coreclr/jit/loopcloning.cpp | 65 +++++-------------------------- src/coreclr/jit/optimizer.cpp | 47 +++++++++++++++++++++- 4 files changed, 60 insertions(+), 59 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 20ecd884bf9669..b8fcd26151c1dd 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7063,10 +7063,11 @@ class Compiler bool optCanonicalizeExits(FlowGraphNaturalLoop* loop); bool optCanonicalizeExit(FlowGraphNaturalLoop* loop, BasicBlock* exit); + + bool optLoopComplexityExceeds(FlowGraphNaturalLoop* loop, unsigned limit); PhaseStatus optCloneLoops(); PhaseStatus optRangeCheckCloning(); - bool optShouldCloneLoop(FlowGraphNaturalLoop* loop); void optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context); PhaseStatus optUnrollLoops(); // Unrolls loops (needs to have cost info) bool optTryUnrollLoop(FlowGraphNaturalLoop* loop, bool* changedIR); diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 31d6f0c3013e85..d88d336dcea938 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -559,7 +559,9 @@ OPT_CONFIG_INTEGER(JitDoOptimizeIVs, "JitDoOptimizeIVs", 1) // Perform optim OPT_CONFIG_INTEGER(JitDoEarlyProp, "JitDoEarlyProp", 1) // Perform Early Value Propagation OPT_CONFIG_INTEGER(JitDoLoopHoisting, "JitDoLoopHoisting", 1) // Perform loop hoisting on loop invariant values OPT_CONFIG_INTEGER(JitDoLoopInversion, "JitDoLoopInversion", 1) // Perform loop inversion on "for/while" loops 
-OPT_CONFIG_INTEGER(JitDoRangeAnalysis, "JitDoRangeAnalysis", 1) // Perform range check analysis +RELEASE_CONFIG_INTEGER(JitLoopInversionSizeLimit, "JitLoopInversionSizeLimit", 100) // limit inversion to loops with no + // more than this many tree nodes +OPT_CONFIG_INTEGER(JitDoRangeAnalysis, "JitDoRangeAnalysis", 1) // Perform range check analysis OPT_CONFIG_INTEGER(JitDoVNBasedDeadStoreRemoval, "JitDoVNBasedDeadStoreRemoval", 1) // Perform VN-based dead store // removal OPT_CONFIG_INTEGER(JitDoRedundantBranchOpts, "JitDoRedundantBranchOpts", 1) // Perform redundant branch optimizations diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index b567f03e8bb50f..ca660c739d8770 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -1815,58 +1815,6 @@ void Compiler::optPerformStaticOptimizations(FlowGraphNaturalLoop* loop, } } -//------------------------------------------------------------------------ -// optShouldCloneLoop: Decide if a loop that can be cloned should be cloned. -// -// Arguments: -// loop - the current loop for which the optimizations are performed. -// -// Returns: -// true if expected performance gain from cloning is worth the potential -// size increase. -// -// Remarks: -// This is a simple-minded heuristic meant to avoid "runaway" cloning -// where large loops are cloned. -// -// We estimate the size cost of cloning by summing up the number of -// tree nodes in all statements in all blocks in the loop. -// -// This value is compared to a hard-coded threshold, and if bigger, -// then the method returns false. -// -bool Compiler::optShouldCloneLoop(FlowGraphNaturalLoop* loop) -{ - // See if loop size exceeds the limit. - // - const int sizeConfig = JitConfig.JitCloneLoopsSizeLimit(); - unsigned const sizeLimit = (sizeConfig >= 0) ? 
(unsigned)sizeConfig : UINT_MAX; - unsigned size = 0; - - BasicBlockVisit result = loop->VisitLoopBlocks([&](BasicBlock* block) { - assert(sizeLimit >= size); - unsigned const slack = sizeLimit - size; - unsigned blockSize = 0; - if (block->ComplexityExceeds(this, slack, &blockSize)) - { - return BasicBlockVisit::Abort; - } - - size += blockSize; - return BasicBlockVisit::Continue; - }); - - if (result == BasicBlockVisit::Abort) - { - JITDUMP("Rejecting loop " FMT_LP ": exceeds size limit %u\n", loop->GetIndex(), sizeLimit); - return false; - } - - JITDUMP("Loop " FMT_LP ": size %u does not exceed size limit %u\n", loop->GetIndex(), size, sizeLimit); - - return true; -} - //---------------------------------------------------------------------------- // optIsLoopClonable: Determine whether this loop can be cloned. // @@ -3129,8 +3077,9 @@ PhaseStatus Compiler::optCloneLoops() } else { - bool allTrue = false; - bool anyFalse = false; + bool allTrue = false; + bool anyFalse = false; + const int sizeLimit = JitConfig.JitCloneLoopsSizeLimit(); context.EvaluateConditions(loop->GetIndex(), &allTrue, &anyFalse DEBUGARG(verbose)); if (anyFalse) { @@ -3147,7 +3096,13 @@ PhaseStatus Compiler::optCloneLoops() // No need to clone. context.CancelLoopOptInfo(loop->GetIndex()); } - else if (!optShouldCloneLoop(loop)) + // This is a simple-minded heuristic meant to avoid "runaway" cloning + // where large loops are cloned. + // We estimate the size cost of cloning by summing up the number of + // tree nodes in all statements in all blocks in the loop. + // This value is compared to the configurable JitCloneLoopsSizeLimit threshold, and if bigger, + // then cloning is canceled for this loop. 
+ else if ((sizeLimit >= 0) && optLoopComplexityExceeds(loop, (unsigned)sizeLimit)) { context.CancelLoopOptInfo(loop->GetIndex()); } diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index ac61e2a283f68a..ed24250161ea0d 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -1900,8 +1900,10 @@ bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) JITDUMP("Condition in block " FMT_BB " of loop " FMT_LP " is a candidate for duplication to invert the loop\n", condBlock->bbNum, loop->GetIndex()); - // Loops that are too large for cloning probably won't benefit from inversion. - if (!optShouldCloneLoop(loop)) + // Check if loop is small enough to consider for inversion. + // Large loops are less likely to benefit from inversion. + const int sizeLimit = JitConfig.JitLoopInversionSizeLimit(); + if ((sizeLimit >= 0) && optLoopComplexityExceeds(loop, (unsigned)sizeLimit)) { return false; } @@ -2838,6 +2840,47 @@ bool Compiler::optCanonicalizeExit(FlowGraphNaturalLoop* loop, BasicBlock* exit) return true; } +//------------------------------------------------------------------------ +// optLoopComplexityExceeds: Check if the number of nodes in the loop exceeds some limit +// +// Arguments: +// loop - the loop to compute the number of nodes in +// limit - limit on the number of nodes +// +// Returns: +// true if the number of nodes exceeds the limit +// +bool Compiler::optLoopComplexityExceeds(FlowGraphNaturalLoop* loop, unsigned limit) +{ + assert(loop != nullptr); + + // See if loop size exceeds the limit. 
+ // + unsigned size = 0; + + BasicBlockVisit const result = loop->VisitLoopBlocks([this, limit, &size](BasicBlock* block) { + assert(limit >= size); + unsigned const slack = limit - size; + unsigned blockSize = 0; + if (block->ComplexityExceeds(this, slack, &blockSize)) + { + return BasicBlockVisit::Abort; + } + + size += blockSize; + return BasicBlockVisit::Continue; + }); + + if (result == BasicBlockVisit::Abort) + { + JITDUMP("Loop " FMT_LP ": exceeds size limit %u\n", loop->GetIndex(), limit); + return true; + } + + JITDUMP("Loop " FMT_LP ": size %u does not exceed size limit %u\n", loop->GetIndex(), size, limit); + return false; +} + //----------------------------------------------------------------------------- // optSetWeightForPreheaderOrExit: Set the weight of a newly created preheader // or exit, after it has been added to the flowgraph.