diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 6dafe12eda3e3f..bdcd6ba8dee36e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7074,10 +7074,11 @@ class Compiler bool optCanonicalizeExits(FlowGraphNaturalLoop* loop); bool optCanonicalizeExit(FlowGraphNaturalLoop* loop, BasicBlock* exit); + + bool optLoopComplexityExceeds(FlowGraphNaturalLoop* loop, unsigned limit); PhaseStatus optCloneLoops(); PhaseStatus optRangeCheckCloning(); - bool optShouldCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context); void optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context); PhaseStatus optUnrollLoops(); // Unrolls loops (needs to have cost info) bool optTryUnrollLoop(FlowGraphNaturalLoop* loop, bool* changedIR); @@ -7132,7 +7133,7 @@ class Compiler OptInvertCountTreeInfoType optInvertCountTreeInfo(GenTree* tree); - bool optInvertWhileLoop(BasicBlock* block); + bool optTryInvertWhileLoop(FlowGraphNaturalLoop* loop); bool optIfConvert(BasicBlock* block); private: diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index 494b777b93a476..92e6a524b8fef3 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -4733,7 +4733,12 @@ void Compiler::fgDebugCheckLoops() loop->VisitRegularExitBlocks([=](BasicBlock* exit) { for (BasicBlock* pred : exit->PredBlocks()) { - assert(loop->ContainsBlock(pred)); + if (!loop->ContainsBlock(pred)) + { + JITDUMP("Loop " FMT_LP " exit " FMT_BB " has non-loop predecessor " FMT_BB "\n", + loop->GetIndex(), exit->bbNum, pred->bbNum); + assert(!"Loop exit has non-loop predecessor"); + } } return BasicBlockVisit::Continue; }); diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 47a9ae05e4ca2a..ab2b2d22859dff 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -2661,9 +2661,10 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) } #endif // DEBUG + // Computing the duplication cost may have triggered node reordering, so return true to indicate we modified IR if (costIsTooHigh) { - return false; + return true; } /* Looks good - duplicate the conditional block */ @@ -2677,12 +2678,7 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) { // Clone/substitute the expression. Statement* stmt = gtCloneStmt(curStmt); - - // cloneExpr doesn't handle everything. - if (stmt == nullptr) - { - return false; - } + assert(stmt != nullptr); if (fgNodeThreading == NodeThreading::AllTrees) { @@ -2713,9 +2709,10 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) condTree = condTree->gtGetOp1(); // This condTree has to be a RelOp comparison. - if (condTree->OperIsCompare() == false) + // If not, return true since we created new nodes. + if (!condTree->OperIsCompare()) { - return false; + return true; } // Join the two linked lists. diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 31d6f0c3013e85..d88d336dcea938 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -559,7 +559,9 @@ OPT_CONFIG_INTEGER(JitDoOptimizeIVs, "JitDoOptimizeIVs", 1) // Perform optim OPT_CONFIG_INTEGER(JitDoEarlyProp, "JitDoEarlyProp", 1) // Perform Early Value Propagation OPT_CONFIG_INTEGER(JitDoLoopHoisting, "JitDoLoopHoisting", 1) // Perform loop hoisting on loop invariant values OPT_CONFIG_INTEGER(JitDoLoopInversion, "JitDoLoopInversion", 1) // Perform loop inversion on "for/while" loops -OPT_CONFIG_INTEGER(JitDoRangeAnalysis, "JitDoRangeAnalysis", 1) // Perform range check analysis +RELEASE_CONFIG_INTEGER(JitLoopInversionSizeLimit, "JitLoopInversionSizeLimit", 100) // limit inversion to loops with no + // more than this many tree nodes +OPT_CONFIG_INTEGER(JitDoRangeAnalysis, "JitDoRangeAnalysis", 1) // Perform range check analysis OPT_CONFIG_INTEGER(JitDoVNBasedDeadStoreRemoval, "JitDoVNBasedDeadStoreRemoval", 1) // Perform VN-based dead store // removal OPT_CONFIG_INTEGER(JitDoRedundantBranchOpts, "JitDoRedundantBranchOpts", 1) // Perform redundant branch optimizations diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index f3da64f02a1117..4703704553408a 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -1815,60 +1815,6 @@ void Compiler::optPerformStaticOptimizations(FlowGraphNaturalLoop* loop, } } -//------------------------------------------------------------------------ -// optShouldCloneLoop: Decide if a loop that can be cloned should be cloned. -// -// Arguments: -// loop - the current loop for which the optimizations are performed. -// context - data structure where all loop cloning info is kept. -// -// Returns: -// true if expected performance gain from cloning is worth the potential -// size increase. -// -// Remarks: -// This is a simple-minded heuristic meant to avoid "runaway" cloning -// where large loops are cloned. -// -// We estimate the size cost of cloning by summing up the number of -// tree nodes in all statements in all blocks in the loop. -// -// This value is compared to a hard-coded threshold, and if bigger, -// then the method returns false. -// -bool Compiler::optShouldCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context) -{ - // See if loop size exceeds the limit. - // - const int sizeConfig = JitConfig.JitCloneLoopsSizeLimit(); - unsigned const sizeLimit = (sizeConfig >= 0) ? (unsigned)sizeConfig : UINT_MAX; - unsigned size = 0; - - BasicBlockVisit result = loop->VisitLoopBlocks([&](BasicBlock* block) { - assert(sizeLimit >= size); - unsigned const slack = sizeLimit - size; - unsigned blockSize = 0; - if (block->ComplexityExceeds(this, slack, &blockSize)) - { - return BasicBlockVisit::Abort; - } - - size += blockSize; - return BasicBlockVisit::Continue; - }); - - if (result == BasicBlockVisit::Abort) - { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ": exceeds size limit %u\n", loop->GetIndex(), sizeLimit); - return false; - } - - JITDUMP("Loop cloning: loop " FMT_LP ": size %u does not exceed size limit %u\n", loop->GetIndex(), size, - sizeLimit); - - return true; -} - //---------------------------------------------------------------------------- // optIsLoopClonable: Determine whether this loop can be cloned. // @@ -3131,8 +3077,9 @@ PhaseStatus Compiler::optCloneLoops() } else { - bool allTrue = false; - bool anyFalse = false; + bool allTrue = false; + bool anyFalse = false; + const int sizeLimit = JitConfig.JitCloneLoopsSizeLimit(); context.EvaluateConditions(loop->GetIndex(), &allTrue, &anyFalse DEBUGARG(verbose)); if (anyFalse) { @@ -3149,7 +3096,13 @@ PhaseStatus Compiler::optCloneLoops() // No need to clone. context.CancelLoopOptInfo(loop->GetIndex()); } - else if (!optShouldCloneLoop(loop, &context)) + // This is a simple-minded heuristic meant to avoid "runaway" cloning + // where large loops are cloned. + // We estimate the size cost of cloning by summing up the number of + // tree nodes in all statements in all blocks in the loop. + // This value is compared to a hard-coded threshold, and if bigger, + // then the method returns false. + else if ((sizeLimit >= 0) && optLoopComplexityExceeds(loop, (unsigned)sizeLimit)) { context.CancelLoopOptInfo(loop->GetIndex()); } diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 258f371b5fb320..ac0bb9fea4bba0 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -1807,13 +1807,11 @@ Compiler::OptInvertCountTreeInfoType Compiler::optInvertCountTreeInfo(GenTree* t } //----------------------------------------------------------------------------- -// optInvertWhileLoop: modify flow and duplicate code so that for/while loops are +// optTryInvertWhileLoop: modify flow and duplicate code so that for/while loops are // entered at top and tested at bottom (aka loop rotation or bottom testing). // Creates a "zero trip test" condition which guards entry to the loop. // Enables loop invariant hoisting and loop cloning, which depend on -// `do {} while` format loops. Enables creation of a pre-header block after the -// zero trip test to place code that only runs if the loop is guaranteed to -// run at least once. +// `do {} while` format loops. // // Arguments: // block -- block that may be the predecessor of the un-rotated loop's test block. @@ -1821,156 +1819,112 @@ Compiler::OptInvertCountTreeInfoType Compiler::optInvertCountTreeInfo(GenTree* t // Returns: // true if any IR changes possibly made (used to determine phase return status) // -// Notes: -// Uses a simple lexical screen to detect likely loops. -// -// Specifically, we're looking for the following case: -// -// block: -// ... -// jmp test // `block` argument -// top: -// ... -// ... -// test: -// ..stmts.. -// cond -// jtrue top -// -// If we find this, and the condition is simple enough, we change -// the loop to the following: -// -// block: -// ... -// jmp bNewCond -// bNewCond: -// ..stmts.. // duplicated cond block statements -// cond // duplicated cond -// jfalse join -// // else fall-through -// top: -// ... -// ... -// test: -// ..stmts.. -// cond -// jtrue top -// join: -// -// Makes no changes if the flow pattern match fails. -// -// May not modify a loop if profile is unfavorable, if the cost of duplicating -// code is large (factoring in potential CSEs). -// -bool Compiler::optInvertWhileLoop(BasicBlock* block) +bool Compiler::optTryInvertWhileLoop(FlowGraphNaturalLoop* loop) { - assert(opts.OptimizationEnabled()); - assert(compCodeOpt() != SMALL_CODE); + // Should have preheaders at this point + assert(loop->EntryEdges().size() == 1); + BasicBlock* const preheader = loop->EntryEdge(0)->getSourceBlock(); - // Does the BB end with an unconditional jump? + ArrayStack duplicatedBlocks(getAllocator(CMK_LoopOpt)); - if (!block->KindIs(BBJ_ALWAYS) || block->JumpsToNext()) + BasicBlock* condBlock = loop->GetHeader(); + while (true) { - return false; - } + if (!BasicBlock::sameEHRegion(preheader, condBlock)) + { + JITDUMP("No loop-inversion for " FMT_LP + " since we could not find a condition block in the same EH region as the preheader\n", + loop->GetIndex()); + return false; + } - if (block->HasFlag(BBF_KEEP_BBJ_ALWAYS)) - { - // It can't be one of the ones we use for our exception magic - return false; - } + duplicatedBlocks.Push(condBlock); - // Get hold of the jump target - BasicBlock* const bTest = block->GetTarget(); + if (condBlock->KindIs(BBJ_ALWAYS)) + { + condBlock = condBlock->GetTarget(); - // Does the bTest consist of 'jtrue(cond) block' ? - if (!bTest->KindIs(BBJ_COND)) - { - return false; - } + if (!loop->ContainsBlock(condBlock) || (condBlock == loop->GetHeader())) + { + JITDUMP("No loop-inversion for " FMT_LP "; ran out of blocks following BBJ_ALWAYS blocks\n", + loop->GetIndex()); + return false; + } - // bTest must be a backwards jump to block->bbNext - // This will be the top of the loop. - // - BasicBlock* const bTop = bTest->GetTrueTarget(); + continue; + } - if (!block->NextIs(bTop)) - { - return false; + if (!condBlock->KindIs(BBJ_COND)) + { + JITDUMP("No loop-inversion for " FMT_LP " since we could not find any BBJ_COND block\n", loop->GetIndex()); + return false; + } + + break; } - // Since bTest is a BBJ_COND it will have a false target - // - BasicBlock* const bJoin = bTest->GetFalseTarget(); - noway_assert(bJoin != nullptr); + const bool trueExits = !loop->ContainsBlock(condBlock->GetTrueTarget()); + const bool falseExits = !loop->ContainsBlock(condBlock->GetFalseTarget()); - // 'block' must be in the same try region as the condition, since we're going to insert a duplicated condition - // in a new block after 'block', and the condition might include exception throwing code. - // On non-funclet platforms (x86), the catch exit is a BBJ_ALWAYS, but we don't want that to - // be considered as the head of a loop, so also disallow different handler regions. - if (!BasicBlock::sameEHRegion(block, bTest)) + if (trueExits == falseExits) { + JITDUMP("No loop-inversion for " FMT_LP " since we could not find any exiting BBJ_COND block\n", + loop->GetIndex()); return false; } - // The duplicated condition block will branch to bTest->GetFalseTarget(), so that also better be in the - // same try region (or no try region) to avoid generating illegal flow. - if (bJoin->hasTryIndex() && !BasicBlock::sameTryRegion(block, bJoin)) + BasicBlock* const exit = trueExits ? condBlock->GetTrueTarget() : condBlock->GetFalseTarget(); + BasicBlock* const stayInLoopSucc = trueExits ? condBlock->GetFalseTarget() : condBlock->GetTrueTarget(); + + // If the condition is already a latch, then the loop is already inverted + if (stayInLoopSucc == loop->GetHeader()) { + JITDUMP("No loop-inversion for " FMT_LP " since it is already inverted\n", loop->GetIndex()); return false; } - // It has to be a forward jump. Defer this check until after all the cheap checks - // are done, since it iterates forward in the block list looking for block's target. - // TODO-CQ: Check if we can also optimize the backwards jump as well. - // - if (!fgIsForwardBranch(block, block->GetTarget())) + // Exiting the loop may enter a new try-region. However, to keep exits canonical, we will + // have to split the exit such that old loop edges exit to one half, while the duplicated condition + // exits to the other half. This will result in jump into the middle of a try-region, which is illegal. + // TODO: We can fix this by placing the first half of the split (which will be an empty block) outside + // the try region. + if (!BasicBlock::sameEHRegion(preheader, exit)) { + JITDUMP("No loop-inversion for " FMT_LP " since the preheader " FMT_BB " and exit " FMT_BB + " are in different EH regions\n", + loop->GetIndex(), preheader->bbNum, exit->bbNum); return false; } - // Find the loop termination test at the bottom of the loop. - Statement* const condStmt = bTest->lastStmt(); + JITDUMP("Condition in block " FMT_BB " of loop " FMT_LP " is a candidate for duplication to invert the loop\n", + condBlock->bbNum, loop->GetIndex()); - // Verify the test block ends with a conditional that we can manipulate. - GenTree* const condTree = condStmt->GetRootNode(); - noway_assert(condTree->OperIs(GT_JTRUE)); - if (!condTree->AsOp()->gtOp1->OperIsCompare()) + // Check if loop is small enough to consider for inversion. + // Large loops are less likely to benefit from inversion. + const int sizeLimit = JitConfig.JitLoopInversionSizeLimit(); + if ((sizeLimit >= 0) && optLoopComplexityExceeds(loop, (unsigned)sizeLimit)) { return false; } - JITDUMP("Matched flow pattern for loop inversion: block " FMT_BB " bTop " FMT_BB " bTest " FMT_BB "\n", - block->bbNum, bTop->bbNum, bTest->bbNum); - - // Estimate the cost of cloning the entire test block. - // - // Note: it would help throughput to compute the maximum cost - // first and early out for large bTest blocks, as we are doing two - // tree walks per tree. But because of this helper call scan, the - // maximum cost depends on the trees in the block. - // - // We might consider flagging blocks with hoistable helper calls - // during importation, so we can avoid the helper search and - // implement an early bail out for large blocks with no helper calls. - // - // Note that gtPrepareCost can cause operand swapping, so we must - // return `true` (possible IR change) from here on. - unsigned estDupCostSz = 0; - for (Statement* const stmt : bTest->Statements()) + for (int i = 0; i < duplicatedBlocks.Height(); i++) { - GenTree* tree = stmt->GetRootNode(); - gtPrepareCost(tree); - estDupCostSz += tree->GetCostSz(); + BasicBlock* block = duplicatedBlocks.Bottom(i); + for (Statement* stmt : block->Statements()) + { + GenTree* tree = stmt->GetRootNode(); + gtPrepareCost(tree); + estDupCostSz += tree->GetCostSz(); + } } - weight_t loopIterations = BB_LOOP_WEIGHT_SCALE; - bool haveProfileWeights = false; - weight_t const weightBlock = block->bbWeight; - weight_t const weightTest = bTest->bbWeight; - weight_t const weightTop = bTop->bbWeight; + weight_t loopIterations = BB_LOOP_WEIGHT_SCALE; + bool haveProfileWeights = false; + weight_t const weightPreheader = preheader->bbWeight; + weight_t const weightCond = condBlock->bbWeight; + weight_t const weightStayInLoopSucc = stayInLoopSucc->bbWeight; // If we have profile data then we calculate the number of times // the loop will iterate into loopIterations @@ -1978,25 +1932,28 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) { // Only rely upon the profile weight when all three of these blocks // have good profile weights - if (block->hasProfileWeight() && bTest->hasProfileWeight() && bTop->hasProfileWeight()) + if (preheader->hasProfileWeight() && condBlock->hasProfileWeight() && stayInLoopSucc->hasProfileWeight()) { // If this while loop never iterates then don't bother transforming // - if (weightTop == BB_ZERO_WEIGHT) + if (weightStayInLoopSucc == BB_ZERO_WEIGHT) { - return true; + JITDUMP("No loop-inversion for " FMT_LP " since the in-loop successor " FMT_BB " has 0 weight\n", + loop->GetIndex(), preheader->bbNum); + return false; } haveProfileWeights = true; - // We generally expect weightTest > weightTop + // We generally expect weightCond > weightStayInLoopSucc // // Tolerate small inconsistencies... // - if (!fgProfileWeightsConsistent(weightBlock + weightTop, weightTest)) + if (!fgProfileWeightsConsistent(weightPreheader + weightStayInLoopSucc, weightCond)) { - JITDUMP("Profile weights locally inconsistent: block " FMT_WT ", next " FMT_WT ", test " FMT_WT "\n", - weightBlock, weightTop, weightTest); + JITDUMP("Profile weights locally inconsistent: preheader " FMT_WT ", stayInLoopSucc " FMT_WT + ", cond " FMT_WT "\n", + weightPreheader, weightStayInLoopSucc, weightCond); } else { @@ -2006,17 +1963,17 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) // weightTest is the number of times that we consider entering or remaining in the loop // loopIterations is the average number of times that this loop iterates // - weight_t loopEntries = weightTest - weightTop; + weight_t loopEntries = weightCond - weightStayInLoopSucc; // If profile is inaccurate, try and use other data to provide a credible estimate. // The value should at least be >= weightBlock. // - if (loopEntries < weightBlock) + if (loopEntries < weightPreheader) { - loopEntries = weightBlock; + loopEntries = weightPreheader; } - loopIterations = weightTop / loopEntries; + loopIterations = weightStayInLoopSucc / loopEntries; } } else @@ -2042,8 +1999,6 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) } } - // If the compare has too high cost then we don't want to dup. - bool costIsTooHigh = (estDupCostSz > maxDupCostSz); OptInvertCountTreeInfoType optInvertTotalInfo = {}; @@ -2058,32 +2013,37 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) // // If the condition has array.Length operations, also boost, as they are likely to be CSE'd. - for (Statement* const stmt : bTest->Statements()) + for (int i = 0; i < duplicatedBlocks.Height() && costIsTooHigh; i++) { - GenTree* tree = stmt->GetRootNode(); - - OptInvertCountTreeInfoType optInvertInfo = optInvertCountTreeInfo(tree); - optInvertTotalInfo.sharedStaticHelperCount += optInvertInfo.sharedStaticHelperCount; - optInvertTotalInfo.arrayLengthCount += optInvertInfo.arrayLengthCount; - - if ((optInvertInfo.sharedStaticHelperCount > 0) || (optInvertInfo.arrayLengthCount > 0)) + BasicBlock* block = duplicatedBlocks.Bottom(i); + for (Statement* const stmt : block->Statements()) { - // Calculate a new maximum cost. We might be able to early exit. + GenTree* tree = stmt->GetRootNode(); - unsigned newMaxDupCostSz = - maxDupCostSz + 24 * min(optInvertTotalInfo.sharedStaticHelperCount, (int)(loopIterations + 1.5)) + - 8 * optInvertTotalInfo.arrayLengthCount; + OptInvertCountTreeInfoType optInvertInfo = optInvertCountTreeInfo(tree); + optInvertTotalInfo.sharedStaticHelperCount += optInvertInfo.sharedStaticHelperCount; + optInvertTotalInfo.arrayLengthCount += optInvertInfo.arrayLengthCount; - // Is the cost too high now? - costIsTooHigh = (estDupCostSz > newMaxDupCostSz); - if (!costIsTooHigh) + if ((optInvertInfo.sharedStaticHelperCount > 0) || (optInvertInfo.arrayLengthCount > 0)) { - // No need counting any more trees; we're going to do the transformation. - JITDUMP("Decided to duplicate loop condition block after counting helpers in tree [%06u] in " - "block " FMT_BB, - dspTreeID(tree), bTest->bbNum); - maxDupCostSz = newMaxDupCostSz; // for the JitDump output below - break; + // Calculate a new maximum cost. We might be able to early exit. + + unsigned newMaxDupCostSz = + maxDupCostSz + + 24 * min(optInvertTotalInfo.sharedStaticHelperCount, (int)(loopIterations + 1.5)) + + 8 * optInvertTotalInfo.arrayLengthCount; + + // Is the cost too high now? + costIsTooHigh = (estDupCostSz > newMaxDupCostSz); + if (!costIsTooHigh) + { + // No need counting any more trees; we're going to do the transformation. + JITDUMP("Decided to duplicate loop condition block after counting helpers in tree [%06u] in " + "block " FMT_BB, + dspTreeID(tree), block->bbNum); + maxDupCostSz = newMaxDupCostSz; // for the JitDump output below + break; + } } } } @@ -2097,7 +2057,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) printf( "\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i," "\n loopIterations = %7.3f, optInvertTotalInfo.sharedStaticHelperCount >= %d, haveProfileWeights = %s\n", - dspTreeID(condTree), costIsTooHigh ? "not done" : "performed", estDupCostSz, + dspTreeID(condBlock->lastStmt()->GetRootNode()), costIsTooHigh ? "not done" : "performed", estDupCostSz, costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, loopIterations, optInvertTotalInfo.sharedStaticHelperCount, dspBool(haveProfileWeights)); } @@ -2108,148 +2068,103 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) return true; } - bool foundCondTree = false; - - // Create a new block after `block` to put the copied condition code. - // - BasicBlock* const bNewCond = fgNewBBafter(BBJ_COND, block, /*extendRegion*/ true); - - // Clone each statement in bTest and append to bNewCond. - for (Statement* const stmt : bTest->Statements()) - { - GenTree* originalTree = stmt->GetRootNode(); - GenTree* clonedTree = gtCloneExpr(originalTree); - - // Special case handling needed for the conditional jump tree - if (originalTree == condTree) - { - foundCondTree = true; - - // Get the compare subtrees - GenTree* originalCompareTree = originalTree->AsOp()->gtOp1; - GenTree* clonedCompareTree = clonedTree->AsOp()->gtOp1; - assert(originalCompareTree->OperIsCompare()); - assert(clonedCompareTree->OperIsCompare()); - - // The original test branches to remain in the loop. The - // new cloned test will branch to avoid the loop. So the - // cloned compare needs to reverse the branch condition. - gtReverseCond(clonedCompareTree); - } + // Split the preheader so we can duplicate the statements into it. The new + // block will be the new preheader. + BasicBlock* const newPreheader = fgSplitBlockAtEnd(preheader); - Statement* clonedStmt = fgNewStmtAtEnd(bNewCond, clonedTree); + // Make sure exit stays canonical + BasicBlock* nonEnterBlock = fgSplitBlockAtBeginning(exit); - if (opts.compDbgInfo) - { - clonedStmt->SetDebugInfo(stmt->GetDebugInfo()); - } - } + JITDUMP("New preheader is " FMT_BB "\n", newPreheader->bbNum); + JITDUMP("Duplicated condition block is " FMT_BB "\n", preheader->bbNum); + JITDUMP("Old exit is " FMT_BB ", new non-enter block is " FMT_BB "\n", exit->bbNum, nonEnterBlock->bbNum); - assert(foundCondTree); + // Get the newCond -> newPreheader edge + FlowEdge* const newCondToNewPreheader = preheader->GetTargetEdge(); - // Flag the block that received the copy as potentially having various constructs. - bNewCond->CopyFlags(bTest, BBF_COPY_PROPAGATE); + // Add newCond -> nonEnterBlock + FlowEdge* const newCondToNewExit = fgAddRefPred(nonEnterBlock, preheader); - // Update pred info - // - // For now we set the likelihood of the newCond branch to match - // the likelihood of the test branch (though swapped, since we're - // currently reversing the condition). This may or may not match - // the block weight adjustments we're making. All this becomes - // easier to reconcile once we rely on edge likelihoods more and - // have synthesis running (so block weights ==> frequencies). - // - // Until then we won't worry that edges and blocks are potentially - // out of sync. - // - FlowEdge* const testTopEdge = bTest->GetTrueEdge(); - FlowEdge* const testJoinEdge = bTest->GetFalseEdge(); - FlowEdge* const newCondJoinEdge = fgAddRefPred(bJoin, bNewCond, testJoinEdge); - FlowEdge* const newCondTopEdge = fgAddRefPred(bTop, bNewCond, testTopEdge); + preheader->SetCond(trueExits ? newCondToNewExit : newCondToNewPreheader, + trueExits ? newCondToNewPreheader : newCondToNewExit); - bNewCond->SetTrueEdge(newCondJoinEdge); - bNewCond->SetFalseEdge(newCondTopEdge); + preheader->GetTrueEdge()->setLikelihood(condBlock->GetTrueEdge()->getLikelihood()); + preheader->GetFalseEdge()->setLikelihood(condBlock->GetFalseEdge()->getLikelihood()); - fgRedirectTargetEdge(block, bNewCond); - assert(block->JumpsToNext()); + // Redirect newPreheader from header to stayInLoopSucc + fgRedirectTargetEdge(newPreheader, stayInLoopSucc); - // Fix flow and profile - // - bNewCond->inheritWeight(block); - - // Move all predecessor edges that look like loop entry edges to point to the new cloned condition - // block, not the existing condition block. The idea is that if we only move `block` to point to - // `bNewCond`, but leave other `bTest` predecessors still pointing to `bTest`, when we eventually - // recognize loops, the loop will appear to have multiple entries, which will prevent optimization. - // We don't have loops yet, but blocks should be in increasing lexical numbered order, so use that - // as the proxy for predecessors that are "in" versus "out" of the potential loop. Note that correctness - // is maintained no matter which condition block we point to, but we'll lose optimization potential - // (and create spaghetti code) if we get it wrong. - // - unsigned const loopFirstNum = bTop->bbNum; - unsigned const loopBottomNum = bTest->bbNum; - for (FlowEdge* const predEdge : bTest->PredEdgesEditing()) + // Duplicate all the code now + for (int i = 0; i < duplicatedBlocks.Height(); i++) { - BasicBlock* const predBlock = predEdge->getSourceBlock(); - unsigned const bNum = predBlock->bbNum; - if ((loopFirstNum <= bNum) && (bNum <= loopBottomNum)) + BasicBlock* block = duplicatedBlocks.Bottom(i); + for (Statement* stmt : block->Statements()) { - // Looks like the predecessor is from within the potential loop; skip it. - continue; + GenTree* clonedTree = gtCloneExpr(stmt->GetRootNode()); + Statement* clonedStmt = fgNewStmtAtEnd(preheader, clonedTree, stmt->GetDebugInfo()); + + if (stmt == condBlock->lastStmt()) + { + // TODO: This ought not to be necessary, but has large negative diffs if we don't do it + assert(clonedStmt->GetRootNode()->OperIs(GT_JTRUE)); + clonedStmt->GetRootNode()->AsUnOp()->gtOp1 = gtReverseCond(clonedStmt->GetRootNode()->gtGetOp1()); + preheader->SetCond(preheader->GetFalseEdge(), preheader->GetTrueEdge()); + } + + DISPSTMT(clonedStmt); } - // Redirect the predecessor to the new block. - JITDUMP("Redirecting non-loop " FMT_BB " -> " FMT_BB " to " FMT_BB " -> " FMT_BB "\n", predBlock->bbNum, - bTest->bbNum, predBlock->bbNum, bNewCond->bbNum); + preheader->CopyFlags(block, BBF_COPY_PROPAGATE); + } - switch (predBlock->GetKind()) - { - case BBJ_ALWAYS: - case BBJ_CALLFINALLY: - case BBJ_CALLFINALLYRET: - case BBJ_COND: - case BBJ_SWITCH: - case BBJ_EHFINALLYRET: - fgReplaceJumpTarget(predBlock, bTest, bNewCond); - break; + if (haveProfileWeights) + { + // Reduce flow into the new loop entry/exit blocks + newPreheader->setBBProfileWeight(newCondToNewPreheader->getLikelyWeight()); + exit->decreaseBBProfileWeight(newCondToNewExit->getLikelyWeight()); - case BBJ_EHCATCHRET: - case BBJ_EHFILTERRET: - // These block types should not need redirecting - break; + // Update the weight for the duplicated blocks. Normally, this reduces + // the weight of condBlock, except in odd cases of stress modes with + // inconsistent weights. - default: - assert(!"Unexpected bbKind for predecessor block"); - break; + for (int i = 0; i < (duplicatedBlocks.Height() - 1); i++) + { + BasicBlock* block = duplicatedBlocks.Bottom(i); + JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", block->bbNum, weightCond, + weightStayInLoopSucc); + block->setBBProfileWeight(weightStayInLoopSucc); } + + condBlock->setBBProfileWeight(condBlock->computeIncomingWeight()); } - if (haveProfileWeights) + // Finally compact the condition with its pred if that is possible now. + // TODO-Cleanup: This compensates for limitations in analysis of downstream + // phases, particularly the pattern-based IV analysis. + BasicBlock* const condPred = condBlock->GetUniquePred(this); + if (condPred != nullptr) { - // The above change should have moved some flow out of 'bTest', and into 'bNewCond'. - // Check that no extraneous flow was lost or gained in the process. - // - const weight_t totalWeight = bTest->bbWeight; - bTest->setBBProfileWeight(bTest->computeIncomingWeight()); - bNewCond->setBBProfileWeight(bNewCond->computeIncomingWeight()); - - if (!fgProfileWeightsConsistent(totalWeight, bTest->bbWeight + bNewCond->bbWeight)) + JITDUMP("Cond block " FMT_BB " has a unique pred now, seeing if we can compact...\n", condBlock->bbNum); + if (fgCanCompactBlock(condPred)) + { + JITDUMP(" ..we can!\n"); + fgCompactBlock(condPred); + condBlock = condPred; + } + else { - JITDUMP("Redirecting flow from " FMT_BB " to " FMT_BB " introduced inconsistency. Data %s inconsistent.\n", - bTest->bbNum, bNewCond->bbNum, fgPgoConsistent ? "is now" : "was already"); - fgPgoConsistent = false; + JITDUMP(" ..we cannot\n"); } } #ifdef DEBUG if (verbose) { - printf("\nDuplicated loop exit block at " FMT_BB " for loop (" FMT_BB " - " FMT_BB ")\n", bNewCond->bbNum, - bNewCond->GetFalseTarget()->bbNum, bTest->bbNum); + printf("\nDuplicated loop exit block at " FMT_BB " for loop " FMT_LP "\n", preheader->bbNum, loop->GetIndex()); printf("Estimated code size expansion is %d\n", estDupCostSz); - fgDumpBlock(bNewCond); - fgDumpBlock(bTest); + fgDumpBlock(preheader); + fgDumpBlock(condBlock); } #endif // DEBUG @@ -2265,8 +2180,6 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) // PhaseStatus Compiler::optInvertLoops() { - noway_assert(opts.OptimizationEnabled()); - #if defined(OPT_CONFIG) if (!JitConfig.JitDoLoopInversion()) { @@ -2275,31 +2188,20 @@ PhaseStatus Compiler::optInvertLoops() } #endif // OPT_CONFIG - bool madeChanges = fgRenumberBlocks(); - - if (compCodeOpt() == SMALL_CODE) - { - // do not invert any loops - } - else + if (compCodeOpt() != SMALL_CODE) { - for (BasicBlock* const block : Blocks()) + fgDfsBlocksAndRemove(); + optFindLoops(); + for (FlowGraphNaturalLoop* loop : m_loops->InPostOrder()) { - // Make sure the appropriate fields are initialized - // - if (block->bbWeight == BB_ZERO_WEIGHT) - { - continue; - } - - if (optInvertWhileLoop(block)) - { - madeChanges = true; - } + optTryInvertWhileLoop(loop); } + + fgInvalidateDfsTree(); + return PhaseStatus::MODIFIED_EVERYTHING; } - return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; + return PhaseStatus::MODIFIED_NOTHING; } //----------------------------------------------------------------------------- @@ -2937,6 +2839,47 @@ bool Compiler::optCanonicalizeExit(FlowGraphNaturalLoop* loop, BasicBlock* exit) return true; } +//------------------------------------------------------------------------ +// optLoopComplexityExceeds: Check if the number of nodes in the loop exceeds some limit +// +// Arguments: +// loop - the loop to compute the number of nodes in +// limit - limit on the number of nodes +// +// Returns: +// true if the number of nodes exceeds the limit +// +bool Compiler::optLoopComplexityExceeds(FlowGraphNaturalLoop* loop, unsigned limit) +{ + assert(loop != nullptr); + + // See if loop size exceeds the limit. + // + unsigned size = 0; + + BasicBlockVisit const result = loop->VisitLoopBlocks([this, limit, &size](BasicBlock* block) { + assert(limit >= size); + unsigned const slack = limit - size; + unsigned blockSize = 0; + if (block->ComplexityExceeds(this, slack, &blockSize)) + { + return BasicBlockVisit::Abort; + } + + size += blockSize; + return BasicBlockVisit::Continue; + }); + + if (result == BasicBlockVisit::Abort) + { + JITDUMP("Loop " FMT_LP ": exceeds size limit %u\n", loop->GetIndex(), limit); + return true; + } + + JITDUMP("Loop " FMT_LP ": size %u does not exceed size limit %u\n", loop->GetIndex(), size, limit); + return false; +} + //----------------------------------------------------------------------------- // optSetWeightForPreheaderOrExit: Set the weight of a newly created preheader // or exit, after it has been added to the flowgraph.