diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp index 9073d3dc50fe6b..2c1ccdfda21e43 100644 --- a/src/coreclr/jit/block.cpp +++ b/src/coreclr/jit/block.cpp @@ -1685,22 +1685,3 @@ BBswtDesc::BBswtDesc(Compiler* comp, const BBswtDesc* other) bbsDstTab[i] = other->bbsDstTab[i]; } } - -//------------------------------------------------------------------------ -// unmarkLoopAlign: Unmarks the LOOP_ALIGN flag from the block and reduce the -// loop alignment count. -// -// Arguments: -// compiler - Compiler instance -// reason - Reason to print in JITDUMP -// -void BasicBlock::unmarkLoopAlign(Compiler* compiler DEBUG_ARG(const char* reason)) -{ - // Make sure we unmark and count just once. - if (isLoopAlign()) - { - compiler->loopAlignCandidates--; - bbFlags &= ~BBF_LOOP_ALIGN; - JITDUMP("Unmarking LOOP_ALIGN from " FMT_BB ". Reason= %s.\n", bbNum, reason); - } -} diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h index 81a511f4bceed6..f60d304ec38932 100644 --- a/src/coreclr/jit/block.h +++ b/src/coreclr/jit/block.h @@ -657,8 +657,6 @@ struct BasicBlock : private LIR::Range return ((bbFlags & BBF_LOOP_ALIGN) != 0); } - void unmarkLoopAlign(Compiler* comp DEBUG_ARG(const char* reason)); - bool hasAlign() const { return ((bbFlags & BBF_HAS_ALIGN) != 0); diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 56701ceb08fad3..2a2461e914cd0f 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4776,6 +4776,10 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // Unroll loops // DoPhase(this, PHASE_UNROLL_LOOPS, &Compiler::optUnrollLoops); + + // Clear loop table info that is not used after this point, and might become invalid. + // + DoPhase(this, PHASE_CLEAR_LOOP_INFO, &Compiler::optClearLoopIterInfo); } #ifdef DEBUG @@ -5088,6 +5092,77 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl #if FEATURE_LOOP_ALIGN +//------------------------------------------------------------------------ +// optIdentifyLoopsForAlignment: Determine which loops should be considered for alignment. +// +// All innermost loops whose block weight meets a threshold are candidates for alignment. +// The `top` block of the loop is marked with the BBF_LOOP_ALIGN flag to indicate this +// (the loop table itself is not changed). +// +// Depends on the loop table, and on block weights being set. +// +// Sets `loopAlignCandidates` to the number of loop candidates for alignment. +// +void Compiler::optIdentifyLoopsForAlignment() +{ + assert(codeGen->ShouldAlignLoops()); + + loopAlignCandidates = 0; + + for (BasicBlock::loopNumber loopInd = 0; loopInd < optLoopCount; loopInd++) + { + const LoopDsc& loop = optLoopTable[loopInd]; + + if (loop.lpFlags & LPFLG_REMOVED) + { + // Don't need JitDump output for this common case. + continue; + } + + if (loop.lpChild != BasicBlock::NOT_IN_LOOP) + { + JITDUMP("Skipping alignment for " FMT_LP "; not an innermost loop\n", loopInd); + continue; + } + + if (loop.lpTop == fgFirstBB) + { + // Adding align instruction in prolog is not supported. (This currently seems unlikely since + // loops normally (always?) have a predecessor `head` block.) + // TODO: Insert an empty block before the loop, if want to align it, so we have a place to put + // the align instruction. + JITDUMP("Skipping alignment for " FMT_LP "; loop starts in first block\n", loopInd); + continue; + } + + if (loop.lpFlags & LPFLG_CONTAINS_CALL) + { + // Heuristic: it is not valuable to align loops with calls. + JITDUMP("Skipping alignment for " FMT_LP "; loop contains call\n", loopInd); + continue; + } + + // Now we have an innerloop candidate that might need alignment + + BasicBlock* top = loop.lpTop; + weight_t topWeight = top->getBBWeight(this); + weight_t compareWeight = opts.compJitAlignLoopMinBlockWeight * BB_UNITY_WEIGHT; + if (topWeight >= compareWeight) + { + loopAlignCandidates++; + assert(!top->isLoopAlign()); + top->bbFlags |= BBF_LOOP_ALIGN; + JITDUMP("Aligning " FMT_LP " that starts at " FMT_BB ", weight=" FMT_WT " >= " FMT_WT ".\n", loopInd, + top->bbNum, topWeight, compareWeight); + } + else + { + JITDUMP("Skipping alignment for " FMT_LP " that starts at " FMT_BB ", weight=" FMT_WT " < " FMT_WT ".\n", + loopInd, top->bbNum, topWeight, compareWeight); + } + } +} + //------------------------------------------------------------------------ // placeLoopAlignInstructions: Iterate over all the blocks and determine // the best position to place the 'align' instruction. Inserting 'align' @@ -5100,26 +5175,39 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // void Compiler::placeLoopAlignInstructions() { +#ifdef DEBUG + if (verbose) + { + printf("*************** In placeLoopAlignInstructions()\n"); + } +#endif + + if (!codeGen->ShouldAlignLoops()) + { + JITDUMP("Not aligning loops; ShouldAlignLoops is false\n"); + return; + } + + // Print out the current loop table that we're working on. + DBEXEC(verbose, optPrintLoopTable()); + + assert(loopAlignCandidates == 0); // We currently expect nobody has touched this yet. + optIdentifyLoopsForAlignment(); + if (loopAlignCandidates == 0) { + JITDUMP("No loop candidates\n"); return; } int loopsToProcess = loopAlignCandidates; - JITDUMP("Inside placeLoopAlignInstructions for %d loops.\n", loopAlignCandidates); + JITDUMP("Considering %d loops\n", loopAlignCandidates); // Add align only if there were any loops that needed alignment weight_t minBlockSoFar = BB_MAX_WEIGHT; BasicBlock* bbHavingAlign = nullptr; BasicBlock::loopNumber currentAlignedLoopNum = BasicBlock::NOT_IN_LOOP; - if ((fgFirstBB != nullptr) && fgFirstBB->isLoopAlign()) - { - // Adding align instruction in prolog is not supported - // hence just remove that loop from our list. - loopsToProcess--; - } - for (BasicBlock* const block : Blocks()) { if (currentAlignedLoopNum != BasicBlock::NOT_IN_LOOP) @@ -5178,7 +5266,8 @@ void Compiler::placeLoopAlignInstructions() assert(loopsToProcess == 0); } -#endif + +#endif // FEATURE_LOOP_ALIGN //------------------------------------------------------------------------ // generatePatchpointInfo: allocate and fill in patchpoint info data, diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index ea29174e057691..0b010b5501024f 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3670,6 +3670,8 @@ class Compiler #endif BasicBlock* bbNewBasicBlock(BBjumpKinds jumpKind); + + void optIdentifyLoopsForAlignment(); void placeLoopAlignInstructions(); /* @@ -6987,6 +6989,8 @@ class Compiler BasicBlock* exit, unsigned char exitCnt); + void optClearLoopIterInfo(); + #ifdef DEBUG void optPrintLoopInfo(unsigned lnum, bool printVerbose = false); void optPrintLoopInfo(const LoopDsc* loop, bool printVerbose = false); @@ -7025,8 +7029,6 @@ class Compiler void optFindNaturalLoops(); - void optIdentifyLoopsForAlignment(); - // Ensures that all the loops in the loop nest rooted at "loopInd" (an index into the loop table) are 'canonical' -- // each loop has a unique "top." Returns "true" iff the flowgraph has been modified. bool optCanonicalizeLoopNest(unsigned char loopInd); @@ -9754,7 +9756,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // based on experimenting with various benchmarks. // Default minimum loop block weight required to enable loop alignment. -#define DEFAULT_ALIGN_LOOP_MIN_BLOCK_WEIGHT 4 +#define DEFAULT_ALIGN_LOOP_MIN_BLOCK_WEIGHT 3 // By default a loop will be aligned at 32B address boundary to get better // performance as per architecture manuals. diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index e5c15c1d438a6c..f0fd12a2036838 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -61,6 +61,7 @@ CompPhaseNameMacro(PHASE_ZERO_INITS, "Redundant zero Inits", CompPhaseNameMacro(PHASE_FIND_LOOPS, "Find loops", "LOOP-FND", false, -1, false) CompPhaseNameMacro(PHASE_CLONE_LOOPS, "Clone loops", "LP-CLONE", false, -1, false) CompPhaseNameMacro(PHASE_UNROLL_LOOPS, "Unroll loops", "UNROLL", false, -1, false) +CompPhaseNameMacro(PHASE_CLEAR_LOOP_INFO, "Clear loop info", "LP-CLEAR", false, -1, false) CompPhaseNameMacro(PHASE_HOIST_LOOP_CODE, "Hoist loop code", "LP-HOIST", false, -1, false) CompPhaseNameMacro(PHASE_MARK_LOCAL_VARS, "Mark local vars", "MARK-LCL", false, -1, false) CompPhaseNameMacro(PHASE_OPTIMIZE_BOOLS, "Optimize bools", "OPT-BOOL", false, -1, false) diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 6b41c055422b0b..c8ed81af7b986b 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -4712,14 +4712,6 @@ void Compiler::fgRemoveBlock(BasicBlock* block, bool unreachable) { succBlock->bbFlags |= BBF_LOOP_HEAD; - if (block->isLoopAlign()) - { - loopAlignCandidates++; - succBlock->bbFlags |= BBF_LOOP_ALIGN; - JITDUMP("Propagating LOOP_ALIGN flag from " FMT_BB " to " FMT_BB " for " FMT_LP "\n ", block->bbNum, - succBlock->bbNum, block->bbNatLoopNum); - } - if (fgDomsComputed && fgReachable(succBlock, block)) { // Mark all the reachable blocks between 'succBlock' and 'bPrev' @@ -4867,9 +4859,6 @@ void Compiler::fgRemoveBlock(BasicBlock* block, bool unreachable) block->bbFlags |= BBF_REMOVED; } - // If this was marked for alignment, remove it - block->unmarkLoopAlign(this DEBUG_ARG("Removed block")); - if (bPrev != nullptr) { switch (bPrev->bbJumpKind) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 4e446837bcb189..e05ebbcb44ef26 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -2147,13 +2147,6 @@ void Compiler::fgCompactBlocks(BasicBlock* block, BasicBlock* bNext) break; } - if (bNext->isLoopAlign()) - { - block->bbFlags |= BBF_LOOP_ALIGN; - JITDUMP("Propagating LOOP_ALIGN flag from " FMT_BB " to " FMT_BB " during compacting.\n", bNext->bbNum, - block->bbNum); - } - // If we're collapsing a block created after the dominators are // computed, copy block number the block and reuse dominator // information from bNext to block. @@ -5993,9 +5986,6 @@ bool Compiler::fgUpdateFlowGraph(bool doTailDuplication) // Update the loop table if we removed the bottom of a loop, for example. fgUpdateLoopsAfterCompacting(block, bNext); - // If this block was aligned, unmark it - bNext->unmarkLoopAlign(this DEBUG_ARG("Optimized jump")); - // If this is the first Cold basic block update fgFirstColdBlock if (bNext == fgFirstColdBlock) { diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index bdd8d763aa4c7e..24243451c02162 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -1956,18 +1956,7 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) newBlk->scaleBBWeight(slowPathWeightScaleFactor); blk->scaleBBWeight(fastPathWeightScaleFactor); -// TODO: scale the pred edges of `blk`? - -#if FEATURE_LOOP_ALIGN - // If the original loop is aligned, do not align the cloned loop because cloned loop will be executed in - // rare scenario. Additionally, having to align cloned loop will force us to disable some VEX prefix encoding - // and adding compensation for over-estimated instructions. - if (blk->isLoopAlign()) - { - newBlk->bbFlags &= ~BBF_LOOP_ALIGN; - JITDUMP("Removing LOOP_ALIGN flag from cloned loop in " FMT_BB "\n", newBlk->bbNum); - } -#endif + // TODO: scale the pred edges of `blk`? // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 138e46358065e2..a11f568f431b28 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -15366,8 +15366,6 @@ bool Compiler::fgFoldConditional(BasicBlock* block) optMarkLoopRemoved(loopNum); - optLoopTable[loopNum].lpTop->unmarkLoopAlign(this DEBUG_ARG("Bogus loop")); - #ifdef DEBUG if (verbose) { diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index a6c9ea4d85c689..dde519f18e8395 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -382,8 +382,6 @@ void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk) } JITDUMP("\n"); - - begBlk->unmarkLoopAlign(this DEBUG_ARG("Removed loop")); } /***************************************************************************************************** @@ -592,6 +590,27 @@ void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmar } } +//------------------------------------------------------------------------ +// optClearLoopIterInfo: Clear the info related to LPFLG_ITER loops in the loop table. +// The various fields related to iterators is known to be valid for loop cloning and unrolling, +// but becomes invalid afterwards. Clear the info that might be used incorrectly afterwards +// in JitDump or by subsequent phases. +// +void Compiler::optClearLoopIterInfo() +{ + for (unsigned lnum = 0; lnum < optLoopCount; lnum++) + { + LoopDsc& loop = optLoopTable[lnum]; + loop.lpFlags &= ~(LPFLG_ITER | LPFLG_VAR_INIT | LPFLG_CONST_INIT | LPFLG_SIMD_LIMIT | LPFLG_VAR_LIMIT | + LPFLG_CONST_LIMIT | LPFLG_ARRLEN_LIMIT); + + loop.lpIterTree = nullptr; + loop.lpInitBlock = nullptr; + loop.lpConstInit = -1; // union with loop.lpVarInit + loop.lpTestTree = nullptr; + } +} + #ifdef DEBUG /***************************************************************************** @@ -2598,50 +2617,6 @@ void Compiler::optFindNaturalLoops() #endif // DEBUG } -//------------------------------------------------------------------------ -// optIdentifyLoopsForAlignment: Determine which loops should be considered for alignment. -// -// All innermost loops whose block weight meets a threshold are candidates for alignment. -// The `first` block of the loop is marked with the BBF_LOOP_ALIGN flag to indicate this -// (the loop table itself is not changed). -// -// Depends on the loop table, and on block weights being set. -// -void Compiler::optIdentifyLoopsForAlignment() -{ -#if FEATURE_LOOP_ALIGN - if (codeGen->ShouldAlignLoops()) - { - for (BasicBlock::loopNumber loopInd = 0; loopInd < optLoopCount; loopInd++) - { - // An innerloop candidate that might need alignment - if (optLoopTable[loopInd].lpChild == BasicBlock::NOT_IN_LOOP) - { - BasicBlock* top = optLoopTable[loopInd].lpTop; - weight_t topWeight = top->getBBWeight(this); - if (topWeight >= (opts.compJitAlignLoopMinBlockWeight * BB_UNITY_WEIGHT)) - { - // Sometimes with JitOptRepeat > 1, we might end up finding the loops twice. In such - // cases, make sure to count them just once. - if (!top->isLoopAlign()) - { - loopAlignCandidates++; - top->bbFlags |= BBF_LOOP_ALIGN; - JITDUMP(FMT_LP " that starts at " FMT_BB " needs alignment, weight=" FMT_WT ".\n", loopInd, - top->bbNum, top->getBBWeight(this)); - } - } - else - { - JITDUMP("Skip alignment for " FMT_LP " that starts at " FMT_BB " weight=" FMT_WT ".\n", loopInd, - top->bbNum, topWeight); - } - } - } - } -#endif -} - //------------------------------------------------------------------------ // optRedirectBlock: Replace the branch successors of a block based on a block map. // @@ -3912,13 +3887,6 @@ PhaseStatus Compiler::optUnrollLoops() #endif } -#if FEATURE_LOOP_ALIGN - for (BasicBlock* const block : loop.LoopBlocks()) - { - block->unmarkLoopAlign(this DEBUG_ARG("Unrolled loop")); - } -#endif - // Create the unrolled loop statement list. { // When unrolling a loop, that loop disappears (and will be removed from the loop table). Each unrolled @@ -5031,7 +4999,6 @@ void Compiler::optFindLoops() { optFindNaturalLoops(); optFindAndScaleGeneralLoopBlocks(); - optIdentifyLoopsForAlignment(); // Check if any of the loops need alignment } #ifdef DEBUG @@ -8023,19 +7990,6 @@ bool Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk) // Marks the containsCall information to "lnum" and any parent loops. void Compiler::AddContainsCallAllContainingLoops(unsigned lnum) { - -#if FEATURE_LOOP_ALIGN - // If this is the inner most loop, reset the LOOP_ALIGN flag - // because a loop having call will not likely to benefit from - // alignment - if (optLoopTable[lnum].lpChild == BasicBlock::NOT_IN_LOOP) - { - BasicBlock* top = optLoopTable[lnum].lpTop; - - top->unmarkLoopAlign(this DEBUG_ARG("Loop with call")); - } -#endif - assert(0 <= lnum && lnum < optLoopCount); while (lnum != BasicBlock::NOT_IN_LOOP) {