From 781c25b72579cac7b22dbd85269b35eb9fe51090 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 3 Mar 2021 19:06:07 -0800 Subject: [PATCH 1/2] JIT: profile updates for finally optimizations Update `impImportLeave` to propagate IBC counts to new blocks. Fix profile weights during callfinally chain merging. Scale profile weights for cloned finallys. Choose the continuation path based on profile weight. Make sure to keep handler entry hot. Partial fix for #48925. --- src/coreclr/jit/fgehopt.cpp | 206 +++++++++++++++++++++++++++++++---- src/coreclr/jit/importer.cpp | 30 +++-- 2 files changed, 197 insertions(+), 39 deletions(-) diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp index ba817b5b49091d..efa7342e73fce3 100644 --- a/src/coreclr/jit/fgehopt.cpp +++ b/src/coreclr/jit/fgehopt.cpp @@ -779,18 +779,24 @@ PhaseStatus Compiler::fgCloneFinally() " %u blocks, %u statements\n", XTnum, regionBBCount, regionStmtCount); - // Walk the try region backwards looking for the last block - // that transfers control to a callfinally. + // Walk the try region backwards looking for blocks + // that transfer control to a callfinally. + // + // For non-pgo we find the lexically last block; for + // pgo we find the highest-weight block. + // BasicBlock* const firstTryBlock = HBtab->ebdTryBeg; BasicBlock* const lastTryBlock = HBtab->ebdTryLast; assert(firstTryBlock->getTryIndex() == XTnum); assert(bbInTryRegions(XTnum, lastTryBlock)); BasicBlock* const beforeTryBlock = firstTryBlock->bbPrev; - BasicBlock* normalCallFinallyBlock = nullptr; - BasicBlock* normalCallFinallyReturn = nullptr; - BasicBlock* cloneInsertAfter = HBtab->ebdTryLast; - bool tryToRelocateCallFinally = false; + BasicBlock* normalCallFinallyBlock = nullptr; + BasicBlock* normalCallFinallyReturn = nullptr; + BasicBlock* cloneInsertAfter = HBtab->ebdTryLast; + bool tryToRelocateCallFinally = false; + const bool usingProfileWeights = fgIsUsingProfileWeights(); + BasicBlock::weight_t currentWeight = BB_ZERO_WEIGHT; for (BasicBlock* block = lastTryBlock; block != beforeTryBlock; block = block->bbPrev) { @@ -830,12 +836,63 @@ PhaseStatus Compiler::fgCloneFinally() BasicBlock* const jumpDest = block; #endif // FEATURE_EH_CALLFINALLY_THUNKS - // Found our block. + // Found a block that invokes the finally. + // BasicBlock* const finallyReturnBlock = jumpDest->bbNext; BasicBlock* const postTryFinallyBlock = finallyReturnBlock->bbJumpDest; + bool isUpdate = false; - normalCallFinallyBlock = jumpDest; - normalCallFinallyReturn = postTryFinallyBlock; + // See if this is the one we want to use to inspire cloning. + // + if (normalCallFinallyBlock == nullptr) + { + normalCallFinallyBlock = jumpDest; + normalCallFinallyReturn = postTryFinallyBlock; + + if (usingProfileWeights) + { + if (block->hasProfileWeight()) + { + JITDUMP("Found profiled " FMT_BB " with weight " FMT_WT "\n", block->bbNum, block->bbWeight); + currentWeight = block->bbWeight; + } + else + { + JITDUMP("Found unprofiled " FMT_BB "\n", block->bbNum); + } + } + } + else + { + assert(usingProfileWeights); + + if (!block->hasProfileWeight()) + { + // An unprofiled block in method with profile data. + // We generally don't expect to see these as the + // blocks in EH regions must have come from the root + // method, which we know has profile data. + // Just skip over them for now. + // + JITDUMP("Skipping past unprofiled " FMT_BB "\n", block->bbNum); + continue; + } + + if (block->bbWeight <= currentWeight) + { + JITDUMP("Skipping past " FMT_BB " with weight " FMT_WT "\n", block->bbNum, block->bbWeight); + continue; + } + + // Prefer this block. + // + JITDUMP("Preferring " FMT_BB " since " FMT_WT " > " FMT_WT "\n", block->bbNum, block->bbWeight, + currentWeight); + normalCallFinallyBlock = jumpDest; + normalCallFinallyReturn = postTryFinallyBlock; + currentWeight = block->bbWeight; + isUpdate = true; + } #if FEATURE_EH_CALLFINALLY_THUNKS // When there are callfinally thunks, we don't expect to see the @@ -850,16 +907,24 @@ PhaseStatus Compiler::fgCloneFinally() // through from the try into the clone. tryToRelocateCallFinally = true; - JITDUMP("Chose path to clone: try block " FMT_BB " jumps to callfinally at " FMT_BB ";" + JITDUMP("%s path to clone: try block " FMT_BB " jumps to callfinally at " FMT_BB ";" " the call returns to " FMT_BB " which jumps to " FMT_BB "\n", - block->bbNum, jumpDest->bbNum, finallyReturnBlock->bbNum, postTryFinallyBlock->bbNum); + isUpdate ? "Updating" : "Choosing", block->bbNum, jumpDest->bbNum, finallyReturnBlock->bbNum, + postTryFinallyBlock->bbNum); #else - JITDUMP("Chose path to clone: try block " FMT_BB " is a callfinally;" + JITDUMP("%s path to clone: try block " FMT_BB " is a callfinally;" " the call returns to " FMT_BB " which jumps to " FMT_BB "\n", - block->bbNum, finallyReturnBlock->bbNum, postTryFinallyBlock->bbNum); + isUpdate ? "Updating" : "Choosing", block->bbNum, finallyReturnBlock->bbNum, + postTryFinallyBlock->bbNum); #endif // FEATURE_EH_CALLFINALLY_THUNKS - break; + // For non-pgo just take the first one we find. + // For pgo, keep searching in case we find one we like better. + // + if (!usingProfileWeights) + { + break; + } } // If there is no call to the finally, don't clone. @@ -956,11 +1021,14 @@ PhaseStatus Compiler::fgCloneFinally() // // Clone the finally body, and splice it into the flow graph // within in the parent region of the try. - const unsigned finallyTryIndex = firstBlock->bbTryIndex; - BasicBlock* insertAfter = nullptr; - BlockToBlockMap blockMap(getAllocator()); - bool clonedOk = true; - unsigned cloneBBCount = 0; + // + const unsigned finallyTryIndex = firstBlock->bbTryIndex; + BasicBlock* insertAfter = nullptr; + BlockToBlockMap blockMap(getAllocator()); + bool clonedOk = true; + unsigned cloneBBCount = 0; + BasicBlock::weight_t const originalWeight = + firstBlock->hasProfileWeight() ? firstBlock->bbWeight : BB_ZERO_WEIGHT; for (BasicBlock* block = firstBlock; block != nextBlock; block = block->bbNext) { @@ -1067,9 +1135,10 @@ PhaseStatus Compiler::fgCloneFinally() // Modify the targeting call finallys to branch to the cloned // finally. Make a note if we see some calls that can't be // retargeted (since they want to return to other places). - BasicBlock* const firstCloneBlock = blockMap[firstBlock]; - bool retargetedAllCalls = true; - BasicBlock* currentBlock = firstCallFinallyRangeBlock; + BasicBlock* const firstCloneBlock = blockMap[firstBlock]; + bool retargetedAllCalls = true; + BasicBlock* currentBlock = firstCallFinallyRangeBlock; + BasicBlock::weight_t retargetedWeight = 0; while (currentBlock != endCallFinallyRangeBlock) { @@ -1088,6 +1157,9 @@ PhaseStatus Compiler::fgCloneFinally() // by the cloned finally and by the called finally. if (postTryFinallyBlock == normalCallFinallyReturn) { + JITDUMP("Retargeting callfinally " FMT_BB " to clone entry " FMT_BB "\n", currentBlock->bbNum, + firstCloneBlock->bbNum); + // This call returns to the expected spot, so // retarget it to branch to the clone. currentBlock->bbJumpDest = firstCloneBlock; @@ -1107,6 +1179,11 @@ PhaseStatus Compiler::fgCloneFinally() // Make sure iteration isn't going off the deep end. assert(leaveBlock != endCallFinallyRangeBlock); + + if (currentBlock->hasProfileWeight()) + { + retargetedWeight += currentBlock->bbWeight; + } } else { @@ -1144,7 +1221,53 @@ PhaseStatus Compiler::fgCloneFinally() // Cleanup the continuation fgCleanupContinuation(normalCallFinallyReturn); - // Todo -- mark cloned blocks as a cloned finally.... + // If we have profile data, compute how the weights split, + // and update the weights in both the clone and the original. + // + // TODO: if original weight is zero, we probably should forgo cloning...? + // + // TODO: it will frequently be the case that the original scale is 0.0 as + // all the profiled flow will go to the clone. + // + // Decide if we really want to set all those counts to zero, and if so + // whether we should mark the original as rarely run. + // + if (usingProfileWeights && (originalWeight > BB_ZERO_WEIGHT)) + { + // We can't leave the finally more often than we enter. + // So cap cloned scale at 1.0f + // + BasicBlock::weight_t const clonedScale = + retargetedWeight < originalWeight ? (retargetedWeight / originalWeight) : 1.0f; + BasicBlock::weight_t const originalScale = 1.0f - clonedScale; + + JITDUMP("Profile scale factor (" FMT_WT "/" FMT_WT ") => clone " FMT_WT " / original " FMT_WT "\n", + retargetedWeight, originalWeight, clonedScale, originalScale); + + for (BasicBlock* block = firstBlock; block != lastBlock->bbNext; block = block->bbNext) + { + if (block->hasProfileWeight()) + { + BasicBlock::weight_t const blockWeight = block->bbWeight; + block->setBBProfileWeight(blockWeight * originalScale); + JITDUMP("Set weight of " FMT_BB " to " FMT_WT "\n", block->bbNum, block->bbWeight); + +#if HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION + // Handle a special case -- some handler entries can't have zero profile count. + // + if (bbIsHandlerBeg(block) && block->isRunRarely()) + { + JITDUMP("Suppressing zero count for " FMT_BB " as it is a handler entry\n", block->bbNum); + block->makeBlockHot(); + } +#endif + + BasicBlock* const clonedBlock = blockMap[block]; + clonedBlock->setBBProfileWeight(blockWeight * clonedScale); + JITDUMP("Set weight of " FMT_BB " to " FMT_WT "\n", clonedBlock->bbNum, clonedBlock->bbWeight); + } + } + } // Done! JITDUMP("\nDone with EH#%u\n\n", XTnum); @@ -1767,6 +1890,43 @@ bool Compiler::fgRetargetBranchesToCanonicalCallFinally(BasicBlock* block, assert(callFinally->bbRefs > 0); fgRemoveRefPred(callFinally, block); + // Update profile counts + // + if (block->hasProfileWeight()) + { + // Add weight to the canonical call finally pair. + // + BasicBlock::weight_t const canonicalWeight = + canonicalCallFinally->hasProfileWeight() ? canonicalCallFinally->bbWeight : BB_ZERO_WEIGHT; + BasicBlock::weight_t const newCanonicalWeight = block->bbWeight + canonicalWeight; + + canonicalCallFinally->setBBProfileWeight(newCanonicalWeight); + + BasicBlock* const canonicalLeaveBlock = canonicalCallFinally->bbNext; + + BasicBlock::weight_t const canonicalLeaveWeight = + canonicalLeaveBlock->hasProfileWeight() ? canonicalLeaveBlock->bbWeight : BB_ZERO_WEIGHT; + BasicBlock::weight_t const newLeaveWeight = block->bbWeight + canonicalLeaveWeight; + + canonicalLeaveBlock->setBBProfileWeight(newLeaveWeight); + + // Remove weight from the old call finally pair. + // + if (callFinally->hasProfileWeight()) + { + BasicBlock::weight_t const newCallFinallyWeight = + callFinally->bbWeight > block->bbWeight ? callFinally->bbWeight - block->bbWeight : BB_ZERO_WEIGHT; + callFinally->setBBProfileWeight(newCallFinallyWeight); + } + + if (leaveBlock->hasProfileWeight()) + { + BasicBlock::weight_t const newLeaveWeight = + leaveBlock->bbWeight > block->bbWeight ? leaveBlock->bbWeight - block->bbWeight : BB_ZERO_WEIGHT; + leaveBlock->setBBProfileWeight(newLeaveWeight); + } + } + return true; } diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index a6aa61578f9995..700d920d3fb99e 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -9943,8 +9943,7 @@ void Compiler::impImportLeave(BasicBlock* block) step->bbJumpDest->bbRefs++; /* The new block will inherit this block's weight */ - callBlock->setBBWeight(block->bbWeight); - callBlock->bbFlags |= block->bbFlags & BBF_RUN_RARELY; + callBlock->inheritWeight(block); #ifdef DEBUG if (verbose) @@ -9973,8 +9972,8 @@ void Compiler::impImportLeave(BasicBlock* block) step = fgNewBBafter(BBJ_ALWAYS, callBlock, true); /* The new block will inherit this block's weight */ - step->setBBWeight(block->bbWeight); - step->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED | BBF_KEEP_BBJ_ALWAYS; + step->inheritWeight(block); + step->bbFlags |= BBF_IMPORTED | BBF_KEEP_BBJ_ALWAYS; #ifdef DEBUG if (verbose) @@ -10034,8 +10033,7 @@ void Compiler::impImportLeave(BasicBlock* block) step->bbJumpDest = finalStep; /* The new block will inherit this block's weight */ - finalStep->setBBWeight(block->bbWeight); - finalStep->bbFlags |= block->bbFlags & BBF_RUN_RARELY; + finalStep->inheritWeight(block); #ifdef DEBUG if (verbose) @@ -10197,8 +10195,8 @@ void Compiler::impImportLeave(BasicBlock* block) #endif // defined(TARGET_ARM) /* The new block will inherit this block's weight */ - exitBlock->setBBWeight(block->bbWeight); - exitBlock->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED; + exitBlock->inheritWeight(block); + exitBlock->bbFlags |= BBF_IMPORTED; /* This exit block is the new step */ step = exitBlock; @@ -10242,8 +10240,8 @@ void Compiler::impImportLeave(BasicBlock* block) block->bbJumpDest->bbRefs++; /* The new block will inherit this block's weight */ - callBlock->setBBWeight(block->bbWeight); - callBlock->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED; + callBlock->inheritWeight(block); + callBlock->bbFlags |= BBF_IMPORTED; #ifdef DEBUG if (verbose) @@ -10342,8 +10340,8 @@ void Compiler::impImportLeave(BasicBlock* block) #endif // defined(TARGET_ARM) /* The new block will inherit this block's weight */ - callBlock->setBBWeight(block->bbWeight); - callBlock->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED; + callBlock->inheritWeight(block); + callBlock->bbFlags |= BBF_IMPORTED; #ifdef DEBUG if (verbose) @@ -10359,8 +10357,8 @@ void Compiler::impImportLeave(BasicBlock* block) stepType = ST_FinallyReturn; /* The new block will inherit this block's weight */ - step->setBBWeight(block->bbWeight); - step->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED | BBF_KEEP_BBJ_ALWAYS; + step->inheritWeight(block); + step->bbFlags |= BBF_IMPORTED | BBF_KEEP_BBJ_ALWAYS; #ifdef DEBUG if (verbose) @@ -10446,8 +10444,8 @@ void Compiler::impImportLeave(BasicBlock* block) #endif // defined(TARGET_ARM) /* The new block will inherit this block's weight */ - catchStep->setBBWeight(block->bbWeight); - catchStep->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED; + catchStep->inheritWeight(block); + catchStep->bbFlags |= BBF_IMPORTED; #ifdef DEBUG if (verbose) From 5b582f26002dc63a5d16eb00b3fe23aa09615ff4 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 4 Mar 2021 13:24:06 -0800 Subject: [PATCH 2/2] review feedback --- src/coreclr/jit/fgehopt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp index efa7342e73fce3..7de6120fd9b4a4 100644 --- a/src/coreclr/jit/fgehopt.cpp +++ b/src/coreclr/jit/fgehopt.cpp @@ -1138,7 +1138,7 @@ PhaseStatus Compiler::fgCloneFinally() BasicBlock* const firstCloneBlock = blockMap[firstBlock]; bool retargetedAllCalls = true; BasicBlock* currentBlock = firstCallFinallyRangeBlock; - BasicBlock::weight_t retargetedWeight = 0; + BasicBlock::weight_t retargetedWeight = BB_ZERO_WEIGHT; while (currentBlock != endCallFinallyRangeBlock) {