From 9a39137fd28af0ffaee5c4fe4cfe2743c31ab81d Mon Sep 17 00:00:00 2001 From: Eugene Rozenfeld Date: Thu, 21 May 2020 14:47:46 -0700 Subject: [PATCH 1/2] Optimization to remove redundant zero initializations. This change adds a phase that iterates over basic blocks starting with the first basic block until there is no unique basic block successor or until it detects a loop. It keeps track of local nodes it encounters. When it gets to an assignment to a local variable or a local field, it checks whether the assignment is the first reference to the local (or to the parent of the local field), and, if so, it may do one of two optimizations: 1. If the local is untracked, the rhs of the assignment is 0, and the local is guaranteed to be fully initialized in the prolog, the explicit zero initialization is removed. 2. If the assignment is to a local (and not a field) and either the local has no gc pointers or there are no gc-safe points between the prolog and the assignment, it marks the local with lvHasExplicitInit which tells the codegen not to insert zero initialization for this local in the prolog. This addresses one of the examples in #2325 and 5 examples in #1007. --- src/coreclr/src/jit/codegencommon.cpp | 6 ++ src/coreclr/src/jit/compiler.cpp | 2 + src/coreclr/src/jit/compiler.h | 6 ++ src/coreclr/src/jit/compmemkind.h | 1 + src/coreclr/src/jit/compphases.h | 1 + src/coreclr/src/jit/optimizer.cpp | 129 ++++++++++++++++++++++++++ 6 files changed, 145 insertions(+) diff --git a/src/coreclr/src/jit/codegencommon.cpp b/src/coreclr/src/jit/codegencommon.cpp index 10129250671e9e..90e5a5bef4017d 100644 --- a/src/coreclr/src/jit/codegencommon.cpp +++ b/src/coreclr/src/jit/codegencommon.cpp @@ -4655,6 +4655,12 @@ void CodeGen::genCheckUseBlockInit() continue; } + if (varDsc->lvHasExplicitInit) + { + varDsc->lvMustInit = 0; + continue; + } + if (compiler->info.compInitMem || varDsc->HasGCPtr() || varDsc->lvMustInit) { if (varDsc->lvTracked) diff --git a/src/coreclr/src/jit/compiler.cpp b/src/coreclr/src/jit/compiler.cpp index 4779a6fa90c481..0a9003f51268c8 100644 --- a/src/coreclr/src/jit/compiler.cpp +++ b/src/coreclr/src/jit/compiler.cpp @@ -4690,6 +4690,8 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags DoPhase(this, PHASE_BUILD_SSA, &Compiler::fgSsaBuild); } + DoPhase(this, PHASE_ZERO_INITS, &Compiler::optRemoveRedundantZeroInits); + if (doEarlyProp) { // Propagate array length and rewrite getType() method call diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index e1521d63b2cd22..0efc1d270318f6 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -518,6 +518,10 @@ class LclVarDsc unsigned char lvSuppressedZeroInit : 1; // local needs zero init if we transform tail call to loop + unsigned char lvHasExplicitInit : 1; // The local is explicitly initialized and doesn't need zero initialization in + // the prolog. If the local has gc pointers, there are no gc-safe points + // between the prolog and the explicit initialization. + union { unsigned lvFieldLclStart; // The index of the local var representing the first field in the promoted struct // local. For implicit byref parameters, this gets hijacked between @@ -5906,6 +5910,8 @@ class Compiler void optUnrollLoops(); // Unrolls loops (needs to have cost info) + void optRemoveRedundantZeroInits(); + protected: // This enumeration describes what is killed by a call. diff --git a/src/coreclr/src/jit/compmemkind.h b/src/coreclr/src/jit/compmemkind.h index 1b409544ba23bf..586e5f3ac3ca78 100644 --- a/src/coreclr/src/jit/compmemkind.h +++ b/src/coreclr/src/jit/compmemkind.h @@ -58,6 +58,7 @@ CompMemKindMacro(VariableLiveRanges) CompMemKindMacro(ClassLayout) CompMemKindMacro(TailMergeThrows) CompMemKindMacro(EarlyProp) +CompMemKindMacro(ZeroInit) //clang-format on #undef CompMemKindMacro diff --git a/src/coreclr/src/jit/compphases.h b/src/coreclr/src/jit/compphases.h index 0cafdc1793e3ca..ccda79c4cfe794 100644 --- a/src/coreclr/src/jit/compphases.h +++ b/src/coreclr/src/jit/compphases.h @@ -55,6 +55,7 @@ CompPhaseNameMacro(PHASE_CREATE_FUNCLETS, "Create EH funclets", CompPhaseNameMacro(PHASE_MERGE_THROWS, "Merge throw blocks", "MRGTHROW", false, -1, false) CompPhaseNameMacro(PHASE_OPTIMIZE_LAYOUT, "Optimize layout", "LAYOUT", false, -1, false) CompPhaseNameMacro(PHASE_COMPUTE_REACHABILITY, "Compute blocks reachability", "BL_REACH", false, -1, false) +CompPhaseNameMacro(PHASE_ZERO_INITS, "Redundant zero Inits", "ZERO-INIT", false, -1, false) CompPhaseNameMacro(PHASE_OPTIMIZE_LOOPS, "Optimize loops", "LOOP-OPT", false, -1, false) CompPhaseNameMacro(PHASE_CLONE_LOOPS, "Clone loops", "LP-CLONE", false, -1, false) CompPhaseNameMacro(PHASE_UNROLL_LOOPS, "Unroll loops", "UNROLL", false, -1, false) diff --git a/src/coreclr/src/jit/optimizer.cpp b/src/coreclr/src/jit/optimizer.cpp index 096898ba00997c..daf4a88444b721 100644 --- a/src/coreclr/src/jit/optimizer.cpp +++ b/src/coreclr/src/jit/optimizer.cpp @@ -9189,3 +9189,132 @@ void Compiler::optOptimizeBools() fgDebugCheckBBlist(); #endif } + +typedef JitHashTable, unsigned> LclVarRefCounts; + +//------------------------------------------------------------------------------------------ +// optRemoveRedundantZeroInits: Remove redundant zero intializations. +// +// Notes: +// This phase iterates over basic blocks starting with the first basic block until there is no unique +// basic block successor or until it detects a loop. It keeps track of local nodes it encounters. +// When it gets to an assignment to a local variable or a local field, it checks whether the assignment +// is the first reference to the local (or to the parent of the local field), and, if so, +// it may do one of two optimizations: +// 1. If the local is untracked, the rhs of the assignment is 0, and the local is guaranteed +// to be fully initialized in the prolog, the explicit zero initialization is removed. +// 2. If the assignment is to a local (and not a field) and either the local has no gc pointers or there +// are no gc-safe points between the prolog and the assignment, it marks the local with lvHasExplicitInit +// which tells the codegen not to insert zero initialization for this local in the prolog. + +void Compiler::optRemoveRedundantZeroInits() +{ +#ifdef DEBUG + if (verbose) + { + printf("*************** In optRemoveRedundantZeroInits()\n"); + } +#endif // DEBUG + + CompAllocator allocator(getAllocator(CMK_ZeroInit)); + LclVarRefCounts refCounts(allocator); + bool hasGCSafePoint = false; + + assert(fgStmtListThreaded); + + for (BasicBlock* block = fgFirstBB; (block != nullptr) && ((block->bbFlags & BBF_MARKED) == 0); + block = block->GetUniqueSucc()) + { + block->bbFlags |= BBF_MARKED; + for (Statement* stmt = block->FirstNonPhiDef(); stmt != nullptr;) + { + Statement* next = stmt->GetNextStmt(); + for (GenTree* tree = stmt->GetTreeList(); tree != nullptr; tree = tree->gtNext) + { + if ((tree->gtFlags & GTF_CALL) != 0) + { + hasGCSafePoint = true; + } + + switch (tree->gtOper) + { + case GT_LCL_VAR: + case GT_LCL_FLD: + case GT_LCL_VAR_ADDR: + case GT_LCL_FLD_ADDR: + { + unsigned lclNum = tree->AsLclVarCommon()->GetLclNum(); + unsigned* pRefCount = refCounts.LookupPointer(lclNum); + if (pRefCount != nullptr) + { + *pRefCount = (*pRefCount)++; + } + else + { + refCounts.Set(lclNum, 1); + } + + break; + } + case GT_ASG: + { + GenTreeOp* treeOp = tree->AsOp(); + if (treeOp->gtOp1->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + unsigned lclNum = treeOp->gtOp1->AsLclVarCommon()->GetLclNum(); + LclVarDsc* const lclDsc = lvaGetDesc(lclNum); + unsigned* pRefCount = refCounts.LookupPointer(lclNum); + assert(pRefCount != nullptr); + if (*pRefCount == 1) + { + // The local hasn't been referenced before this assignment. + bool removedExplicitZeroInit = false; + if (!lclDsc->lvTracked && treeOp->gtOp2->IsIntegralConst(0)) + { + bool bbInALoop = (block->bbFlags & BBF_BACKWARD_JUMP) != 0; + bool bbIsReturn = block->bbJumpKind == BBJ_RETURN; + + if (!fgVarNeedsExplicitZeroInit(lclDsc, bbInALoop, bbIsReturn)) + { + // We are guaranteed to have a zero initialization in the prolog and + // the local hasn't been redefined between the prolog and this explicit + // zero initialization so the assignment can be safely removed. + if (tree == stmt->GetRootNode()) + { + fgRemoveStmt(block, stmt); + removedExplicitZeroInit = true; + *pRefCount = 0; + lclDsc->lvSuppressedZeroInit = 1; + } + } + } + + if (!removedExplicitZeroInit && treeOp->gtOp1->OperIs(GT_LCL_VAR)) + { + if (!lclDsc->HasGCPtr() || + (!GetInterruptible() && !hasGCSafePoint && !compMethodRequiresPInvokeFrame())) + { + // The local hasn't been used and won't be reported to the gc between + // the prolog and this explicit zero intialization. Therefore, it doesn't + // require zero initialization in the prolog. + lclDsc->lvHasExplicitInit = 1; + } + } + } + } + break; + } + default: + break; + } + } + stmt = next; + } + } + + for (BasicBlock* block = fgFirstBB; (block != nullptr) && ((block->bbFlags & BBF_MARKED) != 0); + block = block->GetUniqueSucc()) + { + block->bbFlags &= ~BBF_MARKED; + } +} From 7dd58e0e9ea1ed8ab663fc89a8390edf7b0a1484 Mon Sep 17 00:00:00 2001 From: Eugene Rozenfeld Date: Sun, 24 May 2020 01:45:15 -0700 Subject: [PATCH 2/2] Fix a few issues and address review feedback. We should keep prolog zero initialization if the local is lvLiveInOutOfHndlr and some node between the prolog and the explicit assignment may throw. Some variables are never zero initialized in the prolog. This commit disables the optimization for them. Fixed the statement that incremented the ref count: msvc and clang had different behavior so test on Linux were failing. Code review feedback: relaxed the gc-safe point condition to recognize calls with suppressed gc transitions; fixed comments. --- src/coreclr/src/jit/codegencommon.cpp | 39 +++------------------------ src/coreclr/src/jit/compiler.h | 5 +++- src/coreclr/src/jit/compiler.hpp | 39 +++++++++++++++++++++++++-- src/coreclr/src/jit/flowgraph.cpp | 2 +- src/coreclr/src/jit/importer.cpp | 2 +- src/coreclr/src/jit/objectalloc.cpp | 2 +- src/coreclr/src/jit/optimizer.cpp | 35 +++++++++++++++++------- 7 files changed, 72 insertions(+), 52 deletions(-) diff --git a/src/coreclr/src/jit/codegencommon.cpp b/src/coreclr/src/jit/codegencommon.cpp index 90e5a5bef4017d..a89e24e217d51a 100644 --- a/src/coreclr/src/jit/codegencommon.cpp +++ b/src/coreclr/src/jit/codegencommon.cpp @@ -4596,8 +4596,9 @@ void CodeGen::genCheckUseBlockInit() // double-counting the initialization impact of any locals. bool counted = false; - if (varDsc->lvIsParam) + if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame) { + noway_assert(varDsc->lvRefCnt() == 0); continue; } @@ -4608,45 +4609,11 @@ void CodeGen::genCheckUseBlockInit() continue; } - // Likewise, initialization of the GS cookie is handled specially for OSR. - // Could do this for non-OSR too.. (likewise for the dummy) - if (compiler->opts.IsOSR() && varNum == compiler->lvaGSSecurityCookie) - { - continue; - } - - if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame) - { - noway_assert(varDsc->lvRefCnt() == 0); - continue; - } - - if (varNum == compiler->lvaInlinedPInvokeFrameVar || varNum == compiler->lvaStubArgumentVar || - varNum == compiler->lvaRetAddrVar) + if (compiler->fgVarIsNeverZeroInitializedInProlog(varNum)) { continue; } -#if FEATURE_FIXED_OUT_ARGS - if (varNum == compiler->lvaPInvokeFrameRegSaveVar) - { - continue; - } - if (varNum == compiler->lvaOutgoingArgSpaceVar) - { - continue; - } -#endif - -#if defined(FEATURE_EH_FUNCLETS) - // There's no need to force 0-initialization of the PSPSym, it will be - // initialized with a real value in the prolog - if (varNum == compiler->lvaPSPSym) - { - continue; - } -#endif - if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc)) { // For Compiler::PROMOTION_TYPE_DEPENDENT type of promotion, the whole struct should have been diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index 0efc1d270318f6..c6181d5e194a55 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -4657,8 +4657,11 @@ class Compiler unsigned fgSsaPassesCompleted; // Number of times fgSsaBuild has been run. + // Returns "true" if this is a special variable that is never zero initialized in the prolog. + inline bool fgVarIsNeverZeroInitializedInProlog(unsigned varNum); + // Returns "true" if the variable needs explicit zero initialization. - inline bool fgVarNeedsExplicitZeroInit(LclVarDsc* varDsc, bool bbInALoop, bool bbIsReturn); + inline bool fgVarNeedsExplicitZeroInit(unsigned varNum, bool bbInALoop, bool bbIsReturn); // The value numbers for this compilation. ValueNumStore* vnStore; diff --git a/src/coreclr/src/jit/compiler.hpp b/src/coreclr/src/jit/compiler.hpp index 993c287aea54be..f3e590a7303a45 100644 --- a/src/coreclr/src/jit/compiler.hpp +++ b/src/coreclr/src/jit/compiler.hpp @@ -4145,11 +4145,39 @@ inline void Compiler::CLR_API_Leave(API_ICorJitInfo_Names ename) #endif // MEASURE_CLRAPI_CALLS +//------------------------------------------------------------------------------ +// fgVarIsNeverZeroInitializedInProlog : Check whether the variable is never zero initialized in the prolog. +// +// Arguments: +// varNum - local variable number +// +// Returns: +// true if this is a special variable that is never zero initialized in the prolog; +// false otherwise +// + +bool Compiler::fgVarIsNeverZeroInitializedInProlog(unsigned varNum) +{ + LclVarDsc* varDsc = lvaGetDesc(varNum); + bool result = varDsc->lvIsParam || lvaIsOSRLocal(varNum) || (opts.IsOSR() && (varNum == lvaGSSecurityCookie)) || + (varNum == lvaInlinedPInvokeFrameVar) || (varNum == lvaStubArgumentVar) || (varNum == lvaRetAddrVar); + +#if FEATURE_FIXED_OUT_ARGS + result = result || (varNum == lvaPInvokeFrameRegSaveVar) || (varNum == lvaOutgoingArgSpaceVar); +#endif + +#if defined(FEATURE_EH_FUNCLETS) + result = result || (varNum == lvaPSPSym); +#endif + + return result; +} + //------------------------------------------------------------------------------ // fgVarNeedsExplicitZeroInit : Check whether the variable needs an explicit zero initialization. // // Arguments: -// varDsc - local var description +// varNum - local var number // bbInALoop - true if the basic block may be in a loop // bbIsReturn - true if the basic block always returns // @@ -4165,13 +4193,20 @@ inline void Compiler::CLR_API_Leave(API_ICorJitInfo_Names ename) // - compInitMem is set and the variable has a long lifetime or has gc fields. // In these cases we will insert zero-initialization in the prolog if necessary. -bool Compiler::fgVarNeedsExplicitZeroInit(LclVarDsc* varDsc, bool bbInALoop, bool bbIsReturn) +bool Compiler::fgVarNeedsExplicitZeroInit(unsigned varNum, bool bbInALoop, bool bbIsReturn) { + LclVarDsc* varDsc = lvaGetDesc(varNum); + if (bbInALoop && !bbIsReturn) { return true; } + if (fgVarIsNeverZeroInitializedInProlog(varNum)) + { + return true; + } + if (varTypeIsGC(varDsc->lvType)) { return false; diff --git a/src/coreclr/src/jit/flowgraph.cpp b/src/coreclr/src/jit/flowgraph.cpp index 617367eef73e90..8f668426490379 100644 --- a/src/coreclr/src/jit/flowgraph.cpp +++ b/src/coreclr/src/jit/flowgraph.cpp @@ -23787,7 +23787,7 @@ Statement* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) if (tmpNum != BAD_VAR_NUM) { LclVarDsc* const tmpDsc = lvaGetDesc(tmpNum); - if (!fgVarNeedsExplicitZeroInit(tmpDsc, bbInALoop, bbIsReturn)) + if (!fgVarNeedsExplicitZeroInit(tmpNum, bbInALoop, bbIsReturn)) { JITDUMP("\nSuppressing zero-init for V%02u -- expect to zero in prolog\n", tmpNum); tmpDsc->lvSuppressedZeroInit = 1; diff --git a/src/coreclr/src/jit/importer.cpp b/src/coreclr/src/jit/importer.cpp index 06522b71930069..d5bf9bf745541d 100644 --- a/src/coreclr/src/jit/importer.cpp +++ b/src/coreclr/src/jit/importer.cpp @@ -13883,7 +13883,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) bool bbIsReturn = (block->bbJumpKind == BBJ_RETURN) && (!compIsForInlining() || (impInlineInfo->iciBlock->bbJumpKind == BBJ_RETURN)); LclVarDsc* const lclDsc = lvaGetDesc(lclNum); - if (fgVarNeedsExplicitZeroInit(lclDsc, bbInALoop, bbIsReturn)) + if (fgVarNeedsExplicitZeroInit(lclNum, bbInALoop, bbIsReturn)) { // Append a tree to zero-out the temp newObjThisPtr = gtNewLclvNode(lclNum, lvaTable[lclNum].TypeGet()); diff --git a/src/coreclr/src/jit/objectalloc.cpp b/src/coreclr/src/jit/objectalloc.cpp index 4c34b4d36806bc..7be52493cd21e6 100644 --- a/src/coreclr/src/jit/objectalloc.cpp +++ b/src/coreclr/src/jit/objectalloc.cpp @@ -521,7 +521,7 @@ unsigned int ObjectAllocator::MorphAllocObjNodeIntoStackAlloc(GenTreeAllocObj* a bool bbInALoop = (block->bbFlags & BBF_BACKWARD_JUMP) != 0; bool bbIsReturn = block->bbJumpKind == BBJ_RETURN; LclVarDsc* const lclDsc = comp->lvaGetDesc(lclNum); - if (comp->fgVarNeedsExplicitZeroInit(lclDsc, bbInALoop, bbIsReturn)) + if (comp->fgVarNeedsExplicitZeroInit(lclNum, bbInALoop, bbIsReturn)) { //------------------------------------------------------------------------ // STMTx (IL 0x... ???) diff --git a/src/coreclr/src/jit/optimizer.cpp b/src/coreclr/src/jit/optimizer.cpp index daf4a88444b721..2fe04e7a8b0606 100644 --- a/src/coreclr/src/jit/optimizer.cpp +++ b/src/coreclr/src/jit/optimizer.cpp @@ -9201,11 +9201,17 @@ typedef JitHashTable, unsigned> Lc // When it gets to an assignment to a local variable or a local field, it checks whether the assignment // is the first reference to the local (or to the parent of the local field), and, if so, // it may do one of two optimizations: -// 1. If the local is untracked, the rhs of the assignment is 0, and the local is guaranteed -// to be fully initialized in the prolog, the explicit zero initialization is removed. -// 2. If the assignment is to a local (and not a field) and either the local has no gc pointers or there -// are no gc-safe points between the prolog and the assignment, it marks the local with lvHasExplicitInit -// which tells the codegen not to insert zero initialization for this local in the prolog. +// 1. If the following conditions are true: +// the local is untracked, +// the rhs of the assignment is 0, +// the local is guaranteed to be fully initialized in the prolog, +// then the explicit zero initialization is removed. +// 2. If the following conditions are true: +// the assignment is to a local (and not a field), +// the local is not lvLiveInOutOfHndlr or no exceptions can be thrown between the prolog and the assignment, +// either the local has no gc pointers or there are no gc-safe points between the prolog and the assignment, +// then the local with lvHasExplicitInit which tells the codegen not to insert zero initialization for this +// local in the prolog. void Compiler::optRemoveRedundantZeroInits() { @@ -9219,6 +9225,7 @@ void Compiler::optRemoveRedundantZeroInits() CompAllocator allocator(getAllocator(CMK_ZeroInit)); LclVarRefCounts refCounts(allocator); bool hasGCSafePoint = false; + bool canThrow = false; assert(fgStmtListThreaded); @@ -9231,11 +9238,16 @@ void Compiler::optRemoveRedundantZeroInits() Statement* next = stmt->GetNextStmt(); for (GenTree* tree = stmt->GetTreeList(); tree != nullptr; tree = tree->gtNext) { - if ((tree->gtFlags & GTF_CALL) != 0) + if (((tree->gtFlags & GTF_CALL) != 0) && (!tree->IsCall() || !tree->AsCall()->IsSuppressGCTransition())) { hasGCSafePoint = true; } + if ((tree->gtFlags & GTF_EXCEPT) != 0) + { + canThrow = true; + } + switch (tree->gtOper) { case GT_LCL_VAR: @@ -9247,7 +9259,7 @@ void Compiler::optRemoveRedundantZeroInits() unsigned* pRefCount = refCounts.LookupPointer(lclNum); if (pRefCount != nullptr) { - *pRefCount = (*pRefCount)++; + *pRefCount = (*pRefCount) + 1; } else { @@ -9274,7 +9286,7 @@ void Compiler::optRemoveRedundantZeroInits() bool bbInALoop = (block->bbFlags & BBF_BACKWARD_JUMP) != 0; bool bbIsReturn = block->bbJumpKind == BBJ_RETURN; - if (!fgVarNeedsExplicitZeroInit(lclDsc, bbInALoop, bbIsReturn)) + if (!fgVarNeedsExplicitZeroInit(lclNum, bbInALoop, bbIsReturn)) { // We are guaranteed to have a zero initialization in the prolog and // the local hasn't been redefined between the prolog and this explicit @@ -9289,13 +9301,16 @@ void Compiler::optRemoveRedundantZeroInits() } } - if (!removedExplicitZeroInit && treeOp->gtOp1->OperIs(GT_LCL_VAR)) + if (!removedExplicitZeroInit && treeOp->gtOp1->OperIs(GT_LCL_VAR) && + (!canThrow || !lclDsc->lvLiveInOutOfHndlr)) { + // If compMethodRequiresPInvokeFrame() returns true, lower may later + // insert a call to CORINFO_HELP_INIT_PINVOKE_FRAME which is a gc-safe point. if (!lclDsc->HasGCPtr() || (!GetInterruptible() && !hasGCSafePoint && !compMethodRequiresPInvokeFrame())) { // The local hasn't been used and won't be reported to the gc between - // the prolog and this explicit zero intialization. Therefore, it doesn't + // the prolog and this explicit intialization. Therefore, it doesn't // require zero initialization in the prolog. lclDsc->lvHasExplicitInit = 1; }