diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 1931228fca66c5..7441062431e579 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -346,6 +346,7 @@ set( JIT_HEADERS namedintrinsiclist.h objectalloc.h opcode.h + optcse.h phase.h promotion.h rangecheck.h diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 315fd2d0988e2c..8a0d4662ced248 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -77,7 +77,8 @@ struct InitVarDscInfo; // defined in registerargconvention.h class FgStack; // defined in fgbasic.cpp class Instrumentor; // defined in fgprofile.cpp class SpanningTreeVisitor; // defined in fgprofile.cpp -class CSE_DataFlow; // defined in OptCSE.cpp +class CSE_DataFlow; // defined in optcse.cpp +struct CSEdsc; // defined in optcse.h class OptBoolsDsc; // defined in optimizer.cpp struct RelopImplicationInfo; // defined in redundantbranchopts.cpp struct JumpThreadInfo; // defined in redundantbranchopts.cpp @@ -2482,6 +2483,8 @@ class Compiler friend class Phase; friend class Lowering; friend class CSE_DataFlow; + friend class CSE_HeuristicCommon; + friend class CSE_HeuristicRandom; friend class CSE_Heuristic; friend class CodeGenInterface; friend class CodeGen; @@ -7306,58 +7309,6 @@ class Compiler EXPSET_TP cseCallKillsMask; // Computed once - A mask that is used to kill available CSEs at callsites - /* Generic list of nodes - used by the CSE logic */ - - struct treeLst - { - treeLst* tlNext; - GenTree* tlTree; - }; - - struct treeStmtLst - { - treeStmtLst* tslNext; - GenTree* tslTree; // tree node - Statement* tslStmt; // statement containing the tree - BasicBlock* tslBlock; // block containing the statement - }; - - // The following logic keeps track of expressions via a simple hash table. 
- - struct CSEdsc - { - CSEdsc* csdNextInBucket; // used by the hash table - size_t csdHashKey; // the original hashkey - ssize_t csdConstDefValue; // When we CSE similar constants, this is the value that we use as the def - ValueNum csdConstDefVN; // When we CSE similar constants, this is the ValueNumber that we use for the LclVar - // assignment - unsigned csdIndex; // 1..optCSECandidateCount - bool csdIsSharedConst; // true if this CSE is a shared const - bool csdLiveAcrossCall; - - unsigned short csdDefCount; // definition count - unsigned short csdUseCount; // use count (excluding the implicit uses at defs) - - weight_t csdDefWtCnt; // weighted def count - weight_t csdUseWtCnt; // weighted use count (excluding the implicit uses at defs) - - GenTree* csdTree; // treenode containing the 1st occurrence - Statement* csdStmt; // stmt containing the 1st occurrence - BasicBlock* csdBlock; // block containing the 1st occurrence - - treeStmtLst* csdTreeList; // list of matching tree nodes: head - treeStmtLst* csdTreeLast; // list of matching tree nodes: tail - - ValueNum defExcSetPromise; // The exception set that is now required for all defs of this CSE. - // This will be set to NoVN if we decide to abandon this CSE - - ValueNum defExcSetCurrent; // The set of exceptions we currently can use for CSE uses. - - ValueNum defConservNormVN; // if all def occurrences share the same conservative normal value - // number, this will reflect it; otherwise, NoVN. 
- // not used for shared const CSE's - }; - static const size_t s_optCSEhashSizeInitial; static const size_t s_optCSEhashGrowthFactor; static const size_t s_optCSEhashBucketSize; diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 21fd7c3dc90566..10d60cac236454 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -160,10 +160,6 @@ CONFIG_INTEGER(JitStackChecks, W("JitStackChecks"), 0) CONFIG_INTEGER(JitStress, W("JitStress"), 0) // Internal Jit stress mode: 0 = no stress, 2 = all stress, other = vary // stress based on a hash of the method and this value CONFIG_INTEGER(JitStressBBProf, W("JitStressBBProf"), 0) // Internal Jit stress mode -CONFIG_INTEGER(JitStressBiasedCSE, W("JitStressBiasedCSE"), 0x101) // Internal Jit stress mode: decimal bias value - // between (0,100) to perform CSE on a candidate. - // 100% = All CSEs. 0% = 0 CSE. (> 100) means no - // stress. CONFIG_INTEGER(JitStressModeNamesOnly, W("JitStressModeNamesOnly"), 0) // Internal Jit stress: if nonzero, only enable // stress modes listed in JitStressModeNames CONFIG_INTEGER(JitStressProcedureSplitting, W("JitStressProcedureSplitting"), 0) // Always split after the first basic @@ -402,6 +398,11 @@ CONFIG_INTEGER(JitCSEMask, W("JitCSEMask"), 0) // Enable metric output in jit disasm & elsewhere CONFIG_INTEGER(JitMetrics, W("JitMetrics"), 0) + +// When nonzero, choose CSE candidates randomly, with probability +// specified by the (decimal) value of the config +CONFIG_INTEGER(JitRandomCSE, W("JitRandomCSE"), 0) + #endif /// diff --git a/src/coreclr/jit/optcse.cpp b/src/coreclr/jit/optcse.cpp index 2d0b9acba46db4..608530c1b5a497 100644 --- a/src/coreclr/jit/optcse.cpp +++ b/src/coreclr/jit/optcse.cpp @@ -16,6 +16,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #pragma hdrstop #endif +#include "optcse.h" + /* static */ const size_t Compiler::s_optCSEhashSizeInitial = EXPSET_SZ * 2; const size_t 
Compiler::s_optCSEhashGrowthFactor = 2; @@ -67,7 +69,7 @@ void Compiler::optCSEstop() * Return the descriptor for the CSE with the given index. */ -inline Compiler::CSEdsc* Compiler::optCSEfindDsc(unsigned index) +inline CSEdsc* Compiler::optCSEfindDsc(unsigned index) { noway_assert(index); noway_assert(index <= optCSECandidateCount); @@ -1681,1700 +1683,1499 @@ void Compiler::optValnumCSE_Availability() // we use a complex set of heuristic rules // to determine if it is likely to be profitable to perform this CSE // -class CSE_Heuristic + +CSE_HeuristicCommon::CSE_HeuristicCommon(Compiler* pCompiler) : m_pCompiler(pCompiler) { - Compiler* m_pCompiler; - unsigned m_addCSEcount; - - weight_t aggressiveRefCnt; - weight_t moderateRefCnt; - unsigned enregCount; // count of the number of predicted enregistered variables - bool largeFrame; - bool hugeFrame; - bool madeChanges; - Compiler::codeOptimize codeOptKind; - Compiler::CSEdsc** sortTab; - size_t sortSiz; + m_addCSEcount = 0; /* Count of the number of LclVars for CSEs that we added */ + sortTab = nullptr; + sortSiz = 0; + madeChanges = false; + codeOptKind = m_pCompiler->compCodeOpt(); +} + #ifdef DEBUG - CLRRandom m_cseRNG; - unsigned m_bias; -#endif +CSE_HeuristicRandom::CSE_HeuristicRandom(Compiler* pCompiler) : CSE_HeuristicCommon(pCompiler) +{ + m_cseRNG.Init(m_pCompiler->info.compMethodHash()); -public: - CSE_Heuristic(Compiler* pCompiler) : m_pCompiler(pCompiler) - { - codeOptKind = m_pCompiler->compCodeOpt(); - } + // We should either have a bias or stress should be enabled. + m_bias = ReinterpretHexAsDecimal(JitConfig.JitRandomCSE()); - Compiler::codeOptimize CodeOptKind() + if (m_bias == 0) { - return codeOptKind; + // Note this bias will vary per method, depending on hash... 
+ m_bias = m_cseRNG.Next(100); + JITDUMP("JitRandomCSE is OFF, but JitStress is ON: using random bias=%d.\n", m_bias); } - - bool MadeChanges() const + else { - return madeChanges; + JITDUMP("JitRandomCSE is ON; using random bias=%d.\n", m_bias); } +} +#endif - // Perform the Initialization step for our CSE Heuristics. Determine the various cut off values to use for - // the aggressive, moderate and conservative CSE promotions. Count the number of enregisterable variables. - // Determine if the method has a large or huge stack frame. - // - void Initialize() - { - m_addCSEcount = 0; /* Count of the number of LclVars for CSEs that we added */ +CSE_Heuristic::CSE_Heuristic(Compiler* pCompiler) : CSE_HeuristicCommon(pCompiler) +{ + aggressiveRefCnt = 0; + moderateRefCnt = 0; + enregCount = 0; + largeFrame = false; + hugeFrame = false; +} - // Record the weighted ref count of the last "for sure" callee saved LclVar - aggressiveRefCnt = 0; - moderateRefCnt = 0; - enregCount = 0; - largeFrame = false; - hugeFrame = false; - sortTab = nullptr; - sortSiz = 0; +// Perform the Initialization step for our CSE Heuristics. Determine the various cut off values to use for +// the aggressive, moderate and conservative CSE promotions. Count the number of enregisterable variables. +// Determine if the method has a large or huge stack frame. 
+// +void CSE_Heuristic::Initialize() +{ + // Record the weighted ref count of the last "for sure" callee saved LclVar - unsigned frameSize = 0; - unsigned regAvailEstimate = ((CNT_CALLEE_ENREG * 3) + (CNT_CALLEE_TRASH * 2) + 1); - unsigned lclNum; - LclVarDsc* varDsc; + unsigned frameSize = 0; + unsigned regAvailEstimate = ((CNT_CALLEE_ENREG * 3) + (CNT_CALLEE_TRASH * 2) + 1); + unsigned lclNum; + LclVarDsc* varDsc; - for (lclNum = 0, varDsc = m_pCompiler->lvaTable; lclNum < m_pCompiler->lvaCount; lclNum++, varDsc++) + for (lclNum = 0, varDsc = m_pCompiler->lvaTable; lclNum < m_pCompiler->lvaCount; lclNum++, varDsc++) + { + // Locals with no references don't use any local stack frame slots + if (varDsc->lvRefCnt() == 0) { - // Locals with no references don't use any local stack frame slots - if (varDsc->lvRefCnt() == 0) - { - continue; - } + continue; + } - // Incoming stack arguments don't use any local stack frame slots - if (varDsc->lvIsParam && !varDsc->lvIsRegArg) - { - continue; - } + // Incoming stack arguments don't use any local stack frame slots + if (varDsc->lvIsParam && !varDsc->lvIsRegArg) + { + continue; + } #if FEATURE_FIXED_OUT_ARGS - // Skip the OutgoingArgArea in computing frame size, since - // its size is not yet known and it doesn't affect local - // offsets from the frame pointer (though it may affect - // them from the stack pointer). - noway_assert(m_pCompiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); - if (lclNum == m_pCompiler->lvaOutgoingArgSpaceVar) - { - continue; - } + // Skip the OutgoingArgArea in computing frame size, since + // its size is not yet known and it doesn't affect local + // offsets from the frame pointer (though it may affect + // them from the stack pointer). 
+ noway_assert(m_pCompiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); + if (lclNum == m_pCompiler->lvaOutgoingArgSpaceVar) + { + continue; + } #endif // FEATURE_FIXED_OUT_ARGS - bool onStack = (regAvailEstimate == 0); // true when it is likely that this LclVar will have a stack home + bool onStack = (regAvailEstimate == 0); // true when it is likely that this LclVar will have a stack home - // Some LclVars always have stack homes - if (varDsc->lvDoNotEnregister) - { - onStack = true; - } + // Some LclVars always have stack homes + if (varDsc->lvDoNotEnregister) + { + onStack = true; + } #ifdef TARGET_X86 - // Treat floating point and 64 bit integers as always on the stack - if (varTypeIsFloating(varDsc->TypeGet()) || varTypeIsLong(varDsc->TypeGet())) - onStack = true; + // Treat floating point and 64 bit integers as always on the stack + if (varTypeIsFloating(varDsc->TypeGet()) || varTypeIsLong(varDsc->TypeGet())) + { + onStack = true; + } #endif - if (onStack) + if (onStack) + { + frameSize += m_pCompiler->lvaLclSize(lclNum); + } + else + { + // For the purposes of estimating the frameSize we + // will consider this LclVar as being enregistered. + // Now we reduce the remaining regAvailEstimate by + // an appropriate amount. + // + if (varDsc->lvRefCnt() <= 2) { - frameSize += m_pCompiler->lvaLclSize(lclNum); + // a single use single def LclVar only uses 1 + regAvailEstimate -= 1; } else { - // For the purposes of estimating the frameSize we - // will consider this LclVar as being enregistered. - // Now we reduce the remaining regAvailEstimate by - // an appropriate amount. 
- // - if (varDsc->lvRefCnt() <= 2) + // a LclVar with multiple uses and defs uses 2 + if (regAvailEstimate >= 2) { - // a single use single def LclVar only uses 1 - regAvailEstimate -= 1; + regAvailEstimate -= 2; } else { - // a LclVar with multiple uses and defs uses 2 - if (regAvailEstimate >= 2) - { - regAvailEstimate -= 2; - } - else - { - // Don't try to subtract when regAvailEstimate is 1 - regAvailEstimate = 0; - } + // Don't try to subtract when regAvailEstimate is 1 + regAvailEstimate = 0; } } - -#ifdef TARGET_XARCH - if (frameSize > 0x080) - { - // We likely have a large stack frame. - // - // On XARCH stack frame displacements can either use a 1-byte or a 4-byte displacement. - // With a large frame we will need to use some 4-byte displacements. - // - largeFrame = true; - break; // early out, we don't need to keep increasing frameSize - } -#elif defined(TARGET_ARM) - if (frameSize > 0x0400) - { - // We likely have a large stack frame. - // - // Thus we might need to use large displacements when loading or storing - // to CSE LclVars that are not enregistered. - // On ARM32 this means using rsGetRsvdReg() to hold the large displacement - largeFrame = true; - } - if (frameSize > 0x10000) - { - hugeFrame = true; - break; // early out, we don't need to keep increasing frameSize - } -#elif defined(TARGET_ARM64) - if (frameSize > 0x1000) - { - // We likely have a large stack frame. - // - // Thus we might need to use large displacements when loading or storing - // to CSE LclVars that are not enregistered. - // On ARM64 this means using rsGetRsvdReg() or R21 to hold the large displacement - // - largeFrame = true; - break; // early out, we don't need to keep increasing frameSize - } -#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - if (frameSize > 0x7ff) - { - // We likely have a large stack frame. - // - // Thus we might need to use large displacements when loading or storing - // to CSE LclVars that are not enregistered. 
- // On LoongArch64/RISC-V64 this means using rsGetRsvdReg() to hold the large displacement. - // - largeFrame = true; - break; // early out, we don't need to keep increasing frameSize - } -#endif } - // Iterate over the sorted list of tracked local variables. These are the register candidates for LSRA. - // We normally visit the LclVars in order of their weighted ref counts and our heuristic assumes that the - // highest weighted ref count LclVars will be enregistered and that the lowest weighted ref count - // are likely be allocated in the stack frame. The value of enregCount is incremented when we visit a LclVar - // that can be enregistered. - // - for (unsigned trackedIndex = 0; trackedIndex < m_pCompiler->lvaTrackedCount; trackedIndex++) +#ifdef TARGET_XARCH + if (frameSize > 0x080) { - LclVarDsc* varDsc = m_pCompiler->lvaGetDescByTrackedIndex(trackedIndex); - var_types varTyp = varDsc->TypeGet(); - - // Locals with no references aren't enregistered - if (varDsc->lvRefCnt() == 0) - { - continue; - } - - // Some LclVars always have stack homes - if (varDsc->lvDoNotEnregister) - { - continue; - } - - // enregCount only tracks the uses of integer registers. - // - // We could track floating point register usage separately - // but it isn't worth the additional complexity as floating point CSEs - // are rare and we typically have plenty of floating point register available. - // - if (!varTypeIsFloating(varTyp)) - { - enregCount++; // The primitive types, including TYP_SIMD types use one register - -#ifndef TARGET_64BIT - if (varTyp == TYP_LONG) - { - enregCount++; // on 32-bit targets longs use two registers - } -#endif - } - - // Set the cut off values to use for deciding when we want to use aggressive, moderate or conservative + // We likely have a large stack frame. 
// - // The value of aggressiveRefCnt and moderateRefCnt start off as zero and - // when enregCount reached a certain value we assign the current LclVar - // (weighted) ref count to aggressiveRefCnt or moderateRefCnt. + // On XARCH stack frame displacements can either use a 1-byte or a 4-byte displacement. + // With a large frame we will need to use some 4-byte displacements. // - const unsigned aggressiveEnregNum = (CNT_CALLEE_ENREG * 3 / 2); - const unsigned moderateEnregNum = ((CNT_CALLEE_ENREG * 3) + (CNT_CALLEE_TRASH * 2)); + largeFrame = true; + break; // early out, we don't need to keep increasing frameSize + } +#elif defined(TARGET_ARM) + if (frameSize > 0x0400) + { + // We likely have a large stack frame. // - // On Windows x64 this yields: - // aggressiveEnregNum == 12 and moderateEnregNum == 38 - // Thus we will typically set the cutoff values for - // aggressiveRefCnt based upon the weight of T13 (the 13th tracked LclVar) - // moderateRefCnt based upon the weight of T39 (the 39th tracked LclVar) + // Thus we might need to use large displacements when loading or storing + // to CSE LclVars that are not enregistered. + // On ARM32 this means using rsGetRsvdReg() to hold the large displacement + largeFrame = true; + } + if (frameSize > 0x10000) + { + hugeFrame = true; + break; // early out, we don't need to keep increasing frameSize + } +#elif defined(TARGET_ARM64) + if (frameSize > 0x1000) + { + // We likely have a large stack frame. // - // For other architecture and platforms these values dynamically change - // based upon the number of callee saved and callee scratch registers. + // Thus we might need to use large displacements when loading or storing + // to CSE LclVars that are not enregistered. 
+ // On ARM64 this means using rsGetRsvdReg() or R21 to hold the large displacement // - if ((aggressiveRefCnt == 0) && (enregCount > aggressiveEnregNum)) - { - if (CodeOptKind() == Compiler::SMALL_CODE) - { - aggressiveRefCnt = varDsc->lvRefCnt(); - } - else - { - aggressiveRefCnt = varDsc->lvRefCntWtd(); - } - aggressiveRefCnt += BB_UNITY_WEIGHT; - } - if ((moderateRefCnt == 0) && (enregCount > ((CNT_CALLEE_ENREG * 3) + (CNT_CALLEE_TRASH * 2)))) - { - if (CodeOptKind() == Compiler::SMALL_CODE) - { - moderateRefCnt = varDsc->lvRefCnt(); - } - else - { - moderateRefCnt = varDsc->lvRefCntWtd(); - } - moderateRefCnt += (BB_UNITY_WEIGHT / 2); - } + largeFrame = true; + break; // early out, we don't need to keep increasing frameSize } - - // The minumum value that we want to use for aggressiveRefCnt is BB_UNITY_WEIGHT * 2 - // so increase it when we are below that value - // - aggressiveRefCnt = max(BB_UNITY_WEIGHT * 2, aggressiveRefCnt); - - // The minumum value that we want to use for moderateRefCnt is BB_UNITY_WEIGHT - // so increase it when we are below that value - // - moderateRefCnt = max(BB_UNITY_WEIGHT, moderateRefCnt); - -#ifdef DEBUG - if (m_pCompiler->verbose) +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) + if (frameSize > 0x7ff) { - printf("\n"); - printf("Aggressive CSE Promotion cutoff is %f\n", aggressiveRefCnt); - printf("Moderate CSE Promotion cutoff is %f\n", moderateRefCnt); - printf("enregCount is %u\n", enregCount); - printf("Framesize estimate is 0x%04X\n", frameSize); - printf("We have a %s frame\n", hugeFrame ? "huge" : (largeFrame ? "large" : "small")); + // We likely have a large stack frame. + // + // Thus we might need to use large displacements when loading or storing + // to CSE LclVars that are not enregistered. + // On LoongArch64 this means using rsGetRsvdReg() to hold the large displacement. 
+ // + largeFrame = true; + break; // early out, we don't need to keep increasing frameSize } #endif } - void SortCandidates() + // Iterate over the sorted list of tracked local variables. These are the register candidates for LSRA. + // We normally visit the LclVars in order of their weighted ref counts and our heuristic assumes that the + // highest weighted ref count LclVars will be enregistered and that the lowest weighted ref count + // are likely be allocated in the stack frame. The value of enregCount is incremented when we visit a LclVar + // that can be enregistered. + // + for (unsigned trackedIndex = 0; trackedIndex < m_pCompiler->lvaTrackedCount; trackedIndex++) { - /* Create an expression table sorted by decreasing cost */ - sortTab = new (m_pCompiler, CMK_CSE) Compiler::CSEdsc*[m_pCompiler->optCSECandidateCount]; + LclVarDsc* varDsc = m_pCompiler->lvaGetDescByTrackedIndex(trackedIndex); + var_types varTyp = varDsc->TypeGet(); - sortSiz = m_pCompiler->optCSECandidateCount * sizeof(*sortTab); - memcpy(sortTab, m_pCompiler->optCSEtab, sortSiz); - - if (CodeOptKind() == Compiler::SMALL_CODE) + // Locals with no references aren't enregistered + if (varDsc->lvRefCnt() == 0) { - jitstd::sort(sortTab, sortTab + m_pCompiler->optCSECandidateCount, Compiler::optCSEcostCmpSz()); + continue; } - else + + // Some LclVars always have stack homes + if (varDsc->lvDoNotEnregister) { - jitstd::sort(sortTab, sortTab + m_pCompiler->optCSECandidateCount, Compiler::optCSEcostCmpEx()); + continue; } -#ifdef DEBUG - if (m_pCompiler->verbose) + // enregCount only tracks the uses of integer registers. + // + // We could track floating point register usage separately + // but it isn't worth the additional complexity as floating point CSEs + // are rare and we typically have plenty of floating point register available. 
+ // + if (!varTypeIsFloating(varTyp)) { - printf("\nSorted CSE candidates:\n"); - /* Print out the CSE candidates */ - for (unsigned cnt = 0; cnt < m_pCompiler->optCSECandidateCount; cnt++) - { - Compiler::CSEdsc* dsc = sortTab[cnt]; - GenTree* expr = dsc->csdTree; - - weight_t def; - weight_t use; - unsigned cost; - - if (CodeOptKind() == Compiler::SMALL_CODE) - { - def = dsc->csdDefCount; // def count - use = dsc->csdUseCount; // use count (excluding the implicit uses at defs) - cost = dsc->csdTree->GetCostSz(); - } - else - { - def = dsc->csdDefWtCnt; // weighted def count - use = dsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs) - cost = dsc->csdTree->GetCostEx(); - } - - if (!Compiler::Is_Shared_Const_CSE(dsc->csdHashKey)) - { - printf(FMT_CSE ", {$%-3x, $%-3x} useCnt=%d: [def=%3f, use=%3f, cost=%3u%s]\n :: ", - dsc->csdIndex, dsc->csdHashKey, dsc->defExcSetPromise, dsc->csdUseCount, def, use, cost, - dsc->csdLiveAcrossCall ? ", call" : " "); - } - else - { - size_t kVal = Compiler::Decode_Shared_Const_CSE_Value(dsc->csdHashKey); - printf(FMT_CSE ", {K_%p} useCnt=%d: [def=%3f, use=%3f, cost=%3u%s]\n :: ", dsc->csdIndex, - dspPtr(kVal), dsc->csdUseCount, def, use, cost, - dsc->csdLiveAcrossCall ? 
", call" : " "); - } + enregCount++; // The primitive types, including TYP_SIMD types use one register - m_pCompiler->gtDispTree(expr, nullptr, nullptr, true); +#ifndef TARGET_64BIT + if (varTyp == TYP_LONG) + { + enregCount++; // on 32-bit targets longs use two registers } - printf("\n"); +#endif } -#endif // DEBUG - } - - // The following class nested within CSE_Heuristic encapsulates the information - // about the current CSE candidate that is under consideration - // - // TODO-Cleanup: This is still very much based upon the old Lexical CSE implementation - // and needs to be reworked for the Value Number based implementation - // - class CSE_Candidate - { - CSE_Heuristic* m_context; - Compiler::CSEdsc* m_CseDsc; - - unsigned m_cseIndex; - weight_t m_defCount; - weight_t m_useCount; - unsigned m_Cost; - unsigned m_Size; - // When this Candidate is successfully promoted to a CSE we record - // the following information about what category was used when promoting it. + // Set the cut off values to use for deciding when we want to use aggressive, moderate or conservative // - // We will set m_Aggressive: - // When we believe that the CSE very valuable in terms of weighted ref counts, - // such that it would always be enregistered by the register allocator. + // The value of aggressiveRefCnt and moderateRefCnt start off as zero and + // when enregCount reached a certain value we assign the current LclVar + // (weighted) ref count to aggressiveRefCnt or moderateRefCnt. // - // We will set m_Moderate: - // When we believe that the CSE is moderately valuable in terms of weighted ref counts, - // such that it is more likely than not to be enregistered by the register allocator + const unsigned aggressiveEnregNum = (CNT_CALLEE_ENREG * 3 / 2); + const unsigned moderateEnregNum = ((CNT_CALLEE_ENREG * 3) + (CNT_CALLEE_TRASH * 2)); // - // We will set m_Conservative: - // When we didn't set m_Aggressive or m_Moderate. 
- // Such candidates typically are expensive to compute and thus are - // always profitable to promote even when they aren't enregistered. + // On Windows x64 this yields: + // aggressiveEnregNum == 12 and moderateEnregNum == 38 + // Thus we will typically set the cutoff values for + // aggressiveRefCnt based upon the weight of T13 (the 13th tracked LclVar) + // moderateRefCnt based upon the weight of T39 (the 39th tracked LclVar) // - // We will set m_StressCSE: - // When the candidate is only being promoted because of a Stress mode. + // For other architecture and platforms these values dynamically change + // based upon the number of callee saved and callee scratch registers. // - bool m_Aggressive; - bool m_Moderate; - bool m_Conservative; - bool m_StressCSE; - - public: - CSE_Candidate(CSE_Heuristic* context, Compiler::CSEdsc* cseDsc) - : m_context(context) - , m_CseDsc(cseDsc) - , m_cseIndex(m_CseDsc->csdIndex) - , m_defCount(0) - , m_useCount(0) - , m_Cost(0) - , m_Size(0) - , m_Aggressive(false) - , m_Moderate(false) - , m_Conservative(false) - , m_StressCSE(false) - { - } - - Compiler::CSEdsc* CseDsc() - { - return m_CseDsc; - } - unsigned CseIndex() - { - return m_cseIndex; - } - weight_t DefCount() - { - return m_defCount; - } - weight_t UseCount() - { - return m_useCount; - } - // TODO-CQ: With ValNum CSE's the Expr and its cost can vary. 
- GenTree* Expr() - { - return m_CseDsc->csdTree; - } - unsigned Cost() - { - return m_Cost; - } - unsigned Size() - { - return m_Size; - } - - bool IsSharedConst() + if ((aggressiveRefCnt == 0) && (enregCount > aggressiveEnregNum)) { - return m_CseDsc->csdIsSharedConst; + if (CodeOptKind() == Compiler::SMALL_CODE) + { + aggressiveRefCnt = varDsc->lvRefCnt(); + } + else + { + aggressiveRefCnt = varDsc->lvRefCntWtd(); + } + aggressiveRefCnt += BB_UNITY_WEIGHT; } - - bool LiveAcrossCall() + if ((moderateRefCnt == 0) && (enregCount > ((CNT_CALLEE_ENREG * 3) + (CNT_CALLEE_TRASH * 2)))) { - return m_CseDsc->csdLiveAcrossCall; + if (CodeOptKind() == Compiler::SMALL_CODE) + { + moderateRefCnt = varDsc->lvRefCnt(); + } + else + { + moderateRefCnt = varDsc->lvRefCntWtd(); + } + moderateRefCnt += (BB_UNITY_WEIGHT / 2); } + } - void SetAggressive() - { - m_Aggressive = true; - } + // The minimum value that we want to use for aggressiveRefCnt is BB_UNITY_WEIGHT * 2 + // so increase it when we are below that value + // + aggressiveRefCnt = max(BB_UNITY_WEIGHT * 2, aggressiveRefCnt); - bool IsAggressive() - { - return m_Aggressive; - } + // The minimum value that we want to use for moderateRefCnt is BB_UNITY_WEIGHT + // so increase it when we are below that value + // + moderateRefCnt = max(BB_UNITY_WEIGHT, moderateRefCnt); - void SetModerate() - { - m_Moderate = true; - } +#ifdef DEBUG + if (m_pCompiler->verbose) + { + printf("\n"); + printf("Aggressive CSE Promotion cutoff is %f\n", aggressiveRefCnt); + printf("Moderate CSE Promotion cutoff is %f\n", moderateRefCnt); + printf("enregCount is %u\n", enregCount); + printf("Framesize estimate is 0x%04X\n", frameSize); + printf("We have a %s frame\n", hugeFrame ? 
"large" : "small")); + } +#endif +} - bool IsModerate() - { - return m_Moderate; - } +void CSE_Heuristic::SortCandidates() +{ + /* Create an expression table sorted by decreasing cost */ + sortTab = new (m_pCompiler, CMK_CSE) CSEdsc*[m_pCompiler->optCSECandidateCount]; - void SetConservative() - { - m_Conservative = true; - } + sortSiz = m_pCompiler->optCSECandidateCount * sizeof(*sortTab); + memcpy(sortTab, m_pCompiler->optCSEtab, sortSiz); - bool IsConservative() - { - return m_Conservative; - } + if (CodeOptKind() == Compiler::SMALL_CODE) + { + jitstd::sort(sortTab, sortTab + m_pCompiler->optCSECandidateCount, Compiler::optCSEcostCmpSz()); + } + else + { + jitstd::sort(sortTab, sortTab + m_pCompiler->optCSECandidateCount, Compiler::optCSEcostCmpEx()); + } - void SetStressCSE() +#ifdef DEBUG + if (m_pCompiler->verbose) + { + printf("\nSorted CSE candidates:\n"); + /* Print out the CSE candidates */ + for (unsigned cnt = 0; cnt < m_pCompiler->optCSECandidateCount; cnt++) { - m_StressCSE = true; - } + CSEdsc* dsc = sortTab[cnt]; + GenTree* expr = dsc->csdTree; - bool IsStressCSE() - { - return m_StressCSE; - } + weight_t def; + weight_t use; + unsigned cost; - void InitializeCounts() - { - m_Size = Expr()->GetCostSz(); // always the GetCostSz() - if (m_context->CodeOptKind() == Compiler::SMALL_CODE) + if (CodeOptKind() == Compiler::SMALL_CODE) { - m_Cost = m_Size; // the estimated code size - m_defCount = m_CseDsc->csdDefCount; // def count - m_useCount = m_CseDsc->csdUseCount; // use count (excluding the implicit uses at defs) + def = dsc->csdDefCount; // def count + use = dsc->csdUseCount; // use count (excluding the implicit uses at defs) + cost = dsc->csdTree->GetCostSz(); } else { - m_Cost = Expr()->GetCostEx(); // the estimated execution cost - m_defCount = m_CseDsc->csdDefWtCnt; // weighted def count - m_useCount = m_CseDsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs) - } - } - }; - -#ifdef DEBUG - 
//------------------------------------------------------------------------ - // optConfigBiasedCSE: - // Stress mode to shuffle the decision to CSE or not using environment - // variable DOTNET_JitStressBiasedCSE (= 0 to 100%). When the bias value - // is not specified but DOTNET_JitStress is ON, generate a random bias. - // - // Return Value: - // 0 -- This method is indifferent about this CSE (no bias specified and no stress) - // 1 -- This CSE must be performed to maintain specified/generated bias. - // -1 -- This CSE mustn't be performed to maintain specified/generated bias. - // - // Operation: - // A debug stress only method that returns "1" with probability (P) - // defined by: - // - // P = (DOTNET_JitStressBiasedCSE / 100) (or) - // P = (random(100) / 100) when DOTNET_JitStress is specified and - // DOTNET_JitStressBiasedCSE is unspecified. - // - // When specified, the bias is reinterpreted as a decimal number between 0 - // to 100. - // When bias is not specified, a bias is randomly generated if DOTNET_JitStress - // is non-zero. - // - // Callers are supposed to call this method for each CSE promotion decision - // and ignore the call if return value is 0 and honor the 1 with a CSE and - // -1 with a no-CSE to maintain the specified/generated bias. - // - int optConfigBiasedCSE() - { - // Seed the PRNG, if never done before. - if (!m_cseRNG.IsInitialized()) - { - m_cseRNG.Init(m_pCompiler->info.compMethodHash()); - m_bias = m_cseRNG.Next(100); - } - - // Obtain the bias value and reinterpret as decimal. - unsigned bias = ReinterpretHexAsDecimal(JitConfig.JitStressBiasedCSE()); - - // Invalid value, check if JitStress is ON. - if (bias > 100) - { - if (!m_pCompiler->compStressCompile(Compiler::STRESS_MAKE_CSE, MAX_STRESS_WEIGHT)) - { - // JitStress is OFF for CSE, nothing to do. 
- return 0; + def = dsc->csdDefWtCnt; // weighted def count + use = dsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs) + cost = dsc->csdTree->GetCostEx(); } - bias = m_bias; - JITDUMP("JitStressBiasedCSE is OFF, but JitStress is ON: generated bias=%d.\n", bias); - } - // Generate a number between (0, 99) and if the generated - // number is smaller than bias, then perform CSE. - unsigned gen = m_cseRNG.Next(100); - int ret = (gen < bias) ? 1 : -1; - - if (m_pCompiler->verbose) - { - if (ret < 0) + if (!Compiler::Is_Shared_Const_CSE(dsc->csdHashKey)) { - printf("No CSE because gen=%d >= bias=%d\n", gen, bias); + printf(FMT_CSE ", {$%-3x, $%-3x} useCnt=%d: [def=%3f, use=%3f, cost=%3u%s]\n :: ", dsc->csdIndex, + dsc->csdHashKey, dsc->defExcSetPromise, dsc->csdUseCount, def, use, cost, + dsc->csdLiveAcrossCall ? ", call" : " "); } else { - printf("Promoting CSE because gen=%d < bias=%d\n", gen, bias); + size_t kVal = Compiler::Decode_Shared_Const_CSE_Value(dsc->csdHashKey); + printf(FMT_CSE ", {K_%p} useCnt=%d: [def=%3f, use=%3f, cost=%3u%s]\n :: ", dsc->csdIndex, + dspPtr(kVal), dsc->csdUseCount, def, use, cost, dsc->csdLiveAcrossCall ? ", call" : " "); } - } - // Indicate whether to perform CSE or not. 
- return ret; + m_pCompiler->gtDispTree(expr, nullptr, nullptr, true); + } + printf("\n"); } -#endif +#endif // DEBUG +} - // Given a CSE candidate decide whether it passes or fails the profitability heuristic - // return true if we believe that it is profitable to promote this candidate to a CSE - // - bool PromotionCheck(CSE_Candidate* candidate) +#ifdef DEBUG +//------------------------------------------------------------------------ +// PromotionCheck: decide whether to perform this CSE +// +// Arguments: +// candidate - cse candidate to consider +// +// Return Value: +// true if the CSE should be performed +// +bool CSE_HeuristicRandom::PromotionCheck(CSE_Candidate* candidate) +{ + // Generate a number between (0, 99) and if the generated + // number is smaller than bias, then perform CSE. + unsigned const gen = m_cseRNG.Next(100); + bool const doCSE = gen < m_bias; + + JITDUMP("%s CSE; gen=%d; bias=%d\n", doCSE ? "Promoting" : "No", gen, m_bias); + + if (doCSE) { - bool result = false; + candidate->SetStressCSE(); + } -#ifdef DEBUG - int stressResult = optConfigBiasedCSE(); - if (stressResult != 0) - { - // Stress is enabled. Check whether to perform CSE or not. 
- if (stressResult > 0) - { - candidate->SetStressCSE(); - return true; - } - } + return doCSE; +} + +//------------------------------------------------------------------------ +// SortCandidates: fill in the CSE sort tab +// +void CSE_HeuristicRandom::SortCandidates() +{ + // Just fill in the sort tab, but don't bother sorting as order + // should not matter for random CSEs + // + sortTab = new (m_pCompiler, CMK_CSE) CSEdsc*[m_pCompiler->optCSECandidateCount]; + sortSiz = m_pCompiler->optCSECandidateCount * sizeof(*sortTab); + memcpy(sortTab, m_pCompiler->optCSEtab, sortSiz); +} - if (m_pCompiler->optConfigDisableCSE2()) - { - return false; // skip this CSE - } #endif - /* - Our calculation is based on the following cost estimate formula +//------------------------------------------------------------------------ +// PromotionCheck: decide whether to perform this CSE +// +// Arguments: +// candidate - cse candidate to consider +// +// Return Value: +// true if the CSE should be performed +// +bool CSE_Heuristic::PromotionCheck(CSE_Candidate* candidate) +{ + bool result = false; - Existing costs are: +#ifdef DEBUG + if (m_pCompiler->optConfigDisableCSE2()) + { + return false; // skip this CSE + } +#endif - (def + use) * cost + /* + Our calculation is based on the following cost estimate formula - If we introduce a CSE temp at each definition and - replace each use with a CSE temp then our cost is: + Existing costs are: - (def * (cost + cse-def-cost)) + (use * cse-use-cost) + (def + use) * cost - We must estimate the values to use for cse-def-cost and cse-use-cost + If we introduce a CSE temp at each definition and + replace each use with a CSE temp then our cost is: - If we are able to enregister the CSE then the cse-use-cost is one - and cse-def-cost is either zero or one. Zero in the case where - we needed to evaluate the def into a register and we can use that - register as the CSE temp as well. 
+ (def * (cost + cse-def-cost)) + (use * cse-use-cost) - If we are unable to enregister the CSE then the cse-use-cost is IND_COST - and the cse-def-cost is also IND_COST. + We must estimate the values to use for cse-def-cost and cse-use-cost - If we want to be conservative we use IND_COST as the value - for both cse-def-cost and cse-use-cost and then we never introduce - a CSE that could pessimize the execution time of the method. + If we are able to enregister the CSE then the cse-use-cost is one + and cse-def-cost is either zero or one. Zero in the case where + we needed to evaluate the def into a register and we can use that + register as the CSE temp as well. - If we want to be more moderate we use (IND_COST_EX + 1) / 2 as the - values for both cse-def-cost and cse-use-cost. + If we are unable to enregister the CSE then the cse-use-cost is IND_COST + and the cse-def-cost is also IND_COST. - If we want to be aggressive we use 1 as the values for both - cse-def-cost and cse-use-cost. + If we want to be conservative we use IND_COST as the value + for both cse-def-cost and cse-use-cost and then we never introduce + a CSE that could pessimize the execution time of the method. - If we believe that the CSE is very valuable in terms of weighted ref counts - such that it would always be enregistered by the register allocator we choose - the aggressive use def costs. + If we want to be more moderate we use (IND_COST_EX + 1) / 2 as the + values for both cse-def-cost and cse-use-cost. - If we believe that the CSE is somewhat valuable in terms of weighted ref counts - such that it could be likely be enregistered by the register allocator we choose - the moderate use def costs. + If we want to be aggressive we use 1 as the values for both + cse-def-cost and cse-use-cost. - Otherwise we choose the conservative use def costs. 
+ If we believe that the CSE is very valuable in terms of weighted ref counts + such that it would always be enregistered by the register allocator we choose + the aggressive use def costs. - */ + If we believe that the CSE is somewhat valuable in terms of weighted ref counts + such that it could be likely be enregistered by the register allocator we choose + the moderate use def costs. - unsigned cse_def_cost; - unsigned cse_use_cost; + Otherwise we choose the conservative use def costs. - weight_t no_cse_cost = 0; - weight_t yes_cse_cost = 0; - unsigned extra_yes_cost = 0; - unsigned extra_no_cost = 0; + */ - // The 'cseRefCnt' is the RefCnt that we will have if we promote this CSE into a new LclVar - // Each CSE Def will contain two Refs and each CSE Use will have one Ref of this new LclVar - weight_t cseRefCnt = (candidate->DefCount() * 2) + candidate->UseCount(); + unsigned cse_def_cost; + unsigned cse_use_cost; - bool canEnregister = true; - unsigned slotCount = 1; - if (candidate->Expr()->TypeIs(TYP_STRUCT)) - { - // This is a non-enregisterable struct. - canEnregister = false; - unsigned size = candidate->Expr()->GetLayout(m_pCompiler)->GetSize(); - // Note that the slotCount is used to estimate the reference cost, but it may overestimate this - // because it doesn't take into account that we might use a vector register for struct copies. 
- slotCount = (size + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE; - } + weight_t no_cse_cost = 0; + weight_t yes_cse_cost = 0; + unsigned extra_yes_cost = 0; + unsigned extra_no_cost = 0; + + // The 'cseRefCnt' is the RefCnt that we will have if we promote this CSE into a new LclVar + // Each CSE Def will contain two Refs and each CSE Use will have one Ref of this new LclVar + weight_t cseRefCnt = (candidate->DefCount() * 2) + candidate->UseCount(); - if (CodeOptKind() == Compiler::SMALL_CODE) + bool canEnregister = true; + unsigned slotCount = 1; + if (candidate->Expr()->TypeIs(TYP_STRUCT)) + { + // This is a non-enregisterable struct. + canEnregister = false; + unsigned size = candidate->Expr()->GetLayout(m_pCompiler)->GetSize(); + // Note that the slotCount is used to estimate the reference cost, but it may overestimate this + // because it doesn't take into account that we might use a vector register for struct copies. + slotCount = (size + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE; + } + + if (CodeOptKind() == Compiler::SMALL_CODE) + { + // Note that when optimizing for SMALL_CODE we set the cse_def_cost/cse_use_cost based + // upon the code size and we use unweighted ref counts instead of weighted ref counts. + // Also note that optimizing for SMALL_CODE is rare, we typically only optimize this way + // for class constructors, because we know that they will only run once. + // + if (cseRefCnt >= aggressiveRefCnt) { - // Note that when optimizing for SMALL_CODE we set the cse_def_cost/cse_use_cost based - // upon the code size and we use unweighted ref counts instead of weighted ref counts. - // Also note that optimizing for SMALL_CODE is rare, we typically only optimize this way - // for class constructors, because we know that they will only run once. 
+ // Record that we are choosing to use the aggressive promotion rules // - if (cseRefCnt >= aggressiveRefCnt) - { - // Record that we are choosing to use the aggressive promotion rules - // - candidate->SetAggressive(); + candidate->SetAggressive(); #ifdef DEBUG - if (m_pCompiler->verbose) - { - printf("Aggressive CSE Promotion (%f >= %f)\n", cseRefCnt, aggressiveRefCnt); - } + if (m_pCompiler->verbose) + { + printf("Aggressive CSE Promotion (%f >= %f)\n", cseRefCnt, aggressiveRefCnt); + } #endif - // With aggressive promotion we expect that the candidate will be enregistered - // so we set the use and def costs to their miniumum values - // - cse_def_cost = 1; - cse_use_cost = 1; + // With aggressive promotion we expect that the candidate will be enregistered + // so we set the use and def costs to their miniumum values + // + cse_def_cost = 1; + cse_use_cost = 1; - // Check if this candidate is likely to live on the stack + // Check if this candidate is likely to live on the stack + // + if (candidate->LiveAcrossCall() || !canEnregister) + { + // Increase the costs when we have a large or huge frame // - if (candidate->LiveAcrossCall() || !canEnregister) + if (largeFrame) { - // Increase the costs when we have a large or huge frame - // - if (largeFrame) - { - cse_def_cost++; - cse_use_cost++; - } - if (hugeFrame) - { - cse_def_cost++; - cse_use_cost++; - } + cse_def_cost++; + cse_use_cost++; + } + if (hugeFrame) + { + cse_def_cost++; + cse_use_cost++; } } - else // not aggressiveRefCnt + } + else // not aggressiveRefCnt + { + // Record that we are choosing to use the conservative promotion rules + // + candidate->SetConservative(); + if (largeFrame) { - // Record that we are choosing to use the conservative promotion rules - // - candidate->SetConservative(); - if (largeFrame) - { #ifdef DEBUG - if (m_pCompiler->verbose) - { - printf("Codesize CSE Promotion (%s frame)\n", hugeFrame ? 
"huge" : "large"); - } + if (m_pCompiler->verbose) + { + printf("Codesize CSE Promotion (%s frame)\n", hugeFrame ? "huge" : "large"); + } #endif #ifdef TARGET_XARCH - /* The following formula is good choice when optimizing CSE for SMALL_CODE */ - cse_def_cost = 6; // mov [EBP-0x00001FC],reg - cse_use_cost = 5; // [EBP-0x00001FC] -#else // TARGET_ARM - if (hugeFrame) - { - cse_def_cost = 10 + 2; // movw/movt r10 and str reg,[sp+r10] - cse_use_cost = 10 + 2; - } - else - { - cse_def_cost = 6 + 2; // movw r10 and str reg,[sp+r10] - cse_use_cost = 6 + 2; - } -#endif + /* The following formula is good choice when optimizing CSE for SMALL_CODE */ + cse_def_cost = 6; // mov [EBP-0x00001FC],reg + cse_use_cost = 5; // [EBP-0x00001FC] +#else // TARGET_ARM + if (hugeFrame) + { + cse_def_cost = 10 + 2; // movw/movt r10 and str reg,[sp+r10] + cse_use_cost = 10 + 2; } - else // small frame + else { + cse_def_cost = 6 + 2; // movw r10 and str reg,[sp+r10] + cse_use_cost = 6 + 2; + } +#endif + } + else // small frame + { #ifdef DEBUG - if (m_pCompiler->verbose) - { - printf("Codesize CSE Promotion (small frame)\n"); - } + if (m_pCompiler->verbose) + { + printf("Codesize CSE Promotion (small frame)\n"); + } #endif #ifdef TARGET_XARCH - /* The following formula is good choice when optimizing CSE for SMALL_CODE */ - cse_def_cost = 3; // mov [EBP-1C],reg - cse_use_cost = 2; // [EBP-1C] + /* The following formula is good choice when optimizing CSE for SMALL_CODE */ + cse_def_cost = 3; // mov [EBP-1C],reg + cse_use_cost = 2; // [EBP-1C] #else // TARGET_ARM - cse_def_cost = 2; // str reg,[sp+0x9c] - cse_use_cost = 2; // ldr reg,[sp+0x9c] + cse_def_cost = 2; // str reg,[sp+0x9c] + cse_use_cost = 2; // ldr reg,[sp+0x9c] #endif - } } + } #ifdef TARGET_AMD64 - if (varTypeIsFloating(candidate->Expr()->TypeGet())) + if (varTypeIsFloating(candidate->Expr()->TypeGet())) + { + // floating point loads/store encode larger + cse_def_cost += 2; + cse_use_cost += 1; + } +#endif // TARGET_AMD64 + } + 
else // not SMALL_CODE ... + { + // Note that when optimizing for BLENDED_CODE or FAST_CODE we set cse_def_cost/cse_use_cost + // based upon the execution costs of the code and we use weighted ref counts. + // + if ((cseRefCnt >= aggressiveRefCnt) && canEnregister) + { + // Record that we are choosing to use the aggressive promotion rules + // + candidate->SetAggressive(); +#ifdef DEBUG + if (m_pCompiler->verbose) { - // floating point loads/store encode larger - cse_def_cost += 2; - cse_use_cost += 1; + printf("Aggressive CSE Promotion (%f >= %f)\n", cseRefCnt, aggressiveRefCnt); } -#endif // TARGET_AMD64 +#endif + // With aggressive promotion we expect that the candidate will be enregistered + // so we set the use and def costs to their miniumum values + // + cse_def_cost = 1; + cse_use_cost = 1; } - else // not SMALL_CODE ... + else if (cseRefCnt >= moderateRefCnt) { - // Note that when optimizing for BLENDED_CODE or FAST_CODE we set cse_def_cost/cse_use_cost - // based upon the execution costs of the code and we use weighted ref counts. + // Record that we are choosing to use the moderate promotion rules // - if ((cseRefCnt >= aggressiveRefCnt) && canEnregister) + candidate->SetModerate(); + if (!candidate->LiveAcrossCall() && canEnregister) { - // Record that we are choosing to use the aggressive promotion rules - // - candidate->SetAggressive(); #ifdef DEBUG if (m_pCompiler->verbose) { - printf("Aggressive CSE Promotion (%f >= %f)\n", cseRefCnt, aggressiveRefCnt); + printf("Moderate CSE Promotion (CSE never live at call) (%f >= %f)\n", cseRefCnt, moderateRefCnt); } #endif - // With aggressive promotion we expect that the candidate will be enregistered - // so we set the use and def costs to their miniumum values - // - cse_def_cost = 1; + cse_def_cost = 2; cse_use_cost = 1; } - else if (cseRefCnt >= moderateRefCnt) + else // candidate is live across call or not enregisterable. 
{ - // Record that we are choosing to use the moderate promotion rules - // - candidate->SetModerate(); - if (!candidate->LiveAcrossCall() && canEnregister) - { #ifdef DEBUG - if (m_pCompiler->verbose) - { - printf("Moderate CSE Promotion (CSE never live at call) (%f >= %f)\n", cseRefCnt, - moderateRefCnt); - } -#endif - cse_def_cost = 2; - cse_use_cost = 1; - } - else // candidate is live across call or not enregisterable. + if (m_pCompiler->verbose) { -#ifdef DEBUG - if (m_pCompiler->verbose) - { - printf("Moderate CSE Promotion (%s) (%f >= %f)\n", - candidate->LiveAcrossCall() ? "CSE is live across a call" : "not enregisterable", - cseRefCnt, moderateRefCnt); - } + printf("Moderate CSE Promotion (%s) (%f >= %f)\n", + candidate->LiveAcrossCall() ? "CSE is live across a call" : "not enregisterable", cseRefCnt, + moderateRefCnt); + } #endif - cse_def_cost = 2; - if (canEnregister) + cse_def_cost = 2; + if (canEnregister) + { + if (enregCount < (CNT_CALLEE_ENREG * 3 / 2)) { - if (enregCount < (CNT_CALLEE_ENREG * 3 / 2)) - { - cse_use_cost = 1; - } - else - { - cse_use_cost = 2; - } + cse_use_cost = 1; } else { - cse_use_cost = 3; + cse_use_cost = 2; } } + else + { + cse_use_cost = 3; + } } - else // Conservative CSE promotion + } + else // Conservative CSE promotion + { + // Record that we are choosing to use the conservative promotion rules + // + candidate->SetConservative(); + if (!candidate->LiveAcrossCall() && canEnregister) { - // Record that we are choosing to use the conservative promotion rules - // - candidate->SetConservative(); - if (!candidate->LiveAcrossCall() && canEnregister) - { #ifdef DEBUG - if (m_pCompiler->verbose) - { - printf("Conservative CSE Promotion (%s) (%f < %f)\n", - candidate->LiveAcrossCall() ? 
"CSE is live across a call" : "not enregisterable", - cseRefCnt, moderateRefCnt); - } -#endif - cse_def_cost = 2; - cse_use_cost = 2; - } - else // candidate is live across call + if (m_pCompiler->verbose) { -#ifdef DEBUG - if (m_pCompiler->verbose) - { - printf("Conservative CSE Promotion (%f < %f)\n", cseRefCnt, moderateRefCnt); - } -#endif - cse_def_cost = 2; - cse_use_cost = 3; + printf("Conservative CSE Promotion (%s) (%f < %f)\n", + candidate->LiveAcrossCall() ? "CSE is live across a call" : "not enregisterable", cseRefCnt, + moderateRefCnt); } - - // If we have maxed out lvaTrackedCount then this CSE may end up as an untracked variable - if (m_pCompiler->lvaTrackedCount == (unsigned)JitConfig.JitMaxLocalsToTrack()) +#endif + cse_def_cost = 2; + cse_use_cost = 2; + } + else // candidate is live across call + { +#ifdef DEBUG + if (m_pCompiler->verbose) { - cse_def_cost += 1; - cse_use_cost += 1; + printf("Conservative CSE Promotion (%f < %f)\n", cseRefCnt, moderateRefCnt); } +#endif + cse_def_cost = 2; + cse_use_cost = 3; + } + + // If we have maxed out lvaTrackedCount then this CSE may end up as an untracked variable + if (m_pCompiler->lvaTrackedCount == (unsigned)JitConfig.JitMaxLocalsToTrack()) + { + cse_def_cost += 1; + cse_use_cost += 1; } } + } + + if (slotCount > 1) + { + cse_def_cost *= slotCount; + cse_use_cost *= slotCount; + } - if (slotCount > 1) + // If this CSE is live across a call then we may have additional costs + // + if (candidate->LiveAcrossCall()) + { + // If we have a floating-point CSE that is both live across a call and there + // are no callee-saved FP registers available, the RA will have to spill at + // the def site and reload at the (first) use site, if the variable is a register + // candidate. Account for that. 
+ if (varTypeIsFloating(candidate->Expr()) && (CNT_CALLEE_SAVED_FLOAT == 0) && !candidate->IsConservative()) { - cse_def_cost *= slotCount; - cse_use_cost *= slotCount; + cse_def_cost += 1; + cse_use_cost += 1; } - // If this CSE is live across a call then we may have additional costs + // If we don't have a lot of variables to enregister or we have a floating point type + // then we will likely need to spill an additional caller save register. // - if (candidate->LiveAcrossCall()) + if ((enregCount < (CNT_CALLEE_ENREG * 3 / 2)) || varTypeIsFloating(candidate->Expr())) { - // If we have a floating-point CSE that is both live across a call and there - // are no callee-saved FP registers available, the RA will have to spill at - // the def site and reload at the (first) use site, if the variable is a register - // candidate. Account for that. - if (varTypeIsFloating(candidate->Expr()) && (CNT_CALLEE_SAVED_FLOAT == 0) && !candidate->IsConservative()) - { - cse_def_cost += 1; - cse_use_cost += 1; - } + // Extra cost in case we have to spill/restore a caller saved register + extra_yes_cost = BB_UNITY_WEIGHT_UNSIGNED; - // If we don't have a lot of variables to enregister or we have a floating point type - // then we will likely need to spill an additional caller save register. - // - if ((enregCount < (CNT_CALLEE_ENREG * 3 / 2)) || varTypeIsFloating(candidate->Expr())) + if (cseRefCnt < moderateRefCnt) // If Conservative CSE promotion { - // Extra cost in case we have to spill/restore a caller saved register - extra_yes_cost = BB_UNITY_WEIGHT_UNSIGNED; - - if (cseRefCnt < moderateRefCnt) // If Conservative CSE promotion - { - extra_yes_cost *= 2; // full cost if we are being Conservative - } + extra_yes_cost *= 2; // full cost if we are being Conservative } + } #ifdef FEATURE_SIMD - // SIMD types may cause a SIMD register to be spilled/restored in the prolog and epilog. + // SIMD types may cause a SIMD register to be spilled/restored in the prolog and epilog. 
+ // + if (varTypeIsSIMD(candidate->Expr()->TypeGet())) + { + // We don't have complete information about when these extra spilled/restore will be needed. + // Instead we are conservative and assume that each SIMD CSE that is live across a call + // will cause an additional spill/restore in the prolog and epilog. // - if (varTypeIsSIMD(candidate->Expr()->TypeGet())) - { - // We don't have complete information about when these extra spilled/restore will be needed. - // Instead we are conservative and assume that each SIMD CSE that is live across a call - // will cause an additional spill/restore in the prolog and epilog. - // - int spillSimdRegInProlog = 1; + int spillSimdRegInProlog = 1; #if defined(TARGET_XARCH) - // If we have a SIMD32/64 that is live across a call we have even higher spill costs + // If we have a SIMD32/64 that is live across a call we have even higher spill costs + // + if (candidate->Expr()->TypeIs(TYP_SIMD32, TYP_SIMD64)) + { + // Additionally for a simd32 CSE candidate we assume that and second spilled/restore will be needed. + // (to hold the upper half of the simd32 register that isn't preserved across the call) // - if (candidate->Expr()->TypeIs(TYP_SIMD32, TYP_SIMD64)) - { - // Additionally for a simd32 CSE candidate we assume that and second spilled/restore will be needed. - // (to hold the upper half of the simd32 register that isn't preserved across the call) - // - spillSimdRegInProlog++; + spillSimdRegInProlog++; - // We also increase the CSE use cost here to because we may have to generate instructions - // to move the upper half of the simd32 before and after a call. - // - cse_use_cost += 2; - } + // We also increase the CSE use cost here to because we may have to generate instructions + // to move the upper half of the simd32 before and after a call. 
+ // + cse_use_cost += 2; + } #endif // TARGET_XARCH - extra_yes_cost = (BB_UNITY_WEIGHT_UNSIGNED * spillSimdRegInProlog) * 3; - } -#endif // FEATURE_SIMD + extra_yes_cost = (BB_UNITY_WEIGHT_UNSIGNED * spillSimdRegInProlog) * 3; } +#endif // FEATURE_SIMD + } - // estimate the cost from lost codesize reduction if we do not perform the CSE - if (candidate->Size() > cse_use_cost) - { - Compiler::CSEdsc* dsc = candidate->CseDsc(); // We need to retrieve the actual use count, not the - // weighted count - extra_no_cost = candidate->Size() - cse_use_cost; - extra_no_cost = extra_no_cost * dsc->csdUseCount * 2; - } + // estimate the cost from lost codesize reduction if we do not perform the CSE + if (candidate->Size() > cse_use_cost) + { + CSEdsc* dsc = candidate->CseDsc(); // We need to retrieve the actual use count, not the + // weighted count + extra_no_cost = candidate->Size() - cse_use_cost; + extra_no_cost = extra_no_cost * dsc->csdUseCount * 2; + } - /* no_cse_cost is the cost estimate when we decide not to make a CSE */ - /* yes_cse_cost is the cost estimate when we decide to make a CSE */ + /* no_cse_cost is the cost estimate when we decide not to make a CSE */ + /* yes_cse_cost is the cost estimate when we decide to make a CSE */ - no_cse_cost = candidate->UseCount() * candidate->Cost(); - yes_cse_cost = (candidate->DefCount() * cse_def_cost) + (candidate->UseCount() * cse_use_cost); + no_cse_cost = candidate->UseCount() * candidate->Cost(); + yes_cse_cost = (candidate->DefCount() * cse_def_cost) + (candidate->UseCount() * cse_use_cost); - no_cse_cost += extra_no_cost; - yes_cse_cost += extra_yes_cost; + no_cse_cost += extra_no_cost; + yes_cse_cost += extra_yes_cost; #ifdef DEBUG - if (m_pCompiler->verbose) - { - printf("cseRefCnt=%f, aggressiveRefCnt=%f, moderateRefCnt=%f\n", cseRefCnt, aggressiveRefCnt, - moderateRefCnt); - printf("defCnt=%f, useCnt=%f, cost=%d, size=%d%s\n", candidate->DefCount(), candidate->UseCount(), - candidate->Cost(), candidate->Size(), 
candidate->LiveAcrossCall() ? ", LiveAcrossCall" : ""); - printf("def_cost=%d, use_cost=%d, extra_no_cost=%d, extra_yes_cost=%d\n", cse_def_cost, cse_use_cost, - extra_no_cost, extra_yes_cost); + if (m_pCompiler->verbose) + { + printf("cseRefCnt=%f, aggressiveRefCnt=%f, moderateRefCnt=%f\n", cseRefCnt, aggressiveRefCnt, moderateRefCnt); + printf("defCnt=%f, useCnt=%f, cost=%d, size=%d%s\n", candidate->DefCount(), candidate->UseCount(), + candidate->Cost(), candidate->Size(), candidate->LiveAcrossCall() ? ", LiveAcrossCall" : ""); + printf("def_cost=%d, use_cost=%d, extra_no_cost=%d, extra_yes_cost=%d\n", cse_def_cost, cse_use_cost, + extra_no_cost, extra_yes_cost); - printf("CSE cost savings check (%f >= %f) %s\n", no_cse_cost, yes_cse_cost, - (no_cse_cost >= yes_cse_cost) ? "passes" : "fails"); - } + printf("CSE cost savings check (%f >= %f) %s\n", no_cse_cost, yes_cse_cost, + (no_cse_cost >= yes_cse_cost) ? "passes" : "fails"); + } #endif // DEBUG - // Should we make this candidate into a CSE? - // Is the yes cost less than the no cost - // - if (yes_cse_cost <= no_cse_cost) - { - result = true; // Yes make this a CSE - } - else + // Should we make this candidate into a CSE? 
+ // Is the yes cost less than the no cost + // + if (yes_cse_cost <= no_cse_cost) + { + result = true; // Yes make this a CSE + } + else + { + /* In stress mode we will make some extra CSEs */ + if (no_cse_cost > 0) { - /* In stress mode we will make some extra CSEs */ - if (no_cse_cost > 0) - { - int percentage = (int)((no_cse_cost * 100) / yes_cse_cost); + int percentage = (int)((no_cse_cost * 100) / yes_cse_cost); - if (m_pCompiler->compStressCompile(Compiler::STRESS_MAKE_CSE, percentage)) - { - result = true; // Yes make this a CSE - } + if (m_pCompiler->compStressCompile(Compiler::STRESS_MAKE_CSE, percentage)) + { + result = true; // Yes make this a CSE } } - - return result; } - // IsCompatibleType() takes two var_types and returns true if they - // are compatible types for CSE substitution - // - bool IsCompatibleType(var_types cseLclVarTyp, var_types expTyp) - { - // Exact type match is the expected case - if (cseLclVarTyp == expTyp) - { - return true; - } - - // We also allow TYP_BYREF and TYP_I_IMPL as compatible types - // - if ((cseLclVarTyp == TYP_BYREF) && (expTyp == TYP_I_IMPL)) - { - return true; - } - if ((cseLclVarTyp == TYP_I_IMPL) && (expTyp == TYP_BYREF)) - { - return true; - } + return result; +} - // Otherwise we have incompatible types - return false; +// IsCompatibleType() takes two var_types and returns true if they +// are compatible types for CSE substitution +// +bool CSE_HeuristicCommon::IsCompatibleType(var_types cseLclVarTyp, var_types expTyp) +{ + // Exact type match is the expected case + if (cseLclVarTyp == expTyp) + { + return true; } - // PerformCSE() takes a successful candidate and performs the appropriate replacements: - // - // It will replace all of the CSE defs with assignments to a new "cse0" LclVar - // and will replace all of the CSE uses with reads of the "cse0" LclVar + // We also allow TYP_BYREF and TYP_I_IMPL as compatible types // - // It will also put cse0 into SSA if there is just one def. 
- void PerformCSE(CSE_Candidate* successfulCandidate) + if ((cseLclVarTyp == TYP_BYREF) && (expTyp == TYP_I_IMPL)) { - weight_t cseRefCnt = (successfulCandidate->DefCount() * 2) + successfulCandidate->UseCount(); - - if (successfulCandidate->LiveAcrossCall() != 0) - { - // As we introduce new LclVars for these CSE we slightly - // increase the cutoffs for aggressive and moderate CSE's - // - weight_t incr = BB_UNITY_WEIGHT; + return true; + } + if ((cseLclVarTyp == TYP_I_IMPL) && (expTyp == TYP_BYREF)) + { + return true; + } - if (cseRefCnt > aggressiveRefCnt) - { - aggressiveRefCnt += incr; - } + // Otherwise we have incompatible types + return false; +} - if (cseRefCnt > moderateRefCnt) - { - moderateRefCnt += (incr / 2); - } - } +// PerformCSE() takes a successful candidate and performs the appropriate replacements: +// +// It will replace all of the CSE defs with assignments to a new "cse0" LclVar +// and will replace all of the CSE uses with reads of the "cse0" LclVar +// +// It will also put cse0 into SSA if there is just one def. 
+void CSE_HeuristicCommon::PerformCSE(CSE_Candidate* successfulCandidate) +{ + AdjustHeuristic(successfulCandidate); #ifdef DEBUG - // Setup the message arg for lvaGrabTemp() - // - const char* grabTempMessage = "CSE - unknown"; + // Setup the message arg for lvaGrabTemp() + // + const char* grabTempMessage = "CSE - unknown"; - if (successfulCandidate->IsAggressive()) - { - grabTempMessage = "CSE - aggressive"; - } - else if (successfulCandidate->IsModerate()) - { - grabTempMessage = "CSE - moderate"; - } - else if (successfulCandidate->IsConservative()) - { - grabTempMessage = "CSE - conservative"; - } - else if (successfulCandidate->IsStressCSE()) - { - grabTempMessage = "CSE - stress mode"; - } + if (successfulCandidate->IsAggressive()) + { + grabTempMessage = "CSE - aggressive"; + } + else if (successfulCandidate->IsModerate()) + { + grabTempMessage = "CSE - moderate"; + } + else if (successfulCandidate->IsConservative()) + { + grabTempMessage = "CSE - conservative"; + } + else if (successfulCandidate->IsStressCSE()) + { + grabTempMessage = "CSE - stress mode"; + } #endif // DEBUG - /* Introduce a new temp for the CSE */ - - // we will create a long lifetime temp for the new CSE LclVar - unsigned cseLclVarNum = m_pCompiler->lvaGrabTemp(false DEBUGARG(grabTempMessage)); - var_types cseLclVarTyp = genActualType(successfulCandidate->Expr()->TypeGet()); - - LclVarDsc* lclDsc = m_pCompiler->lvaGetDesc(cseLclVarNum); - if (cseLclVarTyp == TYP_STRUCT) - { - m_pCompiler->lvaSetStruct(cseLclVarNum, successfulCandidate->Expr()->GetLayout(m_pCompiler), false); - } - lclDsc->lvType = cseLclVarTyp; - lclDsc->lvIsCSE = true; + /* Introduce a new temp for the CSE */ - // Record that we created a new LclVar for use as a CSE temp - m_addCSEcount++; - m_pCompiler->optCSEcount++; + // we will create a long lifetime temp for the new CSE LclVar + unsigned cseLclVarNum = m_pCompiler->lvaGrabTemp(false DEBUGARG(grabTempMessage)); + var_types cseLclVarTyp = 
genActualType(successfulCandidate->Expr()->TypeGet()); - // Walk all references to this CSE, adding an assignment - // to the CSE temp to all defs and changing all refs to - // a simple use of the CSE temp. - // - // Later we will unmark any nested CSE's for the CSE uses. - // - Compiler::CSEdsc* dsc = successfulCandidate->CseDsc(); + LclVarDsc* lclDsc = m_pCompiler->lvaGetDesc(cseLclVarNum); + if (cseLclVarTyp == TYP_STRUCT) + { + m_pCompiler->lvaSetStruct(cseLclVarNum, successfulCandidate->Expr()->GetLayout(m_pCompiler), false); + } + lclDsc->lvType = cseLclVarTyp; + lclDsc->lvIsCSE = true; - // If there's just a single def for the CSE, we'll put this - // CSE into SSA form on the fly. We won't need any PHIs. - unsigned cseSsaNum = SsaConfig::RESERVED_SSA_NUM; - LclSsaVarDsc* ssaVarDsc = nullptr; + // Record that we created a new LclVar for use as a CSE temp + m_addCSEcount++; + m_pCompiler->optCSEcount++; - if (dsc->csdDefCount == 1) - { - JITDUMP(FMT_CSE " is single-def, so associated CSE temp V%02u will be in SSA\n", dsc->csdIndex, - cseLclVarNum); - lclDsc->lvInSsa = true; - - // Allocate the ssa num - CompAllocator allocator = m_pCompiler->getAllocator(CMK_SSA); - cseSsaNum = lclDsc->lvPerSsaData.AllocSsaNum(allocator); - ssaVarDsc = lclDsc->GetPerSsaData(cseSsaNum); - } - else - { - INDEBUG(lclDsc->lvIsMultiDefCSE = 1); - } + // Walk all references to this CSE, adding an assignment + // to the CSE temp to all defs and changing all refs to + // a simple use of the CSE temp. + // + // Later we will unmark any nested CSE's for the CSE uses. + // + CSEdsc* dsc = successfulCandidate->CseDsc(); - // Verify that all of the ValueNumbers in this list are correct as - // Morph will change them when it performs a mutating operation. 
- // - bool setRefCnt = true; - bool allSame = true; - bool isSharedConst = successfulCandidate->IsSharedConst(); - ValueNum bestVN = ValueNumStore::NoVN; - bool bestIsDef = false; - ssize_t bestConstValue = 0; - Compiler::treeStmtLst* lst = dsc->csdTreeList; + // If there's just a single def for the CSE, we'll put this + // CSE into SSA form on the fly. We won't need any PHIs. + unsigned cseSsaNum = SsaConfig::RESERVED_SSA_NUM; + LclSsaVarDsc* ssaVarDsc = nullptr; - while (lst != nullptr) - { - // Ignore this node if the gtCSEnum value has been cleared - if (IS_CSE_INDEX(lst->tslTree->gtCSEnum)) - { - // We used the liberal Value numbers when building the set of CSE - ValueNum currVN = m_pCompiler->vnStore->VNLiberalNormalValue(lst->tslTree->gtVNPair); - assert(currVN != ValueNumStore::NoVN); - ssize_t curConstValue = isSharedConst ? m_pCompiler->vnStore->CoercedConstantValue(currVN) : 0; + if (dsc->csdDefCount == 1) + { + JITDUMP(FMT_CSE " is single-def, so associated CSE temp V%02u will be in SSA\n", dsc->csdIndex, cseLclVarNum); + lclDsc->lvInSsa = true; - GenTree* exp = lst->tslTree; - bool isDef = IS_CSE_DEF(exp->gtCSEnum); + // Allocate the ssa num + CompAllocator allocator = m_pCompiler->getAllocator(CMK_SSA); + cseSsaNum = lclDsc->lvPerSsaData.AllocSsaNum(allocator); + ssaVarDsc = lclDsc->GetPerSsaData(cseSsaNum); + } + else + { + INDEBUG(lclDsc->lvIsMultiDefCSE = 1); + } - if (bestVN == ValueNumStore::NoVN) - { - // first entry - // set bestVN - bestVN = currVN; + // Verify that all of the ValueNumbers in this list are correct as + // Morph will change them when it performs a mutating operation. 
+ // + bool setRefCnt = true; + bool allSame = true; + bool isSharedConst = successfulCandidate->IsSharedConst(); + ValueNum bestVN = ValueNumStore::NoVN; + bool bestIsDef = false; + ssize_t bestConstValue = 0; + treeStmtLst* lst = dsc->csdTreeList; - if (isSharedConst) - { - // set bestConstValue and bestIsDef - bestConstValue = curConstValue; - bestIsDef = isDef; - } - } - else if (currVN != bestVN) - { - assert(isSharedConst); // Must be true when we have differing VNs + while (lst != nullptr) + { + // Ignore this node if the gtCSEnum value has been cleared + if (IS_CSE_INDEX(lst->tslTree->gtCSEnum)) + { + // We used the liberal Value numbers when building the set of CSE + ValueNum currVN = m_pCompiler->vnStore->VNLiberalNormalValue(lst->tslTree->gtVNPair); + assert(currVN != ValueNumStore::NoVN); + ssize_t curConstValue = isSharedConst ? m_pCompiler->vnStore->CoercedConstantValue(currVN) : 0; - // subsequent entry - // clear allSame and check for a lower constant - allSame = false; + GenTree* exp = lst->tslTree; + bool isDef = IS_CSE_DEF(exp->gtCSEnum); - ssize_t diff = curConstValue - bestConstValue; + if (bestVN == ValueNumStore::NoVN) + { + // first entry + // set bestVN + bestVN = currVN; - // The ARM addressing modes allow for a subtraction of up to 255 - // so we will allow the diff to be up to -255 before replacing a CSE def - // This will minimize the number of extra subtract instructions. 
- // - if ((bestIsDef && (diff < -255)) || (!bestIsDef && (diff < 0))) - { - // set new bestVN, bestConstValue and bestIsDef - bestVN = currVN; - bestConstValue = curConstValue; - bestIsDef = isDef; - } + if (isSharedConst) + { + // set bestConstValue and bestIsDef + bestConstValue = curConstValue; + bestIsDef = isDef; } + } + else if (currVN != bestVN) + { + assert(isSharedConst); // Must be true when we have differing VNs - BasicBlock* blk = lst->tslBlock; - weight_t curWeight = blk->getBBWeight(m_pCompiler); + // subsequent entry + // clear allSame and check for a lower constant + allSame = false; - if (setRefCnt) - { - lclDsc->setLvRefCnt(1); - lclDsc->setLvRefCntWtd(curWeight); - setRefCnt = false; - } - else - { - lclDsc->incRefCnts(curWeight, m_pCompiler); - } + ssize_t diff = curConstValue - bestConstValue; - // A CSE Def references the LclVar twice + // The ARM addressing modes allow for a subtraction of up to 255 + // so we will allow the diff to be up to -255 before replacing a CSE def + // This will minimize the number of extra subtract instructions. 
// - if (isDef) + if ((bestIsDef && (diff < -255)) || (!bestIsDef && (diff < 0))) { - lclDsc->incRefCnts(curWeight, m_pCompiler); - INDEBUG(lclDsc->lvIsHoist |= ((lst->tslTree->gtFlags & GTF_MAKE_CSE) != 0)); + // set new bestVN, bestConstValue and bestIsDef + bestVN = currVN; + bestConstValue = curConstValue; + bestIsDef = isDef; } } - lst = lst->tslNext; + + BasicBlock* blk = lst->tslBlock; + weight_t curWeight = blk->getBBWeight(m_pCompiler); + + if (setRefCnt) + { + lclDsc->setLvRefCnt(1); + lclDsc->setLvRefCntWtd(curWeight); + setRefCnt = false; + } + else + { + lclDsc->incRefCnts(curWeight, m_pCompiler); + } + + // A CSE Def references the LclVar twice + // + if (isDef) + { + lclDsc->incRefCnts(curWeight, m_pCompiler); + INDEBUG(lclDsc->lvIsHoist |= ((lst->tslTree->gtFlags & GTF_MAKE_CSE) != 0)); + } } + lst = lst->tslNext; + } - dsc->csdConstDefValue = bestConstValue; - dsc->csdConstDefVN = bestVN; + dsc->csdConstDefValue = bestConstValue; + dsc->csdConstDefVN = bestVN; #ifdef DEBUG - if (m_pCompiler->verbose) + if (m_pCompiler->verbose) + { + if (!allSame) { - if (!allSame) + if (isSharedConst) { - if (isSharedConst) - { - printf("\nWe have shared Const CSE's and selected " FMT_VN " with a value of 0x%p as the base.\n", - dsc->csdConstDefVN, dspPtr(dsc->csdConstDefValue)); - } - else // !isSharedConst + printf("\nWe have shared Const CSE's and selected " FMT_VN " with a value of 0x%p as the base.\n", + dsc->csdConstDefVN, dspPtr(dsc->csdConstDefValue)); + } + else // !isSharedConst + { + lst = dsc->csdTreeList; + GenTree* firstTree = lst->tslTree; + printf("In %s, CSE (oper = %s, type = %s) has differing VNs: ", m_pCompiler->info.compFullName, + GenTree::OpName(firstTree->OperGet()), varTypeName(firstTree->TypeGet())); + while (lst != nullptr) { - lst = dsc->csdTreeList; - GenTree* firstTree = lst->tslTree; - printf("In %s, CSE (oper = %s, type = %s) has differing VNs: ", m_pCompiler->info.compFullName, - GenTree::OpName(firstTree->OperGet()), 
varTypeName(firstTree->TypeGet())); - while (lst != nullptr) + if (IS_CSE_INDEX(lst->tslTree->gtCSEnum)) { - if (IS_CSE_INDEX(lst->tslTree->gtCSEnum)) - { - ValueNum currVN = m_pCompiler->vnStore->VNLiberalNormalValue(lst->tslTree->gtVNPair); - printf("[%06d](%s " FMT_VN ") ", m_pCompiler->dspTreeID(lst->tslTree), - IS_CSE_USE(lst->tslTree->gtCSEnum) ? "use" : "def", currVN); - } - lst = lst->tslNext; + ValueNum currVN = m_pCompiler->vnStore->VNLiberalNormalValue(lst->tslTree->gtVNPair); + printf("[%06d](%s " FMT_VN ") ", m_pCompiler->dspTreeID(lst->tslTree), + IS_CSE_USE(lst->tslTree->gtCSEnum) ? "use" : "def", currVN); } - printf("\n"); + lst = lst->tslNext; } + printf("\n"); } } + } #endif // DEBUG - // Setup 'lst' to point at the start of this candidate list - lst = dsc->csdTreeList; - noway_assert(lst); + // Setup 'lst' to point at the start of this candidate list + lst = dsc->csdTreeList; + noway_assert(lst); - do - { - /* Process the next node in the list */ - GenTree* const exp = lst->tslTree; - Statement* const stmt = lst->tslStmt; - BasicBlock* const blk = lst->tslBlock; + do + { + /* Process the next node in the list */ + GenTree* const exp = lst->tslTree; + Statement* const stmt = lst->tslStmt; + BasicBlock* const blk = lst->tslBlock; - /* Advance to the next node in the list */ - lst = lst->tslNext; + /* Advance to the next node in the list */ + lst = lst->tslNext; - // We may have cleared this CSE in optValuenumCSE_Availability - // due to different exception sets. - // - // Ignore this node if the gtCSEnum value has been cleared - if (!IS_CSE_INDEX(exp->gtCSEnum)) - { - continue; - } + // We may have cleared this CSE in optValuenumCSE_Availability + // due to different exception sets. 
+ // + // Ignore this node if the gtCSEnum value has been cleared + if (!IS_CSE_INDEX(exp->gtCSEnum)) + { + continue; + } - // Assert if we used DEBUG_DESTROY_NODE on this CSE exp - assert(exp->gtOper != GT_COUNT); + // Assert if we used DEBUG_DESTROY_NODE on this CSE exp + assert(exp->gtOper != GT_COUNT); - /* Make sure we update the weighted ref count correctly */ - m_pCompiler->optCSEweight = blk->getBBWeight(m_pCompiler); + /* Make sure we update the weighted ref count correctly */ + m_pCompiler->optCSEweight = blk->getBBWeight(m_pCompiler); - /* Figure out the actual type of the value */ - var_types expTyp = genActualType(exp->TypeGet()); + /* Figure out the actual type of the value */ + var_types expTyp = genActualType(exp->TypeGet()); - // The cseLclVarType must be a compatible with expTyp - // - ValueNumStore* vnStore = m_pCompiler->vnStore; - noway_assert(IsCompatibleType(cseLclVarTyp, expTyp) || (dsc->csdConstDefVN != vnStore->VNForNull())); + // The cseLclVarType must be a compatible with expTyp + // + ValueNumStore* vnStore = m_pCompiler->vnStore; + noway_assert(IsCompatibleType(cseLclVarTyp, expTyp) || (dsc->csdConstDefVN != vnStore->VNForNull())); - // This will contain the replacement tree for exp - // It will either be the CSE def or CSE ref - // - GenTree* cse = nullptr; - bool isDef; + // This will contain the replacement tree for exp + // It will either be the CSE def or CSE ref + // + GenTree* cse = nullptr; + bool isDef; - if (IS_CSE_USE(exp->gtCSEnum)) - { - /* This is a use of the CSE */ - isDef = false; + if (IS_CSE_USE(exp->gtCSEnum)) + { + /* This is a use of the CSE */ + isDef = false; #ifdef DEBUG - if (m_pCompiler->verbose) - { - printf("\nWorking on the replacement of the " FMT_CSE " use at ", exp->gtCSEnum); - Compiler::printTreeID(exp); - printf(" in " FMT_BB "\n", blk->bbNum); - } + if (m_pCompiler->verbose) + { + printf("\nWorking on the replacement of the " FMT_CSE " use at ", exp->gtCSEnum); + Compiler::printTreeID(exp); + 
printf(" in " FMT_BB "\n", blk->bbNum); + } #endif // DEBUG - // We will replace the CSE ref with a new tree - // this is typically just a simple use of the new CSE LclVar - // + // We will replace the CSE ref with a new tree + // this is typically just a simple use of the new CSE LclVar + // - // Create a reference to the CSE temp - GenTree* cseLclVar = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp); - cseLclVar->gtVNPair.SetBoth(dsc->csdConstDefVN); + // Create a reference to the CSE temp + GenTree* cseLclVar = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp); + cseLclVar->gtVNPair.SetBoth(dsc->csdConstDefVN); - // Assign the ssa num for the lclvar use. Note it may be the reserved num. - cseLclVar->AsLclVarCommon()->SetSsaNum(cseSsaNum); + // Assign the ssa num for the lclvar use. Note it may be the reserved num. + cseLclVar->AsLclVarCommon()->SetSsaNum(cseSsaNum); - // If this local is in ssa, notify ssa there's a new use. - if (ssaVarDsc != nullptr) - { - ssaVarDsc->AddUse(blk); - } + // If this local is in ssa, notify ssa there's a new use. 
+ if (ssaVarDsc != nullptr) + { + ssaVarDsc->AddUse(blk); + } - cse = cseLclVar; - if (isSharedConst) + cse = cseLclVar; + if (isSharedConst) + { + ValueNum currVN = m_pCompiler->vnStore->VNLiberalNormalValue(exp->gtVNPair); + ssize_t curValue = m_pCompiler->vnStore->CoercedConstantValue(currVN); + ssize_t delta = curValue - dsc->csdConstDefValue; + if (delta != 0) { - ValueNum currVN = m_pCompiler->vnStore->VNLiberalNormalValue(exp->gtVNPair); - ssize_t curValue = m_pCompiler->vnStore->CoercedConstantValue(currVN); - ssize_t delta = curValue - dsc->csdConstDefValue; - if (delta != 0) - { - GenTree* deltaNode = m_pCompiler->gtNewIconNode(delta, cseLclVarTyp); - cse = m_pCompiler->gtNewOperNode(GT_ADD, cseLclVarTyp, cseLclVar, deltaNode); - cse->SetDoNotCSE(); - } + GenTree* deltaNode = m_pCompiler->gtNewIconNode(delta, cseLclVarTyp); + cse = m_pCompiler->gtNewOperNode(GT_ADD, cseLclVarTyp, cseLclVar, deltaNode); + cse->SetDoNotCSE(); } + } - // assign the proper ValueNumber, A CSE use discards any exceptions - cse->gtVNPair = vnStore->VNPNormalPair(exp->gtVNPair); + // assign the proper ValueNumber, A CSE use discards any exceptions + cse->gtVNPair = vnStore->VNPNormalPair(exp->gtVNPair); - // shared const CSE has the correct value number assigned - // and both liberal and conservative are identical - // and they do not use theConservativeVN - // - if (!isSharedConst) + // shared const CSE has the correct value number assigned + // and both liberal and conservative are identical + // and they do not use theConservativeVN + // + if (!isSharedConst) + { + ValueNum theConservativeVN = successfulCandidate->CseDsc()->defConservNormVN; + + if (theConservativeVN != ValueNumStore::NoVN) { - ValueNum theConservativeVN = successfulCandidate->CseDsc()->defConservNormVN; + // All defs of this CSE share the same normal conservative VN, and we are rewriting this + // use to fetch the same value with no reload, so we can safely propagate that + // conservative VN to this use. 
This can help range check elimination later on. + cse->gtVNPair.SetConservative(theConservativeVN); - if (theConservativeVN != ValueNumStore::NoVN) + // If the old VN was flagged as a checked bound, propagate that to the new VN + // to make sure assertion prop will pay attention to this VN. + ValueNum oldVN = exp->gtVNPair.GetConservative(); + if (!vnStore->IsVNConstant(theConservativeVN) && vnStore->IsVNCheckedBound(oldVN)) { - // All defs of this CSE share the same normal conservative VN, and we are rewriting this - // use to fetch the same value with no reload, so we can safely propagate that - // conservative VN to this use. This can help range check elimination later on. - cse->gtVNPair.SetConservative(theConservativeVN); - - // If the old VN was flagged as a checked bound, propagate that to the new VN - // to make sure assertion prop will pay attention to this VN. - ValueNum oldVN = exp->gtVNPair.GetConservative(); - if (!vnStore->IsVNConstant(theConservativeVN) && vnStore->IsVNCheckedBound(oldVN)) - { - vnStore->SetVNIsCheckedBound(theConservativeVN); - } + vnStore->SetVNIsCheckedBound(theConservativeVN); + } - GenTree* cmp; - if ((m_pCompiler->optCseCheckedBoundMap != nullptr) && - (m_pCompiler->optCseCheckedBoundMap->Lookup(exp, &cmp))) - { - // Propagate the new value number to this compare node as well, since - // subsequent range check elimination will try to correlate it with - // the other appearances that are getting CSEd. + GenTree* cmp; + if ((m_pCompiler->optCseCheckedBoundMap != nullptr) && + (m_pCompiler->optCseCheckedBoundMap->Lookup(exp, &cmp))) + { + // Propagate the new value number to this compare node as well, since + // subsequent range check elimination will try to correlate it with + // the other appearances that are getting CSEd. 
- ValueNum oldCmpVN = cmp->gtVNPair.GetConservative(); - ValueNum newCmpArgVN; + ValueNum oldCmpVN = cmp->gtVNPair.GetConservative(); + ValueNum newCmpArgVN; - ValueNumStore::CompareCheckedBoundArithInfo info; - if (vnStore->IsVNCompareCheckedBound(oldCmpVN)) - { - // Comparison is against the bound directly. + ValueNumStore::CompareCheckedBoundArithInfo info; + if (vnStore->IsVNCompareCheckedBound(oldCmpVN)) + { + // Comparison is against the bound directly. - newCmpArgVN = theConservativeVN; - vnStore->GetCompareCheckedBound(oldCmpVN, &info); - } - else - { - // Comparison is against the bound +/- some offset. + newCmpArgVN = theConservativeVN; + vnStore->GetCompareCheckedBound(oldCmpVN, &info); + } + else + { + // Comparison is against the bound +/- some offset. - assert(vnStore->IsVNCompareCheckedBoundArith(oldCmpVN)); - vnStore->GetCompareCheckedBoundArithInfo(oldCmpVN, &info); - newCmpArgVN = vnStore->VNForFunc(vnStore->TypeOfVN(info.arrOp), (VNFunc)info.arrOper, - info.arrOp, theConservativeVN); - } - ValueNum newCmpVN = vnStore->VNForFunc(vnStore->TypeOfVN(oldCmpVN), (VNFunc)info.cmpOper, - info.cmpOp, newCmpArgVN); - cmp->gtVNPair.SetConservative(newCmpVN); + assert(vnStore->IsVNCompareCheckedBoundArith(oldCmpVN)); + vnStore->GetCompareCheckedBoundArithInfo(oldCmpVN, &info); + newCmpArgVN = vnStore->VNForFunc(vnStore->TypeOfVN(info.arrOp), (VNFunc)info.arrOper, + info.arrOp, theConservativeVN); } + ValueNum newCmpVN = vnStore->VNForFunc(vnStore->TypeOfVN(oldCmpVN), (VNFunc)info.cmpOper, + info.cmpOp, newCmpArgVN); + cmp->gtVNPair.SetConservative(newCmpVN); } } + } #ifdef DEBUG - cse->gtDebugFlags |= GTF_DEBUG_VAR_CSE_REF; + cse->gtDebugFlags |= GTF_DEBUG_VAR_CSE_REF; #endif // DEBUG - // Now we need to unmark any nested CSE's uses that are found in 'exp' - // As well we extract any nested CSE defs that are found in 'exp' and - // these are appended to the sideEffList - - // Afterwards the set of nodes in the 'sideEffectList' are preserved and - // all 
other nodes are removed. - // - exp->gtCSEnum = NO_CSE; // clear the gtCSEnum field + // Now we need to unmark any nested CSE's uses that are found in 'exp' + // As well we extract any nested CSE defs that are found in 'exp' and + // these are appended to the sideEffList - GenTree* sideEffList = nullptr; - m_pCompiler->gtExtractSideEffList(exp, &sideEffList, GTF_PERSISTENT_SIDE_EFFECTS | GTF_IS_IN_CSE); + // Afterwards the set of nodes in the 'sideEffectList' are preserved and + // all other nodes are removed. + // + exp->gtCSEnum = NO_CSE; // clear the gtCSEnum field - // If we have any side effects or extracted CSE defs then we need to create a GT_COMMA tree instead - // - if (sideEffList != nullptr) - { -#ifdef DEBUG - if (m_pCompiler->verbose) - { - printf("\nThis CSE use has side effects and/or nested CSE defs. The sideEffectList:\n"); - m_pCompiler->gtDispTree(sideEffList); - printf("\n"); - } -#endif - ValueNumPair sideEffExcSet = vnStore->VNPExceptionSet(sideEffList->gtVNPair); - ValueNumPair cseWithSideEffVNPair = vnStore->VNPWithExc(cse->gtVNPair, sideEffExcSet); + GenTree* sideEffList = nullptr; + m_pCompiler->gtExtractSideEffList(exp, &sideEffList, GTF_PERSISTENT_SIDE_EFFECTS | GTF_IS_IN_CSE); - // Create a comma node with the sideEffList as op1 - cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, sideEffList, cse); - cse->gtVNPair = cseWithSideEffVNPair; - } - } - else + // If we have any side effects or extracted CSE defs then we need to create a GT_COMMA tree instead + // + if (sideEffList != nullptr) { - /* This is a def of the CSE */ - isDef = true; #ifdef DEBUG if (m_pCompiler->verbose) { - printf("\n" FMT_CSE " def at ", GET_CSE_INDEX(exp->gtCSEnum)); - Compiler::printTreeID(exp); - printf(" replaced in " FMT_BB " with def of V%02u\n", blk->bbNum, cseLclVarNum); + printf("\nThis CSE use has side effects and/or nested CSE defs. 
The sideEffectList:\n"); + m_pCompiler->gtDispTree(sideEffList); + printf("\n"); } +#endif + ValueNumPair sideEffExcSet = vnStore->VNPExceptionSet(sideEffList->gtVNPair); + ValueNumPair cseWithSideEffVNPair = vnStore->VNPWithExc(cse->gtVNPair, sideEffExcSet); + + // Create a comma node with the sideEffList as op1 + cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, sideEffList, cse); + cse->gtVNPair = cseWithSideEffVNPair; + } + } + else + { + /* This is a def of the CSE */ + isDef = true; +#ifdef DEBUG + if (m_pCompiler->verbose) + { + printf("\n" FMT_CSE " def at ", GET_CSE_INDEX(exp->gtCSEnum)); + Compiler::printTreeID(exp); + printf(" replaced in " FMT_BB " with def of V%02u\n", blk->bbNum, cseLclVarNum); + } #endif // DEBUG - GenTree* val = exp; - if (isSharedConst) + GenTree* val = exp; + if (isSharedConst) + { + ValueNum currVN = m_pCompiler->vnStore->VNLiberalNormalValue(exp->gtVNPair); + ssize_t curValue = m_pCompiler->vnStore->CoercedConstantValue(currVN); + ssize_t delta = curValue - dsc->csdConstDefValue; + if (delta != 0) { - ValueNum currVN = m_pCompiler->vnStore->VNLiberalNormalValue(exp->gtVNPair); - ssize_t curValue = m_pCompiler->vnStore->CoercedConstantValue(currVN); - ssize_t delta = curValue - dsc->csdConstDefValue; - if (delta != 0) - { - val = m_pCompiler->gtNewIconNode(dsc->csdConstDefValue, cseLclVarTyp); - val->gtVNPair.SetBoth(dsc->csdConstDefVN); - } + val = m_pCompiler->gtNewIconNode(dsc->csdConstDefValue, cseLclVarTyp); + val->gtVNPair.SetBoth(dsc->csdConstDefVN); } + } - /* Create a store of the value to the temp */ - GenTree* store = m_pCompiler->gtNewTempStore(cseLclVarNum, val); - GenTree* origStore = store; + /* Create a store of the value to the temp */ + GenTree* store = m_pCompiler->gtNewTempStore(cseLclVarNum, val); + GenTree* origStore = store; - if (!store->OperIs(GT_STORE_LCL_VAR)) - { - // This can only be the case for a struct in which the 'val' was a COMMA, so - // the assignment is sunk below it. 
- store = store->gtEffectiveVal(); - noway_assert(origStore->OperIs(GT_COMMA) && (origStore == val)); - } - else - { - noway_assert(store->Data() == val); - } + if (!store->OperIs(GT_STORE_LCL_VAR)) + { + // This can only be the case for a struct in which the 'val' was a COMMA, so + // the assignment is sunk below it. + store = store->gtEffectiveVal(); + noway_assert(origStore->OperIs(GT_COMMA) && (origStore == val)); + } + else + { + noway_assert(store->Data() == val); + } - // Assign the proper Value Numbers. - store->gtVNPair = ValueNumStore::VNPForVoid(); // The store node itself is $VN.Void. - noway_assert(store->OperIs(GT_STORE_LCL_VAR)); + // Assign the proper Value Numbers. + store->gtVNPair = ValueNumStore::VNPForVoid(); // The store node itself is $VN.Void. + noway_assert(store->OperIs(GT_STORE_LCL_VAR)); - // Backpatch the SSA def, if we're putting this CSE temp into ssa. - store->AsLclVar()->SetSsaNum(cseSsaNum); + // Backpatch the SSA def, if we're putting this CSE temp into ssa. + store->AsLclVar()->SetSsaNum(cseSsaNum); - // Move the information about the CSE def to the store; it now indicates a completed - // CSE def instead of just a candidate. optCSE_canSwap uses this information to reason - // about evaluation order in between substitutions of CSE defs/uses. - store->gtCSEnum = exp->gtCSEnum; - exp->gtCSEnum = NO_CSE; + // Move the information about the CSE def to the store; it now indicates a completed + // CSE def instead of just a candidate. optCSE_canSwap uses this information to reason + // about evaluation order in between substitutions of CSE defs/uses. 
+ store->gtCSEnum = exp->gtCSEnum; + exp->gtCSEnum = NO_CSE; - if (cseSsaNum != SsaConfig::RESERVED_SSA_NUM) - { - LclSsaVarDsc* ssaVarDsc = m_pCompiler->lvaTable[cseLclVarNum].GetPerSsaData(cseSsaNum); + if (cseSsaNum != SsaConfig::RESERVED_SSA_NUM) + { + LclSsaVarDsc* ssaVarDsc = m_pCompiler->lvaTable[cseLclVarNum].GetPerSsaData(cseSsaNum); - // These should not have been set yet, since this is the first and - // only def for this CSE. - assert(ssaVarDsc->GetBlock() == nullptr); - assert(ssaVarDsc->GetDefNode() == nullptr); + // These should not have been set yet, since this is the first and + // only def for this CSE. + assert(ssaVarDsc->GetBlock() == nullptr); + assert(ssaVarDsc->GetDefNode() == nullptr); - ssaVarDsc->m_vnPair = val->gtVNPair; - ssaVarDsc->SetBlock(blk); - ssaVarDsc->SetDefNode(store->AsLclVarCommon()); - } + ssaVarDsc->m_vnPair = val->gtVNPair; + ssaVarDsc->SetBlock(blk); + ssaVarDsc->SetDefNode(store->AsLclVarCommon()); + } - /* Create a reference to the CSE temp */ - GenTree* cseLclVar = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp); - cseLclVar->gtVNPair.SetBoth(dsc->csdConstDefVN); + /* Create a reference to the CSE temp */ + GenTree* cseLclVar = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp); + cseLclVar->gtVNPair.SetBoth(dsc->csdConstDefVN); - // Assign the ssa num for the lclvar use. Note it may be the reserved num. - cseLclVar->AsLclVarCommon()->SetSsaNum(cseSsaNum); + // Assign the ssa num for the lclvar use. Note it may be the reserved num. + cseLclVar->AsLclVarCommon()->SetSsaNum(cseSsaNum); - // If this local is in ssa, notify ssa there's a new use. - if (ssaVarDsc != nullptr) - { - ssaVarDsc->AddUse(blk); - } + // If this local is in ssa, notify ssa there's a new use. 
+ if (ssaVarDsc != nullptr) + { + ssaVarDsc->AddUse(blk); + } - GenTree* cseUse = cseLclVar; - if (isSharedConst) + GenTree* cseUse = cseLclVar; + if (isSharedConst) + { + ValueNum currVN = m_pCompiler->vnStore->VNLiberalNormalValue(exp->gtVNPair); + ssize_t curValue = m_pCompiler->vnStore->CoercedConstantValue(currVN); + ssize_t delta = curValue - dsc->csdConstDefValue; + if (delta != 0) { - ValueNum currVN = m_pCompiler->vnStore->VNLiberalNormalValue(exp->gtVNPair); - ssize_t curValue = m_pCompiler->vnStore->CoercedConstantValue(currVN); - ssize_t delta = curValue - dsc->csdConstDefValue; - if (delta != 0) - { - GenTree* deltaNode = m_pCompiler->gtNewIconNode(delta, cseLclVarTyp); - cseUse = m_pCompiler->gtNewOperNode(GT_ADD, cseLclVarTyp, cseLclVar, deltaNode); - cseUse->SetDoNotCSE(); - } + GenTree* deltaNode = m_pCompiler->gtNewIconNode(delta, cseLclVarTyp); + cseUse = m_pCompiler->gtNewOperNode(GT_ADD, cseLclVarTyp, cseLclVar, deltaNode); + cseUse->SetDoNotCSE(); } - cseUse->gtVNPair = exp->gtVNPair; // The 'cseUse' is equal to the original expression. - - /* Create a comma node for the CSE assignment */ - cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, origStore, cseUse); - cse->gtVNPair = cseUse->gtVNPair; // The comma's value is the same as 'val' - // as the assignment to the CSE LclVar - // cannot add any new exceptions } + cseUse->gtVNPair = exp->gtVNPair; // The 'cseUse' is equal to the original expression. 
+ + /* Create a comma node for the CSE assignment */ + cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, origStore, cseUse); + cse->gtVNPair = cseUse->gtVNPair; // The comma's value is the same as 'val' + // as the assignment to the CSE LclVar + // cannot add any new exceptions + } - cse->CopyReg(exp); // The cse inheirits any reg num property from the original exp node - exp->ClearRegNum(); // The exp node (for a CSE def) no longer has a register requirement + cse->CopyReg(exp); // The cse inheirits any reg num property from the original exp node + exp->ClearRegNum(); // The exp node (for a CSE def) no longer has a register requirement - // Walk the statement 'stmt' and find the pointer - // in the tree is pointing to 'exp' - // - Compiler::FindLinkData linkData = m_pCompiler->gtFindLink(stmt, exp); - GenTree** link = linkData.result; + // Walk the statement 'stmt' and find the pointer + // in the tree is pointing to 'exp' + // + Compiler::FindLinkData linkData = m_pCompiler->gtFindLink(stmt, exp); + GenTree** link = linkData.result; #ifdef DEBUG - if (link == nullptr) - { - printf("\ngtFindLink failed: stm="); - Compiler::printStmtID(stmt); - printf(", exp="); - Compiler::printTreeID(exp); - printf("\n"); - printf("stm ="); - m_pCompiler->gtDispStmt(stmt); - printf("\n"); - printf("exp ="); - m_pCompiler->gtDispTree(exp); - printf("\n"); - } + if (link == nullptr) + { + printf("\ngtFindLink failed: stm="); + Compiler::printStmtID(stmt); + printf(", exp="); + Compiler::printTreeID(exp); + printf("\n"); + printf("stm ="); + m_pCompiler->gtDispStmt(stmt); + printf("\n"); + printf("exp ="); + m_pCompiler->gtDispTree(exp); + printf("\n"); + } #endif // DEBUG - noway_assert(link); + noway_assert(link); - // Mutate this link, thus replacing the old exp with the new CSE representation - // - *link = cse; + // Mutate this link, thus replacing the old exp with the new CSE representation + // + *link = cse; + + assert(m_pCompiler->fgRemoveRestOfBlock == false); + + /* 
re-morph the statement */ + m_pCompiler->fgMorphBlockStmt(blk, stmt DEBUGARG("optValnumCSE")); + + } while (lst != nullptr); +} - assert(m_pCompiler->fgRemoveRestOfBlock == false); +void CSE_Heuristic::AdjustHeuristic(CSE_Candidate* successfulCandidate) +{ + weight_t cseRefCnt = (successfulCandidate->DefCount() * 2) + successfulCandidate->UseCount(); + + // FACTOR THIS + if (successfulCandidate->LiveAcrossCall() != 0) + { + // As we introduce new LclVars for these CSE we slightly + // increase the cutoffs for aggressive and moderate CSE's + // + weight_t incr = BB_UNITY_WEIGHT; - /* re-morph the statement */ - m_pCompiler->fgMorphBlockStmt(blk, stmt DEBUGARG("optValnumCSE")); + if (cseRefCnt > aggressiveRefCnt) + { + aggressiveRefCnt += incr; + } - } while (lst != nullptr); + if (cseRefCnt > moderateRefCnt) + { + moderateRefCnt += (incr / 2); + } } +} - // Consider each of the CSE candidates and if the CSE passes - // the PromotionCheck then transform the CSE by calling PerformCSE - // - void ConsiderCandidates() +// Consider each of the CSE candidates and if the CSE passes +// the PromotionCheck then transform the CSE by calling PerformCSE +// +void CSE_HeuristicCommon::ConsiderCandidates() +{ + /* Consider each CSE candidate, in order of decreasing cost */ + unsigned cnt = m_pCompiler->optCSECandidateCount; + CSEdsc** ptr = sortTab; + for (; (cnt > 0); cnt--, ptr++) { - /* Consider each CSE candidate, in order of decreasing cost */ - unsigned cnt = m_pCompiler->optCSECandidateCount; - Compiler::CSEdsc** ptr = sortTab; - for (; (cnt > 0); cnt--, ptr++) + CSEdsc* dsc = *ptr; + CSE_Candidate candidate(this, dsc); + + if (dsc->defExcSetPromise == ValueNumStore::NoVN) { - Compiler::CSEdsc* dsc = *ptr; - CSE_Candidate candidate(this, dsc); + JITDUMP("Abandoned " FMT_CSE " because we had defs with different Exc sets\n", candidate.CseIndex()); + continue; + } - if (dsc->defExcSetPromise == ValueNumStore::NoVN) - { - JITDUMP("Abandoned " FMT_CSE " because we had defs with 
different Exc sets\n", candidate.CseIndex()); - continue; - } + candidate.InitializeCounts(); - candidate.InitializeCounts(); + if (candidate.UseCount() == 0) + { + JITDUMP("Skipped " FMT_CSE " because use count is 0\n", candidate.CseIndex()); + continue; + } - if (candidate.UseCount() == 0) +#ifdef DEBUG + if (m_pCompiler->verbose) + { + if (!Compiler::Is_Shared_Const_CSE(dsc->csdHashKey)) { - JITDUMP("Skipped " FMT_CSE " because use count is 0\n", candidate.CseIndex()); - continue; + printf("\nConsidering " FMT_CSE " {$%-3x, $%-3x} [def=%3f, use=%3f, cost=%3u%s]\n", + candidate.CseIndex(), dsc->csdHashKey, dsc->defExcSetPromise, candidate.DefCount(), + candidate.UseCount(), candidate.Cost(), dsc->csdLiveAcrossCall ? ", call" : " "); } - -#ifdef DEBUG - if (m_pCompiler->verbose) + else { - if (!Compiler::Is_Shared_Const_CSE(dsc->csdHashKey)) - { - printf("\nConsidering " FMT_CSE " {$%-3x, $%-3x} [def=%3f, use=%3f, cost=%3u%s]\n", - candidate.CseIndex(), dsc->csdHashKey, dsc->defExcSetPromise, candidate.DefCount(), - candidate.UseCount(), candidate.Cost(), dsc->csdLiveAcrossCall ? ", call" : " "); - } - else - { - size_t kVal = Compiler::Decode_Shared_Const_CSE_Value(dsc->csdHashKey); - printf("\nConsidering " FMT_CSE " {K_%p} [def=%3f, use=%3f, cost=%3u%s]\n", candidate.CseIndex(), - dspPtr(kVal), candidate.DefCount(), candidate.UseCount(), candidate.Cost(), - dsc->csdLiveAcrossCall ? ", call" : " "); - } - printf("CSE Expression : \n"); - m_pCompiler->gtDispTree(candidate.Expr()); - printf("\n"); + size_t kVal = Compiler::Decode_Shared_Const_CSE_Value(dsc->csdHashKey); + printf("\nConsidering " FMT_CSE " {K_%p} [def=%3f, use=%3f, cost=%3u%s]\n", candidate.CseIndex(), + dspPtr(kVal), candidate.DefCount(), candidate.UseCount(), candidate.Cost(), + dsc->csdLiveAcrossCall ? 
", call" : " "); } + printf("CSE Expression : \n"); + m_pCompiler->gtDispTree(candidate.Expr()); + printf("\n"); + } #endif // DEBUG - if ((dsc->csdDefCount <= 0) || (dsc->csdUseCount == 0)) - { - // If we reach this point, then the CSE def was incorrectly marked or the - // block with this use is unreachable. So skip and go to the next CSE. - // Without the "continue", we'd generate bad code in retail. - // Commented out a noway_assert(false) here due to bug: 3290124. - // The problem is if there is sub-graph that is not reachable from the - // entry point, the CSE flags propagated, would be incorrect for it. - continue; - } + if ((dsc->csdDefCount <= 0) || (dsc->csdUseCount == 0)) + { + // If we reach this point, then the CSE def was incorrectly marked or the + // block with this use is unreachable. So skip and go to the next CSE. + // Without the "continue", we'd generate bad code in retail. + // Commented out a noway_assert(false) here due to bug: 3290124. + // The problem is if there is sub-graph that is not reachable from the + // entry point, the CSE flags propagated, would be incorrect for it. + continue; + } - bool doCSE = PromotionCheck(&candidate); + bool doCSE = PromotionCheck(&candidate); #ifdef DEBUG - if (doCSE) - { - const int attempt = m_pCompiler->optCSEattempt++; - - if (m_pCompiler->info.compMethodHash() == (unsigned)JitConfig.JitCSEHash()) - { - // We can only mask the first 32 CSE attempts, so suppress anything beyond that. - // Note methods with >= 32 CSEs are currently quite rare. - // - if (attempt >= 32) - { - doCSE = false; - JITDUMP(FMT_CSE " attempt %u disabled, out of mask range\n", candidate.CseIndex(), attempt); - } - else - { - doCSE = ((1 << attempt) & ((unsigned)JitConfig.JitCSEMask())) != 0; - JITDUMP(FMT_CSE " attempt %u mask 0x%08x: %s\n", candidate.CseIndex(), attempt, - JitConfig.JitCSEMask(), doCSE ? 
"allowed" : "disabled"); - } - } - } + if (doCSE) + { + const int attempt = m_pCompiler->optCSEattempt++; - if (m_pCompiler->verbose) + if (m_pCompiler->info.compMethodHash() == (unsigned)JitConfig.JitCSEHash()) { - if (doCSE) + // We can only mask the first 32 CSE attempts, so suppress anything beyond that. + // Note methods with >= 32 CSEs are currently quite rare. + // + if (attempt >= 32) { - printf("\nPromoting CSE:\n"); + doCSE = false; + JITDUMP(FMT_CSE " attempt %u disabled, out of mask range\n", candidate.CseIndex(), attempt); } else { - printf("Did Not promote this CSE\n"); + doCSE = ((1 << attempt) & ((unsigned)JitConfig.JitCSEMask())) != 0; + JITDUMP(FMT_CSE " attempt %u mask 0x%08x: %s\n", candidate.CseIndex(), attempt, + JitConfig.JitCSEMask(), doCSE ? "allowed" : "disabled"); } } -#endif // DEBUG + } + if (m_pCompiler->verbose) + { if (doCSE) { - PerformCSE(&candidate); - madeChanges = true; + printf("\nPromoting CSE:\n"); + } + else + { + printf("Did Not promote this CSE\n"); } } - } +#endif // DEBUG - // Perform the necessary cleanup after our CSE heuristics have run - // - void Cleanup() - { - // Nothing to do, currently. + if (doCSE) + { + PerformCSE(&candidate); + madeChanges = true; + } } -}; +} //------------------------------------------------------------------------ // optValnumCSE_Heuristic: Perform common sub-expression elimination @@ -3394,14 +3195,42 @@ bool Compiler::optValnumCSE_Heuristic() } #endif // DEBUG - CSE_Heuristic cse_heuristic(this); + // Determine which heuristic to use... 
+ // + CSE_HeuristicCommon* heuristic = nullptr; + +#ifdef DEBUG + bool useRandomHeuristic = false; + + if (JitConfig.JitRandomCSE() > 0) + { + JITDUMP("Using Random CSE heuristic (JitRandomCSE)\n"); + useRandomHeuristic = true; + } + else if (compStressCompile(Compiler::STRESS_MAKE_CSE, MAX_STRESS_WEIGHT)) + { + JITDUMP("Using Random CSE heuristic (stress)\n"); + useRandomHeuristic = true; + } + + if (useRandomHeuristic) + { + heuristic = new (this, CMK_CSE) CSE_HeuristicRandom(this); + } +#endif + + if (heuristic == nullptr) + { + JITDUMP("Using standard CSE heuristic\n"); + heuristic = new (this, CMK_CSE) CSE_Heuristic(this); + } - cse_heuristic.Initialize(); - cse_heuristic.SortCandidates(); - cse_heuristic.ConsiderCandidates(); - cse_heuristic.Cleanup(); + heuristic->Initialize(); + heuristic->SortCandidates(); + heuristic->ConsiderCandidates(); + heuristic->Cleanup(); - return cse_heuristic.MadeChanges(); + return heuristic->MadeChanges(); } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/optcse.h b/src/coreclr/jit/optcse.h new file mode 100644 index 00000000000000..a74ffa83a436ca --- /dev/null +++ b/src/coreclr/jit/optcse.h @@ -0,0 +1,319 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef _OPTCSE_H +#define _OPTCSE_H + +#include "compiler.h" + +struct CSEdsc; +class CSE_Candidate; + +// Base class for CSE Heuristics +// +// Also usable as a "do nothing" heuristic. 
+// +class CSE_HeuristicCommon +{ +protected: + CSE_HeuristicCommon(Compiler*); + + Compiler* m_pCompiler; + unsigned m_addCSEcount; + CSEdsc** sortTab; + size_t sortSiz; + bool madeChanges; + Compiler::codeOptimize codeOptKind; + +public: + virtual void Initialize() + { + } + + virtual void SortCandidates() + { + } + + virtual bool PromotionCheck(CSE_Candidate* candidate) + { + return false; + } + virtual void PerformCSE(CSE_Candidate* candidate); + + virtual void Cleanup() + { + } + + virtual bool ConsiderTree(GenTree* tree) + { + return false; + } + + virtual void AdjustHeuristic(CSE_Candidate* candidate) + { + } + + void ConsiderCandidates(); + + bool MadeChanges() const + { + return madeChanges; + } + + Compiler::codeOptimize CodeOptKind() const + { + return codeOptKind; + } + + bool IsCompatibleType(var_types cseLclVarTyp, var_types expTyp); +}; + +#ifdef DEBUG +// Randomized CSE heuristic +// +// Performs CSEs randomly, useful for stress +// +class CSE_HeuristicRandom : public CSE_HeuristicCommon +{ + +private: + CLRRandom m_cseRNG; + unsigned m_bias; + +public: + CSE_HeuristicRandom(Compiler*); + void SortCandidates(); + bool PromotionCheck(CSE_Candidate* candidate); +}; + +#endif + +// Standard CSE heuristic +// +// The following class handles the CSE heuristics +// we use a complex set of heuristic rules +// to determine if it is likely to be profitable to perform this CSE +// +class CSE_Heuristic : public CSE_HeuristicCommon +{ +private: + weight_t aggressiveRefCnt; + weight_t moderateRefCnt; + unsigned enregCount; // count of the number of predicted enregistered variables + bool largeFrame; + bool hugeFrame; + +public: + CSE_Heuristic(Compiler*); + + void Initialize(); + void SortCandidates(); + bool PromotionCheck(CSE_Candidate* candidate); + void AdjustHeuristic(CSE_Candidate* candidate); +}; + +// Generic list of nodes - used by the CSE logic + +struct treeStmtLst +{ + treeStmtLst* tslNext; + GenTree* tslTree; // tree node + Statement* tslStmt; // 
statement containing the tree + BasicBlock* tslBlock; // block containing the statement +}; + +// The following logic keeps track of expressions via a simple hash table. + +struct CSEdsc +{ + CSEdsc* csdNextInBucket; // used by the hash table + size_t csdHashKey; // the original hashkey + ssize_t csdConstDefValue; // When we CSE similar constants, this is the value that we use as the def + ValueNum csdConstDefVN; // When we CSE similar constants, this is the ValueNumber that we use for the LclVar + // assignment + unsigned csdIndex; // 1..optCSECandidateCount + bool csdIsSharedConst; // true if this CSE is a shared const + bool csdLiveAcrossCall; + + unsigned short csdDefCount; // definition count + unsigned short csdUseCount; // use count (excluding the implicit uses at defs) + + weight_t csdDefWtCnt; // weighted def count + weight_t csdUseWtCnt; // weighted use count (excluding the implicit uses at defs) + + GenTree* csdTree; // treenode containing the 1st occurrence + Statement* csdStmt; // stmt containing the 1st occurrence + BasicBlock* csdBlock; // block containing the 1st occurrence + + treeStmtLst* csdTreeList; // list of matching tree nodes: head + treeStmtLst* csdTreeLast; // list of matching tree nodes: tail + + // The exception set that is now required for all defs of this CSE. + // This will be set to NoVN if we decide to abandon this CSE + ValueNum defExcSetPromise; + + // The set of exceptions we currently can use for CSE uses. + ValueNum defExcSetCurrent; + + // if all def occurrences share the same conservative normal value + // number, this will reflect it; otherwise, NoVN. 
+ // not used for shared const CSE's + ValueNum defConservNormVN; +}; + +// The following class nested within CSE_Heuristic encapsulates the information +// about the current CSE candidate that is under consideration +// +// TODO-Cleanup: This is still very much based upon the old Lexical CSE implementation +// and needs to be reworked for the Value Number based implementation +// +class CSE_Candidate +{ + CSE_HeuristicCommon* m_context; + CSEdsc* m_CseDsc; + + unsigned m_cseIndex; + weight_t m_defCount; + weight_t m_useCount; + unsigned m_Cost; + unsigned m_Size; + + // When this Candidate is successfully promoted to a CSE we record + // the following information about what category was used when promoting it. + // + // We will set m_Aggressive: + // When we believe that the CSE is very valuable in terms of weighted ref counts, + // such that it would always be enregistered by the register allocator. + // + // We will set m_Moderate: + // When we believe that the CSE is moderately valuable in terms of weighted ref counts, + // such that it is more likely than not to be enregistered by the register allocator. + // + // We will set m_Conservative: + // When we didn't set m_Aggressive or m_Moderate. + // Such candidates typically are expensive to compute and thus are + // always profitable to promote even when they aren't enregistered. + // + // We will set m_StressCSE: + // When the candidate is only being promoted because of a Stress mode. 
+ // + bool m_Aggressive; + bool m_Moderate; + bool m_Conservative; + bool m_StressCSE; + +public: + CSE_Candidate(CSE_HeuristicCommon* context, CSEdsc* cseDsc) + : m_context(context) + , m_CseDsc(cseDsc) + , m_cseIndex(m_CseDsc->csdIndex) + , m_defCount(0) + , m_useCount(0) + , m_Cost(0) + , m_Size(0) + , m_Aggressive(false) + , m_Moderate(false) + , m_Conservative(false) + , m_StressCSE(false) + { + } + + CSEdsc* CseDsc() + { + return m_CseDsc; + } + unsigned CseIndex() + { + return m_cseIndex; + } + weight_t DefCount() + { + return m_defCount; + } + weight_t UseCount() + { + return m_useCount; + } + // TODO-CQ: With ValNum CSE's the Expr and its cost can vary. + GenTree* Expr() + { + return m_CseDsc->csdTree; + } + unsigned Cost() + { + return m_Cost; + } + unsigned Size() + { + return m_Size; + } + + bool IsSharedConst() + { + return m_CseDsc->csdIsSharedConst; + } + + bool LiveAcrossCall() + { + return m_CseDsc->csdLiveAcrossCall; + } + + void SetAggressive() + { + m_Aggressive = true; + } + + bool IsAggressive() + { + return m_Aggressive; + } + + void SetModerate() + { + m_Moderate = true; + } + + bool IsModerate() + { + return m_Moderate; + } + + void SetConservative() + { + m_Conservative = true; + } + + bool IsConservative() + { + return m_Conservative; + } + + void SetStressCSE() + { + m_StressCSE = true; + } + + bool IsStressCSE() + { + return m_StressCSE; + } + + void InitializeCounts() + { + m_Size = Expr()->GetCostSz(); // always the GetCostSz() + if (m_context->CodeOptKind() == Compiler::SMALL_CODE) + { + m_Cost = m_Size; // the estimated code size + m_defCount = m_CseDsc->csdDefCount; // def count + m_useCount = m_CseDsc->csdUseCount; // use count (excluding the implicit uses at defs) + } + else + { + m_Cost = Expr()->GetCostEx(); // the estimated execution cost + m_defCount = m_CseDsc->csdDefWtCnt; // weighted def count + m_useCount = m_CseDsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs) + } + } +}; + +#endif // 
_OPTCSE_H