From c86effce5b86e9d92db8cc7052a02b00b8021435 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 30 Jan 2024 14:51:01 +0100 Subject: [PATCH 01/64] WIP --- src/coreclr/jit/CMakeLists.txt | 13 ++-- src/coreclr/jit/compiler.cpp | 9 +++ src/coreclr/jit/compiler.h | 2 + src/coreclr/jit/compmemkind.h | 1 + src/coreclr/jit/compphases.h | 1 + src/coreclr/jit/inductionvariableopts.cpp | 79 +++++++++++++++++++++++ src/coreclr/jit/jitconfigvalues.h | 1 + src/coreclr/jit/redundantbranchopts.cpp | 1 - 8 files changed, 100 insertions(+), 7 deletions(-) create mode 100644 src/coreclr/jit/inductionvariableopts.cpp diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 480d9d50350ddc..4d7dc333a7dd4b 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -94,7 +94,6 @@ set( JIT_SOURCES bitset.cpp block.cpp buildstring.cpp - layout.cpp codegencommon.cpp codegenlinear.cpp compiler.cpp @@ -123,20 +122,22 @@ set( JIT_SOURCES gentree.cpp gschecks.cpp hashbv.cpp - hwintrinsic.cpp + helperexpansion.cpp hostallocator.cpp + hwintrinsic.cpp ifconversion.cpp - helperexpansion.cpp - indirectcalltransformer.cpp - importercalls.cpp importer.cpp + importercalls.cpp importervectorization.cpp + indirectcalltransformer.cpp + inductionvariableopts.cpp inline.cpp inlinepolicy.cpp instr.cpp jitconfig.cpp jiteh.cpp jithashtable.cpp + layout.cpp lclmorph.cpp lclvars.cpp likelyclass.cpp @@ -151,7 +152,6 @@ set( JIT_SOURCES objectalloc.cpp optcse.cpp optimizebools.cpp - switchrecognition.cpp optimizer.cpp patchpoint.cpp phase.cpp @@ -172,6 +172,7 @@ set( JIT_SOURCES ssabuilder.cpp ssarenamestate.cpp stacklevelsetter.cpp + switchrecognition.cpp treelifeupdater.cpp unwind.cpp utils.cpp diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index b2cf65a9811239..676262a4e65658 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4881,6 +4881,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl bool doValueNum = true; bool doLoopHoisting = true; bool doCopyProp = true; + bool doOptimizeIVs = true; bool doBranchOpt = true; bool doCse = true; bool doAssertionProp = true; @@ -4893,6 +4894,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl doSsa = (JitConfig.JitDoSsa() != 0); doEarlyProp = doSsa && (JitConfig.JitDoEarlyProp() != 0); doValueNum = doSsa && (JitConfig.JitDoValueNumber() != 0); + doOptimizeIVs = doSsa && (JitConfig.JitDoOptimizeIVs() != 0); doLoopHoisting = doValueNum && (JitConfig.JitDoLoopHoisting() != 0); doCopyProp = doValueNum && (JitConfig.JitDoCopyProp() != 0); doBranchOpt = doValueNum && (JitConfig.JitDoRedundantBranchOpts() != 0); @@ -4931,6 +4933,13 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl DoPhase(this, PHASE_EARLY_PROP, &Compiler::optEarlyProp); } + if (doOptimizeIVs) + { + // Simplify and optimize induction variables used in natural loops + // + DoPhase(this, PHASE_OPTIMIZE_INDUCTION_VARIABLES, &Compiler::optInductionVariables); + } + if (doValueNum) { // Value number the trees diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index ee779d375939d3..17756520385634 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7377,6 +7377,8 @@ class Compiler BasicBlock* basicBlock); #endif + PhaseStatus optInductionVariables(); + // Redundant branch opts // PhaseStatus optRedundantBranches(); diff --git a/src/coreclr/jit/compmemkind.h 
b/src/coreclr/jit/compmemkind.h index 835d85f798d29b..03379196df9278 100644 --- a/src/coreclr/jit/compmemkind.h +++ b/src/coreclr/jit/compmemkind.h @@ -50,6 +50,7 @@ CompMemKindMacro(LoopOpt) CompMemKindMacro(LoopClone) CompMemKindMacro(LoopUnroll) CompMemKindMacro(LoopHoist) +CompMemKindMacro(LoopScalarEvolution) CompMemKindMacro(Unknown) CompMemKindMacro(RangeCheck) CompMemKindMacro(CopyProp) diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index 23930985319769..10b60167be4224 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -84,6 +84,7 @@ CompPhaseNameMacro(PHASE_BUILD_SSA_DF, "SSA: DF", CompPhaseNameMacro(PHASE_BUILD_SSA_INSERT_PHIS, "SSA: insert phis", false, PHASE_BUILD_SSA, false) CompPhaseNameMacro(PHASE_BUILD_SSA_RENAME, "SSA: rename", false, PHASE_BUILD_SSA, false) CompPhaseNameMacro(PHASE_EARLY_PROP, "Early Value Propagation", false, -1, false) +CompPhaseNameMacro(PHASE_OPTIMIZE_INDUCTION_VARIABLES, "Optimize Induction Variables", false, -1, false) CompPhaseNameMacro(PHASE_VALUE_NUMBER, "Do value numbering", false, -1, false) CompPhaseNameMacro(PHASE_OPTIMIZE_INDEX_CHECKS, "Optimize index checks", false, -1, false) CompPhaseNameMacro(PHASE_OPTIMIZE_VALNUM_CSES, "Optimize Valnum CSEs", false, -1, false) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp new file mode 100644 index 00000000000000..12f2d4d416f2b2 --- /dev/null +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -0,0 +1,79 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "jitpch.h" + +// Represents a description of how a tree's value changes as a loop iterates. +struct ScalarEvolution +{ + +#ifdef DEBUG + void Dump() + { + + } +#endif +}; + +typedef JitHashTable, ScalarEvolution> ScalarEvolutionMap; + +class ScalarEvolutionContext +{ + ScalarEvolutionMap m_map; + +public: + ScalarEvolutionContext(Compiler* comp) + : m_map(comp->getAllocator(CMK_LoopScalarEvolution)) + { + } + + ScalarEvolution* Analyze(GenTree* tree) + { + + } +}; + +//------------------------------------------------------------------------ +// optInductionVariables: Try and optimize induction variables in the method. +// +// Returns: +// PhaseStatus indicating if anything changed. 
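+// Remarks:
+// For now this phase only wires up the scalar evolution scaffolding; it
+// performs no IR transformations yet and always returns
+// PhaseStatus::MODIFIED_NOTHING.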
+// +PhaseStatus Compiler::optInductionVariables() +{ + JITDUMP("*************** In optInductionVariables()\n"); + + JITDUMP("\n"); + fgDispBasicBlocks(true); + + ScalarEvolutionContext scevContext(this); + + for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder()) + { + JITDUMP("Analyzing scalar evolution in "); + FlowGraphNaturalLoop::Dump(loop); + + for (Statement* stmt : loop->GetHeader()->Statements()) + { + if (!stmt->IsPhiDefnStmt()) + { + break; + } + + GenTreeLclVarCommon* phiDef = stmt->GetRootNode()->AsLclVarCommon(); + unsigned lclNum = phiDef->GetLclNum(); + GenTreePhi* phi = phiDef->Data()->AsPhi(); + + GenTree* backedgeDef = nullptr; + GenTree* initDef = nullptr; + + for (GenTreePhi::Use& use : phi->Uses()) + { + GenTreePhiArg* phiArg = use.GetNode()->AsPhiArg(); + + } + } + } + + return PhaseStatus::MODIFIED_NOTHING; +} diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index fc3e2f30a3d23c..c1d6a1ad5f6932 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -450,6 +450,7 @@ CONFIG_INTEGER(JitNoRngChks, W("JitNoRngChks"), 0) // If 1, don't generate range #if defined(OPT_CONFIG) CONFIG_INTEGER(JitDoAssertionProp, W("JitDoAssertionProp"), 1) // Perform assertion propagation optimization CONFIG_INTEGER(JitDoCopyProp, W("JitDoCopyProp"), 1) // Perform copy propagation on variables that appear redundant +CONFIG_INTEGER(JitDoOptimizeIVs, W("JitDoOptimizeIVs"), 1) // Perform optimization of induction variables CONFIG_INTEGER(JitDoEarlyProp, W("JitDoEarlyProp"), 1) // Perform Early Value Propagation CONFIG_INTEGER(JitDoLoopHoisting, W("JitDoLoopHoisting"), 1) // Perform loop hoisting on loop invariant values CONFIG_INTEGER(JitDoLoopInversion, W("JitDoLoopInversion"), 1) // Perform loop inversion on "for/while" loops diff --git a/src/coreclr/jit/redundantbranchopts.cpp b/src/coreclr/jit/redundantbranchopts.cpp index 38f614d8d3c8c3..00e0440c55f920 100644 --- a/src/coreclr/jit/redundantbranchopts.cpp +++ b/src/coreclr/jit/redundantbranchopts.cpp @@ -11,7 +11,6 @@ // PhaseStatus Compiler::optRedundantBranches() { - #if DEBUG if (verbose) { From f8f899e1accd5795295a3de2f60f035460afae5c Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 1 Feb 2024 11:46:34 +0100 Subject: [PATCH 02/64] WIP --- src/coreclr/jit/inductionvariableopts.cpp | 610 +++++++++++++++++++++- src/coreclr/jit/jitconfigvalues.h | 6 +- 2 files changed, 587 insertions(+), 29 deletions(-) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 12f2d4d416f2b2..77cad6cbf86781 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -3,36 +3,592 @@ #include "jitpch.h" -// Represents a description of how a tree's value changes as a loop iterates. -struct ScalarEvolution +enum class ScevOper { + Constant, + Local, + ZeroExtend, + SignExtend, + Add, + Mul, + Lsh, + AddRec, +}; + +static bool ScevOperIs(ScevOper oper, ScevOper otherOper) +{ + return oper == otherOper; +} + +template +static bool ScevOperIs(ScevOper oper, ScevOper operFirst, Args... operTail) +{ + return oper == operFirst || ScevOperIs(oper, operTail...); +} + +struct Scev +{ + ScevOper Oper; + var_types Type; + + Scev(ScevOper oper, var_types type) : Oper(oper), Type(type) + { + } + + template + bool OperIs(Args... 
opers) + { + return ScevOperIs(Oper, opers...); + } -#ifdef DEBUG - void Dump() + bool TypeIs(var_types type) { + return Type == type; + } +}; + +struct ScevConstant : Scev +{ + ScevConstant(var_types type, int64_t value) : Scev(ScevOper::Constant, type), Value(value) + { + } + int64_t Value; +}; + +struct ScevLocal : Scev +{ + ScevLocal(var_types type, unsigned lclNum, unsigned ssaNum) + : Scev(ScevOper::Local, type), LclNum(lclNum), SsaNum(ssaNum) + { } -#endif + + unsigned LclNum; + unsigned SsaNum; }; -typedef JitHashTable, ScalarEvolution> ScalarEvolutionMap; +struct ScevUnop : Scev +{ + ScevUnop(ScevOper oper, var_types type, Scev* op1) : Scev(oper, type), Op1(op1) + { + } + + Scev* Op1; +}; + +struct ScevBinop : ScevUnop +{ + ScevBinop(ScevOper oper, var_types type, Scev* op1, Scev* op2) : ScevUnop(oper, type, op1), Op2(op2) + { + } + + Scev* Op2; +}; + +// Represents a value that evolves by an add recurrence. +// The value at iteration N is Start + N * Step. +// "Step" is guaranteed to be invariant in "Loop". +struct ScevAddRec : Scev +{ + ScevAddRec(var_types type, FlowGraphNaturalLoop* loop, Scev* start, Scev* step) + : Scev(ScevOper::AddRec, type), Loop(loop), Start(start), Step(step) + { + } + + FlowGraphNaturalLoop* Loop; + Scev* Start; + Scev* Step; +}; + +typedef JitHashTable, Scev*> ScalarEvolutionMap; + +static void DumpScev(Scev* scev) +{ + switch (scev->Oper) + { + case ScevOper::Constant: + { + ScevConstant* cns = (ScevConstant*)scev; + printf("%zd", (ssize_t)cns->Value); + break; + } + case ScevOper::Local: + { + ScevLocal* invariantLocal = (ScevLocal*)scev; + printf("V%02u.%u", invariantLocal->LclNum, invariantLocal->SsaNum); + break; + } + case ScevOper::ZeroExtend: + case ScevOper::SignExtend: + { + ScevUnop* unop = (ScevUnop*)scev; + printf("%cext<%d>(", unop->Oper == ScevOper::ZeroExtend ? 
'z' : 's', genTypeSize(unop->Type) * 8); + DumpScev(unop->Op1); + printf(")"); + break; + } + case ScevOper::Add: + case ScevOper::Mul: + case ScevOper::Lsh: + { + ScevBinop* binop = (ScevBinop*)scev; + printf("("); + DumpScev(binop->Op1); + const char* op; + switch (binop->Oper) + { + case ScevOper::Add: op = "+"; break; + case ScevOper::Mul: op = "*"; break; + case ScevOper::Lsh: op = "<<"; break; + default: unreached(); + } + printf(" %s ", op); + DumpScev(binop->Op2); + printf(")"); + break; + } + case ScevOper::AddRec: + { + ScevAddRec* addRec = (ScevAddRec*)scev; + printf("<" FMT_LP, addRec->Loop->GetIndex()); + printf(", "); + DumpScev(addRec->Start); + printf(", "); + DumpScev(addRec->Step); + printf(">"); + break; + } + default: + unreached(); + } +} class ScalarEvolutionContext { + Compiler* m_comp; ScalarEvolutionMap m_map; + Scev* AnalyzeNew(BasicBlock* block, GenTree* tree); + GenTreeLclVarCommon* GetSsaDef(GenTreeLclVarCommon* lcl, BasicBlock** defBlock); + bool IsInvariantInLoop(Scev* scev, FlowGraphNaturalLoop* loop); + public: - ScalarEvolutionContext(Compiler* comp) - : m_map(comp->getAllocator(CMK_LoopScalarEvolution)) + ScalarEvolutionContext(Compiler* comp) : m_comp(comp), m_map(comp->getAllocator(CMK_LoopScalarEvolution)) + { + } + + ScevConstant* NewConstant(var_types type, int64_t value) + { + ScevConstant* constant = new (m_comp, CMK_LoopScalarEvolution) ScevConstant(type, value); + return constant; + } + + ScevLocal* NewLocal(unsigned lclNum, unsigned ssaNum) + { + var_types type = genActualType(m_comp->lvaGetDesc(lclNum)); + ScevLocal* invariantLocal = + new (m_comp, CMK_LoopScalarEvolution) ScevLocal(type, lclNum, ssaNum); + return invariantLocal; + } + + ScevUnop* NewExtension(ScevOper oper, var_types targetType, Scev* op) { + assert(op != nullptr); + ScevUnop* ext = new (m_comp, CMK_LoopScalarEvolution) ScevUnop(oper, targetType, op); + return ext; } - ScalarEvolution* Analyze(GenTree* tree) + ScevBinop* NewBinop(ScevOper oper, Scev* op1, Scev* op2) { + assert((op1 != nullptr) && (op2 != nullptr)); + ScevBinop* binop = new (m_comp, CMK_LoopScalarEvolution) ScevBinop(oper, op1->Type, op1, op2); + return binop; + } + ScevAddRec* NewAddRec(FlowGraphNaturalLoop* loop, Scev* start, Scev* step) + { + assert((start != nullptr) && (step != nullptr)); + ScevAddRec* addRec = new (m_comp, CMK_LoopScalarEvolution) ScevAddRec(start->Type, loop, start, step); + return addRec; } + + Scev* Analyze(BasicBlock* block, GenTree* tree); + + Scev* Fold(Scev* scev); }; +GenTreeLclVarCommon* ScalarEvolutionContext::GetSsaDef(GenTreeLclVarCommon* lcl, BasicBlock** defBlock) +{ + assert(lcl->OperIs(GT_LCL_VAR, GT_PHI_ARG)); + if (!lcl->HasSsaName()) + return nullptr; + + LclVarDsc* dsc = m_comp->lvaGetDesc(lcl); + LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(lcl->GetSsaNum()); + GenTreeLclVarCommon* ssaDef = ssaDsc->GetDefNode(); + if (ssaDef == nullptr) + { + assert(lcl->GetSsaNum() == SsaConfig::FIRST_SSA_NUM); + // TODO: We should handle zero-inited locals and parameters in some proper way... 
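+ // (A use with no SSA def node is the initial value of a parameter or a
+ // zero-initialized local; modeling it as an invariant ScevLocal would
+ // likely be enough.)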
+ return nullptr; + } + assert(ssaDef->OperIsLocalStore()); + *defBlock = ssaDsc->GetBlock(); + return ssaDef; +} + +bool ScalarEvolutionContext::IsInvariantInLoop(Scev* scev, FlowGraphNaturalLoop* loop) +{ + switch (scev->Oper) + { + case ScevOper::Constant: + { + return true; + } + case ScevOper::Local: + { + ScevLocal* invariantLocal = (ScevLocal*)scev; + LclVarDsc* dsc = m_comp->lvaGetDesc(invariantLocal->LclNum); + LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(invariantLocal->SsaNum); + return (ssaDsc->GetBlock() == nullptr) || !loop->ContainsBlock(ssaDsc->GetBlock()); + } + case ScevOper::ZeroExtend: + case ScevOper::SignExtend: + { + ScevUnop* unop = (ScevUnop*)scev; + return IsInvariantInLoop(unop->Op1, loop); + } + case ScevOper::Add: + case ScevOper::Mul: + case ScevOper::Lsh: + { + ScevBinop* binop = (ScevBinop*)scev; + return IsInvariantInLoop(binop->Op1, loop) && IsInvariantInLoop(binop->Op2, loop); + } + case ScevOper::AddRec: + { + ScevAddRec* addRec = (ScevAddRec*)scev; + return !loop->ContainsBlock(addRec->Loop->GetHeader()); + } + default: + unreached(); + } +} + +Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) +{ + switch (tree->OperGet()) + { + case GT_CNS_INT: + case GT_CNS_LNG: + { + return NewConstant(tree->TypeGet(), tree->AsIntConCommon()->IntegralValue()); + } + case GT_LCL_VAR: + { + BasicBlock* defBlock; + GenTreeLclVarCommon* def = GetSsaDef(tree->AsLclVarCommon(), &defBlock); + if (def == nullptr) + { + if (m_comp->lvaInSsa(tree->AsLclVarCommon()->GetLclNum())) + { + return NewLocal(tree->AsLclVarCommon()->GetLclNum(), tree->AsLclVarCommon()->GetSsaNum()); + } + else + { + return nullptr; + } + } + + return Analyze(defBlock, def); + } + case GT_STORE_LCL_VAR: + { + GenTreeLclVarCommon* store = tree->AsLclVarCommon(); + GenTree* data = store->Data(); + if (!data->OperIs(GT_PHI)) + { + return Analyze(block, data); + } + + // We have a phi def. Look for a primary induction variable. + FlowGraphNaturalLoop* phiLoop = m_comp->m_blockToLoop->GetLoop(block); + if (phiLoop->GetHeader() != block) + { + return nullptr; + } + + GenTreePhi* phi = data->AsPhi(); + GenTreePhiArg* enterSsa = nullptr; + GenTreePhiArg* backedgeSsa = nullptr; + + for (GenTreePhi::Use& use : phi->Uses()) + { + GenTreePhiArg* phiArg = use.GetNode()->AsPhiArg(); + GenTreePhiArg*& ssaArg = phiLoop->ContainsBlock(phiArg->gtPredBB) ? backedgeSsa : enterSsa; + if ((ssaArg == nullptr) || (ssaArg->GetSsaNum() == phiArg->GetSsaNum())) + { + ssaArg = phiArg; + } + else + { + return nullptr; + } + } + + if ((enterSsa == nullptr) || (backedgeSsa == nullptr)) + { + return nullptr; + } + + BasicBlock* stepDefBlock; + GenTreeLclVarCommon* stepDef = GetSsaDef(backedgeSsa, &stepDefBlock); + if (stepDef == nullptr) + { + return nullptr; + } + + GenTree* stepDefData = stepDef->Data(); + + if (!stepDefData->OperIs(GT_ADD)) + { + // TODO: Handle patterns like: + // + // int i = 0; + // while (true) + // { + // int j = i + 1; + // ... + // i = j; + // } + // + // I think we can eagerly insert a node in the cache for + // "store"; we'll end up with some SCEV with a cycle in it that + // is going to look a bit like a µ-type, e.g. µ.µ + 1, that can + // be translated back to an add recurrence. 
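+ // Concretely: seeding the cache with a placeholder µ for this
+ // store and analyzing the backedge def in the example above would
+ // yield µ + 1, and the recursive form µ.µ + 1 unfolds to the add
+ // recurrence <L, 0, 1>.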
+ return nullptr; + } + + GenTree* stepTree; + GenTree* op1 = stepDefData->gtGetOp1(); + GenTree* op2 = stepDefData->gtGetOp2(); + if (op1->OperIs(GT_LCL_VAR) && (op1->AsLclVar()->GetLclNum() == store->GetLclNum()) && + (op1->AsLclVar()->GetSsaNum() == store->GetSsaNum())) + { + stepTree = op2; + } + else if (op2->OperIs(GT_LCL_VAR) && (op2->AsLclVar()->GetLclNum() == store->GetLclNum()) && + (op2->AsLclVar()->GetSsaNum() == store->GetSsaNum())) + { + stepTree = op1; + } + else + { + return nullptr; + } + + Scev* step = Analyze(stepDefBlock, stepTree); + + if ((step == nullptr) || !IsInvariantInLoop(step, phiLoop)) + { + return nullptr; + } + + BasicBlock* enterDefBlock; + GenTreeLclVarCommon* enterDef = GetSsaDef(enterSsa, &enterDefBlock); + if (enterDef == nullptr) + { + return nullptr; + } + + Scev* enterScev = Analyze(enterDefBlock, enterDef); + if (enterScev == nullptr) + { + return nullptr; + } + + return NewAddRec(phiLoop, enterScev, step); + } + case GT_CAST: + { + GenTreeCast* cast = tree->AsCast(); + if (cast->gtCastType != TYP_LONG) + { + return nullptr; + } + + Scev* op = Analyze(block, cast->CastOp()); + if (op == nullptr) + { + return nullptr; + } + + return NewExtension(cast->IsUnsigned() ? ScevOper::ZeroExtend : ScevOper::SignExtend, TYP_LONG, op); + } + case GT_ADD: + case GT_MUL: + case GT_LSH: + { + Scev* op1 = Analyze(block, tree->gtGetOp1()); + if (op1 == nullptr) + return nullptr; + + Scev* op2 = Analyze(block, tree->gtGetOp2()); + if (op2 == nullptr) + return nullptr; + + ScevOper oper; + switch (tree->OperGet()) + { + case GT_ADD: oper = ScevOper::Add; break; + case GT_MUL: oper = ScevOper::Mul; break; + case GT_LSH: oper = ScevOper::Lsh; break; + default: unreached(); + } + + return NewBinop(oper, op1, op2); + } + case GT_COMMA: + { + return Analyze(block, tree->gtGetOp2()); + } + case GT_ARR_ADDR: + { + return Analyze(block, tree->AsArrAddr()->Addr()); + } + default: + return nullptr; + } +} + +Scev* ScalarEvolutionContext::Analyze(BasicBlock* block, GenTree* tree) +{ + Scev* result; + if (!m_map.Lookup(tree, &result)) + { + result = AnalyzeNew(block, tree); + if (result != nullptr) + result = Fold(result); + m_map.Set(tree, result); + } + + return result; +} + +template +static T FoldArith(ScevOper oper, T op1, T op2) +{ + switch (oper) + { + case ScevOper::Add: return op1 + op2; + case ScevOper::Mul: return op1 * op2; + case ScevOper::Lsh: return op1 << op2; + default: unreached(); + } +} + +Scev* ScalarEvolutionContext::Fold(Scev* scev) +{ + switch (scev->Oper) + { + case ScevOper::Constant: + case ScevOper::Local: + { + return scev; + } + case ScevOper::ZeroExtend: + case ScevOper::SignExtend: + { + ScevUnop* unop = (ScevUnop*)scev; + assert(genTypeSize(unop->Type) >= genTypeSize(unop->Op1->Type)); + + Scev* op1 = Fold(unop->Op1); + + if (unop->Type == op1->Type) + { + return op1; + } + + assert((unop->Type == TYP_LONG) && (op1->Type == TYP_INT)); + + if (op1->OperIs(ScevOper::Constant)) + { + ScevConstant* cns = (ScevConstant*)op1; + return NewConstant(unop->Type, unop->OperIs(ScevOper::ZeroExtend) ? (uint64_t)(int32_t)cns->Value : (int64_t)(int32_t)cns->Value); + } + + //if (op1->OperIs(ScevOper::AddRec)) + //{ + // // TODO: We need to prove the extension can be removed safely... + // return op1; + //} + + return (op1 == unop->Op1) ? 
unop : NewExtension(unop->Oper, unop->Type, op1); + } + case ScevOper::Add: + case ScevOper::Mul: + case ScevOper::Lsh: + { + ScevBinop* binop = (ScevBinop*)scev; + Scev* op1 = Fold(binop->Op1); + Scev* op2 = Fold(binop->Op2); + + if (binop->OperIs(ScevOper::Add, ScevOper::Mul)) + { + // Normalize addrecs to the left + if (op2->OperIs(ScevOper::AddRec)) + { + std::swap(op1, op2); + } + // Normalize constants to the right + if (op1->OperIs(ScevOper::Constant) && !op2->OperIs(ScevOper::Constant)) + { + std::swap(op1, op2); + } + } + + if (op1->OperIs(ScevOper::AddRec)) + { + // + x => + // * x => + ScevAddRec* addRec = (ScevAddRec*)op1; + Scev* newStart = Fold(NewBinop(binop->Oper, addRec->Start, op2)); + Scev* newStep = scev->OperIs(ScevOper::Mul, ScevOper::Lsh) ? Fold(NewBinop(binop->Oper, addRec->Step, op2)) : addRec->Step; + return NewAddRec(addRec->Loop, newStart, newStep); + } + + if (op1->OperIs(ScevOper::Constant) && op2->OperIs(ScevOper::Constant)) + { + ScevConstant* cns1 = (ScevConstant*)op1; + ScevConstant* cns2 = (ScevConstant*)op2; + int64_t newValue; + if (binop->TypeIs(TYP_INT)) + { + newValue = FoldArith(binop->Oper, static_cast(cns1->Value), static_cast(cns2->Value)); + } + else + { + assert(binop->TypeIs(TYP_LONG)); + newValue = FoldArith(binop->Oper, cns1->Value, cns2->Value); + } + + return NewConstant(binop->Type, newValue); + } + + return (op1 == binop->Op1) && (op2 == binop->Op2) ? binop : NewBinop(binop->Oper, op1, op2); + } + case ScevOper::AddRec: + { + ScevAddRec* addRec = (ScevAddRec*)scev; + Scev* start = Fold(addRec->Start); + Scev* step = Fold(addRec->Step); + return (start == addRec->Start) && (step == addRec->Step) ? addRec : NewAddRec(addRec->Loop, start, step); + } + default: + unreached(); + } +} + //------------------------------------------------------------------------ // optInductionVariables: Try and optimize induction variables in the method. 
// @@ -43,9 +599,9 @@ PhaseStatus Compiler::optInductionVariables() { JITDUMP("*************** In optInductionVariables()\n"); - JITDUMP("\n"); fgDispBasicBlocks(true); + m_blockToLoop = BlockToNaturalLoopMap::Build(m_loops); ScalarEvolutionContext scevContext(this); for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder()) @@ -53,26 +609,28 @@ PhaseStatus Compiler::optInductionVariables() JITDUMP("Analyzing scalar evolution in "); FlowGraphNaturalLoop::Dump(loop); - for (Statement* stmt : loop->GetHeader()->Statements()) - { - if (!stmt->IsPhiDefnStmt()) - { - break; - } - - GenTreeLclVarCommon* phiDef = stmt->GetRootNode()->AsLclVarCommon(); - unsigned lclNum = phiDef->GetLclNum(); - GenTreePhi* phi = phiDef->Data()->AsPhi(); - - GenTree* backedgeDef = nullptr; - GenTree* initDef = nullptr; + loop->VisitLoopBlocksReversePostOrder([=, &scevContext](BasicBlock* block) { + DBEXEC(verbose, block->dspBlockHeader(this)); - for (GenTreePhi::Use& use : phi->Uses()) + for (Statement* stmt : block->Statements()) { - GenTreePhiArg* phiArg = use.GetNode()->AsPhiArg(); + JITDUMP("\n"); + DISPSTMT(stmt); + for (GenTree* node : stmt->TreeList()) + { + Scev* scev = scevContext.Analyze(block, node); + if (scev != nullptr) + { + JITDUMP("[%06u] => ", dspTreeID(node)); + DumpScev(scev); + JITDUMP("\n"); + } + } } - } + + return BasicBlockVisit::Continue; + }); } return PhaseStatus::MODIFIED_NOTHING; diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index c1d6a1ad5f6932..6b5792e3be692b 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -449,9 +449,9 @@ CONFIG_INTEGER(JitNoRngChks, W("JitNoRngChks"), 0) // If 1, don't generate range #if defined(OPT_CONFIG) CONFIG_INTEGER(JitDoAssertionProp, W("JitDoAssertionProp"), 1) // Perform assertion propagation optimization -CONFIG_INTEGER(JitDoCopyProp, W("JitDoCopyProp"), 1) // Perform copy propagation on variables that appear redundant -CONFIG_INTEGER(JitDoOptimizeIVs, W("JitDoOptimizeIVs"), 1) // Perform optimization of induction variables -CONFIG_INTEGER(JitDoEarlyProp, W("JitDoEarlyProp"), 1) // Perform Early Value Propagation +CONFIG_INTEGER(JitDoCopyProp, W("JitDoCopyProp"), 1) // Perform copy propagation on variables that appear redundant +CONFIG_INTEGER(JitDoOptimizeIVs, W("JitDoOptimizeIVs"), 1) // Perform optimization of induction variables +CONFIG_INTEGER(JitDoEarlyProp, W("JitDoEarlyProp"), 1) // Perform Early Value Propagation CONFIG_INTEGER(JitDoLoopHoisting, W("JitDoLoopHoisting"), 1) // Perform loop hoisting on loop invariant values CONFIG_INTEGER(JitDoLoopInversion, W("JitDoLoopInversion"), 1) // Perform loop inversion on "for/while" loops CONFIG_INTEGER(JitDoRangeAnalysis, W("JitDoRangeAnalysis"), 1) // Perform range check analysis From e2bca3aa0fbc0ec65fb667b9aa404ba059220e4f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 1 Feb 2024 13:16:51 +0100 Subject: [PATCH 03/64] Check invariance on IR instead of on SCEV to avoid cycles --- src/coreclr/jit/inductionvariableopts.cpp | 67 ++++++++++------------- 1 file changed, 29 insertions(+), 38 deletions(-) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 77cad6cbf86781..fa4827dd104c3b 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -170,7 +170,7 @@ class ScalarEvolutionContext Scev* AnalyzeNew(BasicBlock* block, GenTree* tree); GenTreeLclVarCommon* GetSsaDef(GenTreeLclVarCommon* lcl, BasicBlock** 
defBlock); - bool IsInvariantInLoop(Scev* scev, FlowGraphNaturalLoop* loop); + bool IsInvariantInLoop(GenTree* tree, FlowGraphNaturalLoop* loop); public: ScalarEvolutionContext(Compiler* comp) : m_comp(comp), m_map(comp->getAllocator(CMK_LoopScalarEvolution)) @@ -237,42 +237,22 @@ GenTreeLclVarCommon* ScalarEvolutionContext::GetSsaDef(GenTreeLclVarCommon* lcl, return ssaDef; } -bool ScalarEvolutionContext::IsInvariantInLoop(Scev* scev, FlowGraphNaturalLoop* loop) +bool ScalarEvolutionContext::IsInvariantInLoop(GenTree* tree, FlowGraphNaturalLoop* loop) { - switch (scev->Oper) + if (tree->IsInvariant()) { - case ScevOper::Constant: - { - return true; - } - case ScevOper::Local: - { - ScevLocal* invariantLocal = (ScevLocal*)scev; - LclVarDsc* dsc = m_comp->lvaGetDesc(invariantLocal->LclNum); - LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(invariantLocal->SsaNum); - return (ssaDsc->GetBlock() == nullptr) || !loop->ContainsBlock(ssaDsc->GetBlock()); - } - case ScevOper::ZeroExtend: - case ScevOper::SignExtend: - { - ScevUnop* unop = (ScevUnop*)scev; - return IsInvariantInLoop(unop->Op1, loop); - } - case ScevOper::Add: - case ScevOper::Mul: - case ScevOper::Lsh: - { - ScevBinop* binop = (ScevBinop*)scev; - return IsInvariantInLoop(binop->Op1, loop) && IsInvariantInLoop(binop->Op2, loop); - } - case ScevOper::AddRec: - { - ScevAddRec* addRec = (ScevAddRec*)scev; - return !loop->ContainsBlock(addRec->Loop->GetHeader()); - } - default: - unreached(); + return true; } + + if (tree->OperIs(GT_LCL_VAR) && tree->AsLclVarCommon()->HasSsaName()) + { + LclVarDsc* dsc = m_comp->lvaGetDesc(tree->AsLclVarCommon()); + LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(tree->AsLclVarCommon()->GetSsaNum()); + + return (ssaDsc->GetBlock() == nullptr) || !loop->ContainsBlock(ssaDsc->GetBlock()); + } + + return false; } Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) @@ -387,9 +367,19 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) return nullptr; } + if (!IsInvariantInLoop(stepTree, phiLoop)) + { + // TODO: This is also conservative, e.g. it won't handle + // chasing through SSA defs for the step. But maybe we don't + // care. + return nullptr; + } + + // The invariance check guarantees that this doesn't find a cycle + // involving this GT_STORE_LCL_VAR. 
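+ // (E.g. for a step like "i = i + j" where j is itself updated from
+ // i inside the loop, chasing the step's SSA def could lead the
+ // analysis back into this same phi.)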
Scev* step = Analyze(stepDefBlock, stepTree); - if ((step == nullptr) || !IsInvariantInLoop(step, phiLoop)) + if (step == nullptr) { return nullptr; } @@ -466,9 +456,7 @@ Scev* ScalarEvolutionContext::Analyze(BasicBlock* block, GenTree* tree) Scev* result; if (!m_map.Lookup(tree, &result)) { - result = AnalyzeNew(block, tree); - if (result != nullptr) - result = Fold(result); + result = AnalyzeNew(block, tree); m_map.Set(tree, result); } @@ -624,6 +612,9 @@ PhaseStatus Compiler::optInductionVariables() { JITDUMP("[%06u] => ", dspTreeID(node)); DumpScev(scev); + Scev* folded = scevContext.Fold(scev); + JITDUMP(" => "); + DumpScev(scev); JITDUMP("\n"); } } From f400f03dc9149884c45ae0ebb2ef449131370064 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 1 Feb 2024 14:53:32 +0100 Subject: [PATCH 04/64] Some work on recursive scevs --- src/coreclr/jit/inductionvariableopts.cpp | 252 +++++++++++++++------- 1 file changed, 172 insertions(+), 80 deletions(-) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index fa4827dd104c3b..95ebfdc1c4aca2 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -166,17 +166,32 @@ static void DumpScev(Scev* scev) class ScalarEvolutionContext { Compiler* m_comp; - ScalarEvolutionMap m_map; + FlowGraphNaturalLoop* m_loop = nullptr; + ScalarEvolutionMap m_cache; + ScalarEvolutionMap m_cyclicCache; + bool m_usingCyclicCache = false; Scev* AnalyzeNew(BasicBlock* block, GenTree* tree); + Scev* CreateSimpleAddRec(GenTreeLclVarCommon* headerStore, Scev* start, BasicBlock* stepDefBlock, GenTree* stepDefData); + Scev* MakeAddRecFromRecursiveScev(Scev* start, Scev* scev, Scev* recursiveScev); GenTreeLclVarCommon* GetSsaDef(GenTreeLclVarCommon* lcl, BasicBlock** defBlock); - bool IsInvariantInLoop(GenTree* tree, FlowGraphNaturalLoop* loop); + Scev* CreateSimpleInvariantScev(GenTree* tree); + bool TrackedLocalVariesInLoop(unsigned lclNum); public: - ScalarEvolutionContext(Compiler* comp) : m_comp(comp), m_map(comp->getAllocator(CMK_LoopScalarEvolution)) + ScalarEvolutionContext(Compiler* comp) : + m_comp(comp), + m_cache(comp->getAllocator(CMK_LoopScalarEvolution)), + m_cyclicCache(comp->getAllocator(CMK_LoopScalarEvolution)) { } + void ResetForLoop(FlowGraphNaturalLoop* loop) + { + m_loop = loop; + m_cache.RemoveAll(); + } + ScevConstant* NewConstant(var_types type, int64_t value) { ScevConstant* constant = new (m_comp, CMK_LoopScalarEvolution) ScevConstant(type, value); @@ -237,11 +252,11 @@ GenTreeLclVarCommon* ScalarEvolutionContext::GetSsaDef(GenTreeLclVarCommon* lcl, return ssaDef; } -bool ScalarEvolutionContext::IsInvariantInLoop(GenTree* tree, FlowGraphNaturalLoop* loop) +Scev* ScalarEvolutionContext::CreateSimpleInvariantScev(GenTree* tree) { - if (tree->IsInvariant()) + if (tree->OperIsConst()) { - return true; + return NewConstant(tree->TypeGet(), tree->AsIntConCommon()->IntegralValue()); } if (tree->OperIs(GT_LCL_VAR) && tree->AsLclVarCommon()->HasSsaName()) @@ -249,7 +264,28 @@ bool ScalarEvolutionContext::IsInvariantInLoop(GenTree* tree, FlowGraphNaturalLo LclVarDsc* dsc = m_comp->lvaGetDesc(tree->AsLclVarCommon()); LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(tree->AsLclVarCommon()->GetSsaNum()); - return (ssaDsc->GetBlock() == nullptr) || !loop->ContainsBlock(ssaDsc->GetBlock()); + if ((ssaDsc->GetBlock() == nullptr) || !m_loop->ContainsBlock(ssaDsc->GetBlock())) + { + return NewLocal(tree->AsLclVarCommon()->GetLclNum(), 
tree->AsLclVarCommon()->GetSsaNum()); + } + } + + return nullptr; +} + +bool ScalarEvolutionContext::TrackedLocalVariesInLoop(unsigned lclNum) +{ + for (Statement* stmt : m_loop->GetHeader()->Statements()) + { + if (!stmt->IsPhiDefnStmt()) + { + break; + } + + if (stmt->GetRootNode()->AsLclVarCommon()->GetLclNum() == lclNum) + { + return true; + } } return false; @@ -266,21 +302,23 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) } case GT_LCL_VAR: { - BasicBlock* defBlock; - GenTreeLclVarCommon* def = GetSsaDef(tree->AsLclVarCommon(), &defBlock); - if (def == nullptr) + if (!tree->AsLclVarCommon()->HasSsaName()) { - if (m_comp->lvaInSsa(tree->AsLclVarCommon()->GetLclNum())) - { - return NewLocal(tree->AsLclVarCommon()->GetLclNum(), tree->AsLclVarCommon()->GetSsaNum()); - } - else - { - return nullptr; - } + return nullptr; } - return Analyze(defBlock, def); + assert(m_comp->lvaInSsa(tree->AsLclVarCommon()->GetLclNum())); + LclVarDsc* dsc = m_comp->lvaGetDesc(tree->AsLclVarCommon()); + LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(tree->AsLclVarCommon()->GetSsaNum()); + + if ((ssaDsc->GetBlock() == nullptr) || !m_loop->ContainsBlock(ssaDsc->GetBlock())) + { + // Invariant local + assert(!TrackedLocalVariesInLoop(tree->AsLclVarCommon()->GetLclNum())); + return NewLocal(tree->AsLclVarCommon()->GetLclNum(), tree->AsLclVarCommon()->GetSsaNum()); + } + + return Analyze(ssaDsc->GetBlock(), ssaDsc->GetDefNode()); } case GT_STORE_LCL_VAR: { @@ -291,13 +329,13 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) return Analyze(block, data); } - // We have a phi def. Look for a primary induction variable. - FlowGraphNaturalLoop* phiLoop = m_comp->m_blockToLoop->GetLoop(block); - if (phiLoop->GetHeader() != block) + if (block != m_loop->GetHeader()) { return nullptr; } + // We have a phi def for the current loop. Look for a primary + // induction variable. GenTreePhi* phi = data->AsPhi(); GenTreePhiArg* enterSsa = nullptr; GenTreePhiArg* backedgeSsa = nullptr; @@ -305,7 +343,7 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) for (GenTreePhi::Use& use : phi->Uses()) { GenTreePhiArg* phiArg = use.GetNode()->AsPhiArg(); - GenTreePhiArg*& ssaArg = phiLoop->ContainsBlock(phiArg->gtPredBB) ? backedgeSsa : enterSsa; + GenTreePhiArg*& ssaArg = m_loop->ContainsBlock(phiArg->gtPredBB) ? backedgeSsa : enterSsa; if ((ssaArg == nullptr) || (ssaArg->GetSsaNum() == phiArg->GetSsaNum())) { ssaArg = phiArg; @@ -321,83 +359,63 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) return nullptr; } - BasicBlock* stepDefBlock; - GenTreeLclVarCommon* stepDef = GetSsaDef(backedgeSsa, &stepDefBlock); - if (stepDef == nullptr) - { - return nullptr; - } + LclVarDsc* dsc = m_comp->lvaGetDesc(store); - GenTree* stepDefData = stepDef->Data(); + assert(enterSsa->GetLclNum() == store->GetLclNum()); + LclSsaVarDsc* enterSsaDsc = dsc->GetPerSsaData(enterSsa->GetSsaNum()); + + assert((enterSsaDsc->GetBlock() == nullptr) || !m_loop->ContainsBlock(enterSsaDsc->GetBlock())); - if (!stepDefData->OperIs(GT_ADD)) + // TODO: Represent initial value? E.g. "for (; arg < 10; arg++)" => ? + // Also zeroed locals. + if (enterSsaDsc->GetDefNode() == nullptr) { - // TODO: Handle patterns like: - // - // int i = 0; - // while (true) - // { - // int j = i + 1; - // ... 
- // i = j; - // } - // - // I think we can eagerly insert a node in the cache for - // "store"; we'll end up with some SCEV with a cycle in it that - // is going to look a bit like a µ-type, e.g. µ.µ + 1, that can - // be translated back to an add recurrence. return nullptr; } - GenTree* stepTree; - GenTree* op1 = stepDefData->gtGetOp1(); - GenTree* op2 = stepDefData->gtGetOp2(); - if (op1->OperIs(GT_LCL_VAR) && (op1->AsLclVar()->GetLclNum() == store->GetLclNum()) && - (op1->AsLclVar()->GetSsaNum() == store->GetSsaNum())) - { - stepTree = op2; - } - else if (op2->OperIs(GT_LCL_VAR) && (op2->AsLclVar()->GetLclNum() == store->GetLclNum()) && - (op2->AsLclVar()->GetSsaNum() == store->GetSsaNum())) - { - stepTree = op1; - } - else + Scev* enterScev = Analyze(enterSsaDsc->GetBlock(), enterSsaDsc->GetDefNode()); + + if (enterScev == nullptr) { return nullptr; } - if (!IsInvariantInLoop(stepTree, phiLoop)) + BasicBlock* stepDefBlock; + GenTreeLclVarCommon* stepDef = GetSsaDef(backedgeSsa, &stepDefBlock); + assert(stepDef != nullptr); // This should always exist since it comes from a backedge. + + GenTree* stepDefData = stepDef->Data(); + + Scev* simpleAddRec = CreateSimpleAddRec(store, enterScev, stepDefBlock, stepDefData); + + if (simpleAddRec != nullptr) { - // TODO: This is also conservative, e.g. it won't handle - // chasing through SSA defs for the step. But maybe we don't - // care. - return nullptr; + return simpleAddRec; } - // The invariance check guarantees that this doesn't find a cycle - // involving this GT_STORE_LCL_VAR. - Scev* step = Analyze(stepDefBlock, stepTree); + ScevConstant* symbolicAddRec = NewConstant(TYP_INT, 0xdeadbeef); + m_cyclicCache.Emplace(store, symbolicAddRec); - if (step == nullptr) + Scev* result; + if (m_usingCyclicCache) { - return nullptr; + result = Analyze(stepDefBlock, stepDefData); } - - BasicBlock* enterDefBlock; - GenTreeLclVarCommon* enterDef = GetSsaDef(enterSsa, &enterDefBlock); - if (enterDef == nullptr) + else { - return nullptr; + m_usingCyclicCache = true; + + result = Analyze(stepDefBlock, stepDefData); + m_usingCyclicCache = false; + m_cyclicCache.RemoveAll(); } - Scev* enterScev = Analyze(enterDefBlock, enterDef); - if (enterScev == nullptr) + if (result == nullptr) { return nullptr; } - return NewAddRec(phiLoop, enterScev, step); + return MakeAddRecFromRecursiveScev(enterScev, result, symbolicAddRec); } case GT_CAST: { @@ -451,13 +469,86 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) } } +Scev* ScalarEvolutionContext::CreateSimpleAddRec(GenTreeLclVarCommon* headerStore, Scev* enterScev, BasicBlock* stepDefBlock, GenTree* stepDefData) +{ + if (!stepDefData->OperIs(GT_ADD)) + { + return nullptr; + } + + GenTree* stepTree; + GenTree* op1 = stepDefData->gtGetOp1(); + GenTree* op2 = stepDefData->gtGetOp2(); + if (op1->OperIs(GT_LCL_VAR) && (op1->AsLclVar()->GetLclNum() == headerStore->GetLclNum()) && + (op1->AsLclVar()->GetSsaNum() == headerStore->GetSsaNum())) + { + stepTree = op2; + } + else if (op2->OperIs(GT_LCL_VAR) && (op2->AsLclVar()->GetLclNum() == headerStore->GetLclNum()) && + (op2->AsLclVar()->GetSsaNum() == headerStore->GetSsaNum())) + { + stepTree = op1; + } + else + { + return nullptr; + } + + Scev* stepScev = CreateSimpleInvariantScev(stepTree); + if (stepScev == nullptr) + { + return nullptr; + } + + return NewAddRec(m_loop, enterScev, stepScev); +} + +//------------------------------------------------------------------------ +// MakeAddRecFromRecursiveScev: Given a SCEV and a recursive SCEV that 
the first one may contain, +// create a non-recursive add-rec from it. +// +// Parameters: +// scev - The scev +// recursiveScev - A symbolic node whose appearence represents an appearence of "scev" +// +// Returns: +// A non-recursive addrec +// +// Remarks: +// Currently only handles simple binary addition +// +Scev* ScalarEvolutionContext::MakeAddRecFromRecursiveScev(Scev* startScev, Scev* scev, Scev* recursiveScev) +{ + if (!scev->OperIs(ScevOper::Add)) + { + return nullptr; + } + + ScevBinop* add = (ScevBinop*)scev; + if ((add->Op1 == recursiveScev) && (add->Op2 != recursiveScev) && add->Op2->OperIs(ScevOper::Constant, ScevOper::AddRec)) + { + return NewAddRec(m_loop, startScev, add->Op2); + } + + if ((add->Op2 == recursiveScev) && add->Op1->OperIs(ScevOper::Constant, ScevOper::AddRec)) + { + return NewAddRec(m_loop, startScev, add->Op1); + } + + return nullptr; +} + Scev* ScalarEvolutionContext::Analyze(BasicBlock* block, GenTree* tree) { Scev* result; - if (!m_map.Lookup(tree, &result)) + if (!m_cache.Lookup(tree, &result) && (!m_usingCyclicCache || !m_cyclicCache.Lookup(tree, &result))) { result = AnalyzeNew(block, tree); - m_map.Set(tree, result); + + if (m_usingCyclicCache) + m_cyclicCache.Set(tree, result, ScalarEvolutionMap::Overwrite); + else + m_cache.Set(tree, result); } return result; @@ -596,6 +687,7 @@ PhaseStatus Compiler::optInductionVariables() { JITDUMP("Analyzing scalar evolution in "); FlowGraphNaturalLoop::Dump(loop); + scevContext.ResetForLoop(loop); loop->VisitLoopBlocksReversePostOrder([=, &scevContext](BasicBlock* block) { DBEXEC(verbose, block->dspBlockHeader(this)); @@ -614,7 +706,7 @@ PhaseStatus Compiler::optInductionVariables() DumpScev(scev); Scev* folded = scevContext.Fold(scev); JITDUMP(" => "); - DumpScev(scev); + DumpScev(folded); JITDUMP("\n"); } } From 05562bbf527d1ea43f83441ce0861d86791735f7 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 1 Feb 2024 14:58:59 +0100 Subject: [PATCH 05/64] Simplify for now --- src/coreclr/jit/inductionvariableopts.cpp | 71 ++++++++++------------- 1 file changed, 31 insertions(+), 40 deletions(-) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 95ebfdc1c4aca2..7fd65986f6cf45 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -168,8 +168,6 @@ class ScalarEvolutionContext Compiler* m_comp; FlowGraphNaturalLoop* m_loop = nullptr; ScalarEvolutionMap m_cache; - ScalarEvolutionMap m_cyclicCache; - bool m_usingCyclicCache = false; Scev* AnalyzeNew(BasicBlock* block, GenTree* tree); Scev* CreateSimpleAddRec(GenTreeLclVarCommon* headerStore, Scev* start, BasicBlock* stepDefBlock, GenTree* stepDefData); @@ -181,8 +179,7 @@ class ScalarEvolutionContext public: ScalarEvolutionContext(Compiler* comp) : m_comp(comp), - m_cache(comp->getAllocator(CMK_LoopScalarEvolution)), - m_cyclicCache(comp->getAllocator(CMK_LoopScalarEvolution)) + m_cache(comp->getAllocator(CMK_LoopScalarEvolution)) { } @@ -386,36 +383,34 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) GenTree* stepDefData = stepDef->Data(); - Scev* simpleAddRec = CreateSimpleAddRec(store, enterScev, stepDefBlock, stepDefData); - - if (simpleAddRec != nullptr) - { - return simpleAddRec; - } - - ScevConstant* symbolicAddRec = NewConstant(TYP_INT, 0xdeadbeef); - m_cyclicCache.Emplace(store, symbolicAddRec); - - Scev* result; - if (m_usingCyclicCache) - { - result = Analyze(stepDefBlock, stepDefData); - } - else - { - 
m_usingCyclicCache = true; - - result = Analyze(stepDefBlock, stepDefData); - m_usingCyclicCache = false; - m_cyclicCache.RemoveAll(); - } - - if (result == nullptr) - { - return nullptr; - } - - return MakeAddRecFromRecursiveScev(enterScev, result, symbolicAddRec); + // We currently do not handle complicated addrecs. We can do this + // by inserting a symbolic node in the cache and analyzing with it + // cached it. It would allow us to model things like + // + // int i = 0; + // while (i < n) + // { + // int j = i + 1; + // ... + // i = j; + // } + // => + // + // and chains of recurrences, such as + // + // int i = 0; + // int j = 0; + // while (i < n) + // { + // j++; + // i += j; + // } + // => > + // + // The main issue is that it requires cache invalidation afterwards + // and turning the recursive result into an addrec. + // + return CreateSimpleAddRec(store, enterScev, stepDefBlock, stepDefData); } case GT_CAST: { @@ -541,14 +536,10 @@ Scev* ScalarEvolutionContext::MakeAddRecFromRecursiveScev(Scev* startScev, Scev* Scev* ScalarEvolutionContext::Analyze(BasicBlock* block, GenTree* tree) { Scev* result; - if (!m_cache.Lookup(tree, &result) && (!m_usingCyclicCache || !m_cyclicCache.Lookup(tree, &result))) + if (!m_cache.Lookup(tree, &result)) { result = AnalyzeNew(block, tree); - - if (m_usingCyclicCache) - m_cyclicCache.Set(tree, result, ScalarEvolutionMap::Overwrite); - else - m_cache.Set(tree, result); + m_cache.Set(tree, result); } return result; From 3036826f3a4241e4c62d969ff92217409595f9d4 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 2 Feb 2024 11:33:29 +0100 Subject: [PATCH 06/64] IV widening, fix bugs --- src/coreclr/jit/compiler.h | 6 + src/coreclr/jit/compiler.hpp | 38 +++ src/coreclr/jit/inductionvariableopts.cpp | 381 +++++++++++++++++----- 3 files changed, 339 insertions(+), 86 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 17756520385634..0551f808404ecd 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2181,6 +2181,9 @@ class FlowGraphNaturalLoop template BasicBlockVisit VisitLoopBlocksLexical(TFunc func); + template + BasicBlockVisit VisitAllExitBlocks(TFunc func); + BasicBlock* GetLexicallyTopMostBlock(); BasicBlock* GetLexicallyBottomMostBlock(); @@ -7378,6 +7381,9 @@ class Compiler #endif PhaseStatus optInductionVariables(); + bool optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop); + void optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statement* stmt); + bool optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNaturalLoop* loop); // Redundant branch opts // diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 22a36940820ee6..e62bbb0a80c873 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -4929,6 +4929,44 @@ BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocksLexical(TFunc func) return BasicBlockVisit::Continue; } +//------------------------------------------------------------------------------ +// FlowGraphNaturalLoop::VisitAllExitBlocks: Visit all blocks that are outside +// the loop but that may have predecessors inside the loop. This includes +// handler blocks. +// +// Type parameters: +// TFunc - Callback functor type +// +// Arguments: +// func - Callback functor that takes a BasicBlock* and returns a +// BasicBlockVisit. +// +// Returns: +// BasicBlockVisit that indicated whether the visit was aborted by the +// callback or whether all blocks were visited. 
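+//
+// Remarks:
+// Successors outside the loop are deduplicated with a bit vector, so the
+// callback sees each candidate exit block exactly once even when several
+// loop blocks branch to it.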
+// +template +BasicBlockVisit FlowGraphNaturalLoop::VisitAllExitBlocks(TFunc func) +{ + Compiler* comp = m_dfsTree->GetCompiler(); + + BitVecTraits traits = m_dfsTree->PostOrderTraits(); + BitVec visited(BitVecOps::MakeEmpty(&traits)); + + BasicBlockVisit result = VisitLoopBlocksReversePostOrder([&, comp](BasicBlock* block) { + return block->VisitAllSuccs(comp, [&](BasicBlock* succ) { + if (!ContainsBlock(succ) && BitVecOps::TryAddElemD(&traits, visited, succ->bbPostorderNum) && (func(succ) == BasicBlockVisit::Abort)) + { + return BasicBlockVisit::Abort; + } + + return BasicBlockVisit::Continue; + }); + }); + + return result; +} + /*****************************************************************************/ #endif //_COMPILER_HPP_ /*****************************************************************************/ diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 7fd65986f6cf45..b7cebb5780b45d 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -171,9 +171,8 @@ class ScalarEvolutionContext Scev* AnalyzeNew(BasicBlock* block, GenTree* tree); Scev* CreateSimpleAddRec(GenTreeLclVarCommon* headerStore, Scev* start, BasicBlock* stepDefBlock, GenTree* stepDefData); - Scev* MakeAddRecFromRecursiveScev(Scev* start, Scev* scev, Scev* recursiveScev); - GenTreeLclVarCommon* GetSsaDef(GenTreeLclVarCommon* lcl, BasicBlock** defBlock); Scev* CreateSimpleInvariantScev(GenTree* tree); + Scev* CreateScevForConstant(GenTreeIntConCommon* tree); bool TrackedLocalVariesInLoop(unsigned lclNum); public: @@ -229,26 +228,6 @@ class ScalarEvolutionContext Scev* Fold(Scev* scev); }; -GenTreeLclVarCommon* ScalarEvolutionContext::GetSsaDef(GenTreeLclVarCommon* lcl, BasicBlock** defBlock) -{ - assert(lcl->OperIs(GT_LCL_VAR, GT_PHI_ARG)); - if (!lcl->HasSsaName()) - return nullptr; - - LclVarDsc* dsc = m_comp->lvaGetDesc(lcl); - LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(lcl->GetSsaNum()); - GenTreeLclVarCommon* ssaDef = ssaDsc->GetDefNode(); - if (ssaDef == nullptr) - { - assert(lcl->GetSsaNum() == SsaConfig::FIRST_SSA_NUM); - // TODO: We should handle zero-inited locals and parameters in some proper way... 
- return nullptr; - } - assert(ssaDef->OperIsLocalStore()); - *defBlock = ssaDsc->GetBlock(); - return ssaDef; -} - Scev* ScalarEvolutionContext::CreateSimpleInvariantScev(GenTree* tree) { if (tree->OperIsConst()) @@ -288,6 +267,16 @@ bool ScalarEvolutionContext::TrackedLocalVariesInLoop(unsigned lclNum) return false; } +Scev* ScalarEvolutionContext::CreateScevForConstant(GenTreeIntConCommon* tree) +{ + if (tree->IsIconHandle() || !tree->TypeIs(TYP_INT, TYP_LONG)) + { + return nullptr; + } + + return NewConstant(tree->TypeGet(), tree->AsIntConCommon()->IntegralValue()); +} + Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) { switch (tree->OperGet()) @@ -295,9 +284,10 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) case GT_CNS_INT: case GT_CNS_LNG: { - return NewConstant(tree->TypeGet(), tree->AsIntConCommon()->IntegralValue()); + return CreateScevForConstant(tree->AsIntConCommon()); } case GT_LCL_VAR: + case GT_PHI_ARG: { if (!tree->AsLclVarCommon()->HasSsaName()) { @@ -311,10 +301,31 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) if ((ssaDsc->GetBlock() == nullptr) || !m_loop->ContainsBlock(ssaDsc->GetBlock())) { // Invariant local - assert(!TrackedLocalVariesInLoop(tree->AsLclVarCommon()->GetLclNum())); + GenTreeLclVarCommon* def = ssaDsc->GetDefNode(); + if ((def != nullptr) && def->Data()->OperIs(GT_CNS_INT, GT_CNS_LNG)) + { + // For constant definitions from outside the loop we prefer to inline the constant. + // TODO: Maybe we shouldn't but should just do it when we dump the scev? + + return CreateScevForConstant(def->Data()->AsIntConCommon()); + } + return NewLocal(tree->AsLclVarCommon()->GetLclNum(), tree->AsLclVarCommon()->GetSsaNum()); } + if (ssaDsc->GetDefNode() == nullptr) + { + // GT_CALL retbuf def? + return nullptr; + } + + if (ssaDsc->GetDefNode()->GetLclNum() != tree->AsLclVarCommon()->GetLclNum()) + { + // Should be a def of the parent + assert(dsc->lvIsStructField && (ssaDsc->GetDefNode()->GetLclNum() == dsc->lvParentLcl)); + return nullptr; + } + return Analyze(ssaDsc->GetBlock(), ssaDsc->GetDefNode()); } case GT_STORE_LCL_VAR: @@ -356,36 +367,33 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) return nullptr; } - LclVarDsc* dsc = m_comp->lvaGetDesc(store); - - assert(enterSsa->GetLclNum() == store->GetLclNum()); - LclSsaVarDsc* enterSsaDsc = dsc->GetPerSsaData(enterSsa->GetSsaNum()); + Scev* enterScev = Analyze(block, enterSsa); - assert((enterSsaDsc->GetBlock() == nullptr) || !m_loop->ContainsBlock(enterSsaDsc->GetBlock())); - - // TODO: Represent initial value? E.g. "for (; arg < 10; arg++)" => ? - // Also zeroed locals. - if (enterSsaDsc->GetDefNode() == nullptr) + if (enterScev == nullptr) { return nullptr; } - Scev* enterScev = Analyze(enterSsaDsc->GetBlock(), enterSsaDsc->GetDefNode()); + LclVarDsc* dsc = m_comp->lvaGetDesc(store); + LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(backedgeSsa->GetSsaNum()); - if (enterScev == nullptr) + if (ssaDsc->GetDefNode() == nullptr) { + // GT_CALL retbuf def return nullptr; } - BasicBlock* stepDefBlock; - GenTreeLclVarCommon* stepDef = GetSsaDef(backedgeSsa, &stepDefBlock); - assert(stepDef != nullptr); // This should always exist since it comes from a backedge. 
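+ // The def coming in over the backedge is always inside the loop, so
+ // its SSA descriptor is expected to have a defining block.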
+ if (ssaDsc->GetDefNode()->GetLclNum() != store->GetLclNum()) + { + assert(dsc->lvIsStructField && ssaDsc->GetDefNode()->GetLclNum() == dsc->lvParentLcl); + return nullptr; + } - GenTree* stepDefData = stepDef->Data(); + assert(ssaDsc->GetBlock() != nullptr); // We currently do not handle complicated addrecs. We can do this - // by inserting a symbolic node in the cache and analyzing with it - // cached it. It would allow us to model things like + // by inserting a symbolic node in the cache and analyzing while it + // is part of the cache. It would allow us to model // // int i = 0; // while (i < n) @@ -410,7 +418,7 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) // The main issue is that it requires cache invalidation afterwards // and turning the recursive result into an addrec. // - return CreateSimpleAddRec(store, enterScev, stepDefBlock, stepDefData); + return CreateSimpleAddRec(store, enterScev, ssaDsc->GetBlock(), ssaDsc->GetDefNode()->Data()); } case GT_CAST: { @@ -498,41 +506,6 @@ Scev* ScalarEvolutionContext::CreateSimpleAddRec(GenTreeLclVarCommon* headerStor return NewAddRec(m_loop, enterScev, stepScev); } -//------------------------------------------------------------------------ -// MakeAddRecFromRecursiveScev: Given a SCEV and a recursive SCEV that the first one may contain, -// create a non-recursive add-rec from it. -// -// Parameters: -// scev - The scev -// recursiveScev - A symbolic node whose appearence represents an appearence of "scev" -// -// Returns: -// A non-recursive addrec -// -// Remarks: -// Currently only handles simple binary addition -// -Scev* ScalarEvolutionContext::MakeAddRecFromRecursiveScev(Scev* startScev, Scev* scev, Scev* recursiveScev) -{ - if (!scev->OperIs(ScevOper::Add)) - { - return nullptr; - } - - ScevBinop* add = (ScevBinop*)scev; - if ((add->Op1 == recursiveScev) && (add->Op2 != recursiveScev) && add->Op2->OperIs(ScevOper::Constant, ScevOper::AddRec)) - { - return NewAddRec(m_loop, startScev, add->Op2); - } - - if ((add->Op2 == recursiveScev) && add->Op1->OperIs(ScevOper::Constant, ScevOper::AddRec)) - { - return NewAddRec(m_loop, startScev, add->Op1); - } - - return nullptr; -} - Scev* ScalarEvolutionContext::Analyze(BasicBlock* block, GenTree* tree) { Scev* result; @@ -587,9 +560,9 @@ Scev* ScalarEvolutionContext::Fold(Scev* scev) return NewConstant(unop->Type, unop->OperIs(ScevOper::ZeroExtend) ? (uint64_t)(int32_t)cns->Value : (int64_t)(int32_t)cns->Value); } + // Folding these requires some proof that it is ok. //if (op1->OperIs(ScevOper::AddRec)) //{ - // // TODO: We need to prove the extension can be removed safely... 
// return op1; //} @@ -659,6 +632,139 @@ Scev* ScalarEvolutionContext::Fold(Scev* scev) } } +bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) +{ + LclVarDsc* dsc = lvaGetDesc(lclNum); + + BasicBlockVisit result = loop->VisitAllExitBlocks([=](BasicBlock* exit) { + if (!VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex)) + { + JITDUMP(" Exit " FMT_BB " does not need a sink; V%02u is not live-in\n", exit->bbNum, lclNum); + return BasicBlockVisit::Continue; + } + + for (FlowEdge* predEdge = BlockPredsWithEH(exit); predEdge != nullptr; predEdge = predEdge->getNextPredEdge()) + { + if (!loop->ContainsBlock(predEdge->getSourceBlock())) + { + JITDUMP(" Cannot safely sink widened version of V%02u into exit " FMT_BB " of " FMT_LP "; it has a non-loop pred " FMT_BB "\n", lclNum, exit->bbNum, loop->GetIndex(), predEdge->getSourceBlock()->bbNum); + return BasicBlockVisit::Abort; + } + } + + JITDUMP(" V%02u is live into exit " FMT_BB "; will sink the widened value\n", lclNum, exit->bbNum); + return BasicBlockVisit::Continue; + }); + + return result == BasicBlockVisit::Continue; +} + +bool Compiler::optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNaturalLoop* loop) +{ + bool anySunk = false; + LclVarDsc* dsc = lvaGetDesc(lclNum); + loop->VisitAllExitBlocks([=, &anySunk](BasicBlock* exit) { + if (!VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex)) + { + return BasicBlockVisit::Continue; + } + + GenTree* narrowing = gtNewCastNode(TYP_INT, gtNewLclvNode(newLclNum, TYP_LONG), false, TYP_INT); + GenTree* store = gtNewStoreLclVarNode(lclNum, narrowing); + Statement* newStmt = fgNewStmtFromTree(store); + JITDUMP("Narrow IV local V%02u live into exit block " FMT_BB "; sinking a narrowing\n", lclNum, exit->bbNum); + DISPSTMT(newStmt); + fgInsertStmtAtBeg(exit, newStmt); + anySunk = true; + + return BasicBlockVisit::Continue; + }); + + return anySunk; +} + +void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statement* stmt) +{ + struct ReplaceVisitor : GenTreeVisitor + { + private: + unsigned m_lclNum; + unsigned m_newLclNum; + + public: + bool MadeChanges = false; + + enum + { + DoPreOrder = true, + }; + + ReplaceVisitor(Compiler* comp, unsigned lclNum, unsigned newLclNum) : GenTreeVisitor(comp), m_lclNum(lclNum), m_newLclNum(newLclNum) + { + } + + fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) + { + GenTree* node = *use; + if (node->OperIs(GT_CAST)) + { + GenTreeCast* cast = node->AsCast(); + if ((cast->gtCastType == TYP_LONG) && cast->IsUnsigned()) + { + GenTree* op = cast->CastOp(); + if (op->OperIs(GT_LCL_VAR) && (op->AsLclVarCommon()->GetLclNum() == m_lclNum)) + { + *use = m_compiler->gtNewLclvNode(m_newLclNum, TYP_LONG); + MadeChanges = true; + return fgWalkResult::WALK_SKIP_SUBTREES; + } + } + } + else if (node->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_STORE_LCL_VAR, GT_STORE_LCL_FLD) && (node->AsLclVarCommon()->GetLclNum() == m_lclNum)) + { + switch (node->OperGet()) + { + case GT_LCL_VAR: + node->AsLclVarCommon()->SetLclNum(m_newLclNum); + // No cast needed -- the backend allows TYP_INT uses of TYP_LONG locals. + break; + case GT_LCL_FLD: + case GT_STORE_LCL_FLD: // TODO: Do we need to skip widening if we have one of these? 
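// (Retargeting the field keeps the same byte offsets, which reads the
// same value on little-endian targets, though it does force the local
// to the stack via the DNER below.)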
+ node->AsLclFld()->SetLclNum(m_newLclNum); + m_compiler->lvaSetVarDoNotEnregister(m_newLclNum DEBUGARG(DoNotEnregisterReason::LocalField)); + break; + case GT_STORE_LCL_VAR: + node->AsLclVarCommon()->SetLclNum(m_newLclNum); + node->AsLclVarCommon()->gtType = TYP_LONG; + node->AsLclVarCommon()->Data() = m_compiler->gtNewCastNode(TYP_LONG, node->AsLclVarCommon()->Data(), true, TYP_LONG); + break; + } + + MadeChanges = true; + } + + return fgWalkResult::WALK_CONTINUE; + } + }; + + ReplaceVisitor visitor(this, lclNum, newLclNum); + visitor.WalkTree(stmt->GetRootNodePointer(), nullptr); + if (visitor.MadeChanges) + { + compCurStmt = stmt; + //stmt->SetRootNode(fgMorphTree(stmt->GetRootNode())); + gtSetStmtInfo(stmt); + fgSetStmtSeq(stmt); + JITDUMP("New tree:\n", dspTreeID(stmt->GetRootNode())); + DISPTREE(stmt->GetRootNode()); + JITDUMP("\n"); + } + else + { + JITDUMP("No replacements made\n"); + } +} + //------------------------------------------------------------------------ // optInductionVariables: Try and optimize induction variables in the method. // @@ -669,24 +775,23 @@ PhaseStatus Compiler::optInductionVariables() { JITDUMP("*************** In optInductionVariables()\n"); - fgDispBasicBlocks(true); - m_blockToLoop = BlockToNaturalLoopMap::Build(m_loops); ScalarEvolutionContext scevContext(this); for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder()) { JITDUMP("Analyzing scalar evolution in "); - FlowGraphNaturalLoop::Dump(loop); + DBEXEC(verbose, FlowGraphNaturalLoop::Dump(loop)); scevContext.ResetForLoop(loop); loop->VisitLoopBlocksReversePostOrder([=, &scevContext](BasicBlock* block) { DBEXEC(verbose, block->dspBlockHeader(this)); + JITDUMP("\n"); for (Statement* stmt : block->Statements()) { - JITDUMP("\n"); DISPSTMT(stmt); + JITDUMP("\n"); for (GenTree* node : stmt->TreeList()) { @@ -694,18 +799,122 @@ PhaseStatus Compiler::optInductionVariables() if (scev != nullptr) { JITDUMP("[%06u] => ", dspTreeID(node)); - DumpScev(scev); + DBEXEC(verbose, DumpScev(scev)); + JITDUMP("\n => ", dspTreeID(node)); Scev* folded = scevContext.Fold(scev); - JITDUMP(" => "); - DumpScev(folded); + DBEXEC(verbose, DumpScev(folded)); JITDUMP("\n"); } } + + JITDUMP("\n"); } return BasicBlockVisit::Continue; }); } - return PhaseStatus::MODIFIED_NOTHING; + bool changed = false; + +#ifdef TARGET_64BIT + JITDUMP("Widening primary induction variables:\n"); + for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder()) + { + JITDUMP("Processing "); + DBEXEC(verbose, FlowGraphNaturalLoop::Dump(loop)); + scevContext.ResetForLoop(loop); + + for (Statement* stmt : loop->GetHeader()->Statements()) + { + if (!stmt->IsPhiDefnStmt()) + { + break; + } + + JITDUMP("\n"); + + DISPSTMT(stmt); + + GenTreeLclVarCommon* lcl = stmt->GetRootNode()->AsLclVarCommon(); + if (genActualType(lcl) != TYP_INT) + { + JITDUMP(" Type is %s, no widening to be done\n", varTypeName(genActualType(lcl))); + continue; + } + + Scev* scev = scevContext.Analyze(loop->GetHeader(), stmt->GetRootNode()); + if (scev == nullptr) + { + JITDUMP(" Could not analyze header PHI\n"); + continue; + } + + scev = scevContext.Fold(scev); + JITDUMP(" => "); + DBEXEC(verbose, DumpScev(scev)); + JITDUMP("\n"); + if (!scev->OperIs(ScevOper::AddRec)) + { + JITDUMP(" Not an addrec\n"); + continue; + } + + ScevAddRec* addRec = (ScevAddRec*)scev; + + JITDUMP(" V%02u is a primary induction variable in " FMT_LP "\n", lcl->GetLclNum(), loop->GetIndex()); + + if (!optCanSinkWidenedIV(lcl->GetLclNum(), loop)) + { + continue; + } + + changed = true; + unsigned 
newLclNum = lvaGrabTemp(false DEBUGARG("Widened primary induction variable")); + JITDUMP(" Replacing V%02u with a widened version V%02u\n", lcl->GetLclNum(), newLclNum); + + GenTree* initVal; + if (addRec->Start->OperIs(ScevOper::Constant)) + { + ScevConstant* cns = (ScevConstant*)addRec->Start; + initVal = gtNewIconNode((int64_t)(uint32_t)(((ScevConstant*)addRec->Start)->Value), TYP_LONG); + } + else + { + LclVarDsc* lclDsc = lvaGetDesc(lcl); + initVal = gtNewCastNode(TYP_LONG, gtNewLclvNode(lcl->GetLclNum(), lclDsc->lvNormalizeOnLoad() ? lclDsc->TypeGet() : TYP_INT), true, TYP_LONG); + } + + JITDUMP("Adding initialization of new widened local to preheader:\n"); + GenTree* widenStore = gtNewTempStore(newLclNum, initVal); + BasicBlock* preheader = loop->EntryEdge(0)->getSourceBlock(); + Statement* initStmt = fgNewStmtFromTree(widenStore); + fgInsertStmtAtEnd(preheader, initStmt); + DISPSTMT(initStmt); + JITDUMP("\n"); + + loop->VisitLoopBlocks([=](BasicBlock* block) { + + compCurBB = block; + for (Statement* stmt : block->NonPhiStatements()) + { + JITDUMP("Replacing V%02u -> V%02u in [%06u]\n", lcl->GetLclNum(), newLclNum, dspTreeID(stmt->GetRootNode())); + DISPSTMT(stmt); + JITDUMP("\n"); + optReplaceWidenedIV(lcl->GetLclNum(), newLclNum, stmt); + } + + return BasicBlockVisit::Continue; + }); + + changed |= optSinkWidenedIV(lcl->GetLclNum(), newLclNum, loop); + } + } +#endif + + if (changed) + { + fgSsaBuild(); + } + + return changed ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } From 974e7f992211b777c844fe72afcd9539ed974e5d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 2 Feb 2024 12:12:08 +0100 Subject: [PATCH 07/64] Bail for non-integer constants --- src/coreclr/jit/inductionvariableopts.cpp | 19 +++++++++++++++++-- src/coreclr/jit/jitconfigvalues.h | 3 +++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index b7cebb5780b45d..75ae4b7d0bddc7 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -230,9 +230,9 @@ class ScalarEvolutionContext Scev* ScalarEvolutionContext::CreateSimpleInvariantScev(GenTree* tree) { - if (tree->OperIsConst()) + if (tree->OperIs(GT_CNS_INT, GT_CNS_LNG)) { - return NewConstant(tree->TypeGet(), tree->AsIntConCommon()->IntegralValue()); + return CreateScevForConstant(tree->AsIntConCommon()); } if (tree->OperIs(GT_LCL_VAR) && tree->AsLclVarCommon()->HasSsaName()) @@ -775,6 +775,21 @@ PhaseStatus Compiler::optInductionVariables() { JITDUMP("*************** In optInductionVariables()\n"); + if (JitConfig.JitEnableInductionVariableOpts() == 0) + { + return PhaseStatus::MODIFIED_NOTHING; + } + +#ifdef DEBUG + static ConfigMethodRange s_range; + s_range.EnsureInit(JitConfig.JitEnableInductionVariableOptsRange()); + + if (!s_range.Contains(info.compMethodHash())) + { + return PhaseStatus::MODIFIED_NOTHING; + } +#endif + m_blockToLoop = BlockToNaturalLoopMap::Build(m_loops); ScalarEvolutionContext scevContext(this); diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 0e15d072cf1052..767ff46276cebe 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -497,6 +497,7 @@ CONFIG_STRING(JitOnlyOptimizeRange, W("JitOnlyOptimizeRange")) // If set, all methods that do _not_ match are forced into MinOpts CONFIG_STRING(JitEnablePhysicalPromotionRange, W("JitEnablePhysicalPromotionRange")) 
CONFIG_STRING(JitEnableCrossBlockLocalAssertionPropRange, W("JitEnableCrossBlockLocalAssertionPropRange")) +CONFIG_STRING(JitEnableInductionVariableOptsRange, W("JitEnableInductionVariableOptsRange")) CONFIG_INTEGER(JitDoSsa, W("JitDoSsa"), 1) // Perform Static Single Assignment (SSA) numbering on the variables CONFIG_INTEGER(JitDoValueNumber, W("JitDoValueNumber"), 1) // Perform value numbering on method expressions @@ -694,6 +695,8 @@ CONFIG_INTEGER(JitEnablePhysicalPromotion, W("JitEnablePhysicalPromotion"), 1) // Enable cross-block local assertion prop CONFIG_INTEGER(JitEnableCrossBlockLocalAssertionProp, W("JitEnableCrossBlockLocalAssertionProp"), 1) +CONFIG_INTEGER(JitEnableInductionVariableOpts, W("JitEnableInductionVariableOpts"), 1) + #if defined(DEBUG) // JitFunctionFile: Name of a file that contains a list of functions. If the currently compiled function is in the // file, certain other JIT config variables will be active. If the currently compiled function is not in the file, From c51005433c2abd071f56b91962c3f1b83f5fe358 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 2 Feb 2024 12:56:03 +0100 Subject: [PATCH 08/64] Add costing --- src/coreclr/jit/compiler.h | 1 + src/coreclr/jit/inductionvariableopts.cpp | 113 ++++++++++++++++++++++ 2 files changed, 114 insertions(+) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index a842c3f8b96007..60c2ec6e4dfbc6 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7385,6 +7385,7 @@ class Compiler PhaseStatus optInductionVariables(); bool optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop); + bool optIsIVWideningProfitable(unsigned lclNum, struct ScevAddRec* addRec, FlowGraphNaturalLoop* loop); void optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statement* stmt); bool optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNaturalLoop* loop); diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 75ae4b7d0bddc7..a6422371631e07 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -659,6 +659,114 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) return result == BasicBlockVisit::Continue; } +bool Compiler::optIsIVWideningProfitable(unsigned lclNum, ScevAddRec* addRec, FlowGraphNaturalLoop* loop) +{ + struct CountZeroExtensionsVisitor : GenTreeVisitor + { + private: + unsigned m_lclNum; + public: + enum + { + DoPreOrder = true, + }; + + unsigned NumExtensions = 0; + + CountZeroExtensionsVisitor(Compiler* comp, unsigned lclNum) : GenTreeVisitor(comp), m_lclNum(lclNum) + { + } + + fgWalkResult PreOrderVisit(GenTree** use, GenTree* parent) + { + GenTree* node = *use; + if (node->OperIs(GT_CAST)) + { + GenTreeCast* cast = node->AsCast(); + if ((cast->gtCastType == TYP_LONG) && cast->IsUnsigned()) + { + GenTree* op = cast->CastOp(); + if (op->OperIs(GT_LCL_VAR) && (op->AsLclVarCommon()->GetLclNum() == m_lclNum)) + { + NumExtensions++; + return WALK_SKIP_SUBTREES; + } + } + } + + return WALK_CONTINUE; + } + }; + + const weight_t ExtensionCost = 2; + const int ExtensionSize = 3; + + CountZeroExtensionsVisitor visitor(this, lclNum); + weight_t savedCost = 0; + int savedSize = 0; + + loop->VisitLoopBlocks([&](BasicBlock* block) { + visitor.NumExtensions = 0; + + for (Statement* stmt : block->NonPhiStatements()) + { + visitor.WalkTree(stmt->GetRootNodePointer(), nullptr); + } + + savedSize += (int)visitor.NumExtensions * 
ExtensionSize; + savedCost += visitor.NumExtensions * block->getBBWeight(this) * ExtensionCost; + return BasicBlockVisit::Continue; + }); + + if (!addRec->Start->OperIs(ScevOper::Constant)) + { + // Need to insert a move from the narrow local in the preheader. + savedSize -= ExtensionSize; + savedCost -= loop->EntryEdge(0)->getSourceBlock()->getBBWeight(this) * ExtensionCost; + } + else + { + // If this is a constant then we are likely going to save the cost of + // initializing the narrow local which will balance out initializing + // the widened local. + } + + // Now account for the cost of sinks. + LclVarDsc* dsc = lvaGetDesc(lclNum); + loop->VisitAllExitBlocks([&](BasicBlock* exit) { + if (VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex)) + { + savedSize -= ExtensionSize; + savedCost -= exit->getBBWeight(this) * ExtensionCost; + } + return BasicBlockVisit::Continue; + }); + + const weight_t ALLOWED_SIZE_REGRESSION_PER_CYCLE_IMPROVEMENT = 2; + weight_t cycleImprovementPerInvoc = savedCost / fgFirstBB->getBBWeight(this); + + JITDUMP(" Estimated cycle improvement: " FMT_WT " cycles per invocation\n", cycleImprovementPerInvoc); + JITDUMP(" Estimated size improvement: %d bytes\n", savedSize); + + if ((cycleImprovementPerInvoc > 0) && + ((cycleImprovementPerInvoc * ALLOWED_SIZE_REGRESSION_PER_CYCLE_IMPROVEMENT) >= -savedSize)) + { + JITDUMP(" Widening is profitable (cycle improvement)\n"); + return true; + } + + const weight_t ALLOWED_CYCLE_REGRESSION_PER_SIZE_IMPROVEMENT = 0.01; + + if ((savedSize > 0) && ((savedSize * ALLOWED_CYCLE_REGRESSION_PER_SIZE_IMPROVEMENT) >= -cycleImprovementPerInvoc)) + { + JITDUMP(" Widening is profitable (size improvement)\n"); + return true; + } + + JITDUMP(" Widening is not profitable\n"); + return false; +} + bool Compiler::optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNaturalLoop* loop) { bool anySunk = false; @@ -883,6 +991,11 @@ PhaseStatus Compiler::optInductionVariables() continue; } + if (!optIsIVWideningProfitable(lcl->GetLclNum(), addRec, loop)) + { + continue; + } + changed = true; unsigned newLclNum = lvaGrabTemp(false DEBUGARG("Widened primary induction variable")); JITDUMP(" Replacing V%02u with a widened version V%02u\n", lcl->GetLclNum(), newLclNum); From 96d5bec463de3582dc93939a65db0cd2bfcbf871 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 2 Feb 2024 12:56:18 +0100 Subject: [PATCH 09/64] Run jit-format --- src/coreclr/jit/compiler.hpp | 7 +- src/coreclr/jit/inductionvariableopts.cpp | 191 +++++++++++++--------- 2 files changed, 117 insertions(+), 81 deletions(-) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index e62bbb0a80c873..216a0c82d51d74 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -4951,18 +4951,19 @@ BasicBlockVisit FlowGraphNaturalLoop::VisitAllExitBlocks(TFunc func) Compiler* comp = m_dfsTree->GetCompiler(); BitVecTraits traits = m_dfsTree->PostOrderTraits(); - BitVec visited(BitVecOps::MakeEmpty(&traits)); + BitVec visited(BitVecOps::MakeEmpty(&traits)); BasicBlockVisit result = VisitLoopBlocksReversePostOrder([&, comp](BasicBlock* block) { return block->VisitAllSuccs(comp, [&](BasicBlock* succ) { - if (!ContainsBlock(succ) && BitVecOps::TryAddElemD(&traits, visited, succ->bbPostorderNum) && (func(succ) == BasicBlockVisit::Abort)) + if (!ContainsBlock(succ) && BitVecOps::TryAddElemD(&traits, visited, succ->bbPostorderNum) && + (func(succ) == BasicBlockVisit::Abort)) { return BasicBlockVisit::Abort; } return 
BasicBlockVisit::Continue; - }); }); + }); return result; } diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index a6422371631e07..3c146619905618 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -137,10 +137,17 @@ static void DumpScev(Scev* scev) const char* op; switch (binop->Oper) { - case ScevOper::Add: op = "+"; break; - case ScevOper::Mul: op = "*"; break; - case ScevOper::Lsh: op = "<<"; break; - default: unreached(); + case ScevOper::Add: + op = "+"; + break; + case ScevOper::Mul: + op = "*"; + break; + case ScevOper::Lsh: + op = "<<"; + break; + default: + unreached(); } printf(" %s ", op); DumpScev(binop->Op2); @@ -165,20 +172,21 @@ static void DumpScev(Scev* scev) class ScalarEvolutionContext { - Compiler* m_comp; + Compiler* m_comp; FlowGraphNaturalLoop* m_loop = nullptr; - ScalarEvolutionMap m_cache; + ScalarEvolutionMap m_cache; Scev* AnalyzeNew(BasicBlock* block, GenTree* tree); - Scev* CreateSimpleAddRec(GenTreeLclVarCommon* headerStore, Scev* start, BasicBlock* stepDefBlock, GenTree* stepDefData); + Scev* CreateSimpleAddRec(GenTreeLclVarCommon* headerStore, + Scev* start, + BasicBlock* stepDefBlock, + GenTree* stepDefData); Scev* CreateSimpleInvariantScev(GenTree* tree); Scev* CreateScevForConstant(GenTreeIntConCommon* tree); bool TrackedLocalVariesInLoop(unsigned lclNum); public: - ScalarEvolutionContext(Compiler* comp) : - m_comp(comp), - m_cache(comp->getAllocator(CMK_LoopScalarEvolution)) + ScalarEvolutionContext(Compiler* comp) : m_comp(comp), m_cache(comp->getAllocator(CMK_LoopScalarEvolution)) { } @@ -196,9 +204,8 @@ class ScalarEvolutionContext ScevLocal* NewLocal(unsigned lclNum, unsigned ssaNum) { - var_types type = genActualType(m_comp->lvaGetDesc(lclNum)); - ScevLocal* invariantLocal = - new (m_comp, CMK_LoopScalarEvolution) ScevLocal(type, lclNum, ssaNum); + var_types type = genActualType(m_comp->lvaGetDesc(lclNum)); + ScevLocal* invariantLocal = new (m_comp, CMK_LoopScalarEvolution) ScevLocal(type, lclNum, ssaNum); return invariantLocal; } @@ -237,7 +244,7 @@ Scev* ScalarEvolutionContext::CreateSimpleInvariantScev(GenTree* tree) if (tree->OperIs(GT_LCL_VAR) && tree->AsLclVarCommon()->HasSsaName()) { - LclVarDsc* dsc = m_comp->lvaGetDesc(tree->AsLclVarCommon()); + LclVarDsc* dsc = m_comp->lvaGetDesc(tree->AsLclVarCommon()); LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(tree->AsLclVarCommon()->GetSsaNum()); if ((ssaDsc->GetBlock() == nullptr) || !m_loop->ContainsBlock(ssaDsc->GetBlock())) @@ -295,7 +302,7 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) } assert(m_comp->lvaInSsa(tree->AsLclVarCommon()->GetLclNum())); - LclVarDsc* dsc = m_comp->lvaGetDesc(tree->AsLclVarCommon()); + LclVarDsc* dsc = m_comp->lvaGetDesc(tree->AsLclVarCommon()); LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(tree->AsLclVarCommon()->GetSsaNum()); if ((ssaDsc->GetBlock() == nullptr) || !m_loop->ContainsBlock(ssaDsc->GetBlock())) @@ -374,8 +381,8 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) return nullptr; } - LclVarDsc* dsc = m_comp->lvaGetDesc(store); - LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(backedgeSsa->GetSsaNum()); + LclVarDsc* dsc = m_comp->lvaGetDesc(store); + LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(backedgeSsa->GetSsaNum()); if (ssaDsc->GetDefNode() == nullptr) { @@ -451,10 +458,17 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) ScevOper oper; switch (tree->OperGet()) { - case GT_ADD: 
oper = ScevOper::Add; break; - case GT_MUL: oper = ScevOper::Mul; break; - case GT_LSH: oper = ScevOper::Lsh; break; - default: unreached(); + case GT_ADD: + oper = ScevOper::Add; + break; + case GT_MUL: + oper = ScevOper::Mul; + break; + case GT_LSH: + oper = ScevOper::Lsh; + break; + default: + unreached(); } return NewBinop(oper, op1, op2); @@ -472,7 +486,10 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) } } -Scev* ScalarEvolutionContext::CreateSimpleAddRec(GenTreeLclVarCommon* headerStore, Scev* enterScev, BasicBlock* stepDefBlock, GenTree* stepDefData) +Scev* ScalarEvolutionContext::CreateSimpleAddRec(GenTreeLclVarCommon* headerStore, + Scev* enterScev, + BasicBlock* stepDefBlock, + GenTree* stepDefData) { if (!stepDefData->OperIs(GT_ADD)) { @@ -488,7 +505,7 @@ Scev* ScalarEvolutionContext::CreateSimpleAddRec(GenTreeLclVarCommon* headerStor stepTree = op2; } else if (op2->OperIs(GT_LCL_VAR) && (op2->AsLclVar()->GetLclNum() == headerStore->GetLclNum()) && - (op2->AsLclVar()->GetSsaNum() == headerStore->GetSsaNum())) + (op2->AsLclVar()->GetSsaNum() == headerStore->GetSsaNum())) { stepTree = op1; } @@ -518,15 +535,19 @@ Scev* ScalarEvolutionContext::Analyze(BasicBlock* block, GenTree* tree) return result; } -template +template static T FoldArith(ScevOper oper, T op1, T op2) { switch (oper) { - case ScevOper::Add: return op1 + op2; - case ScevOper::Mul: return op1 * op2; - case ScevOper::Lsh: return op1 << op2; - default: unreached(); + case ScevOper::Add: + return op1 + op2; + case ScevOper::Mul: + return op1 * op2; + case ScevOper::Lsh: + return op1 << op2; + default: + unreached(); } } @@ -557,11 +578,12 @@ Scev* ScalarEvolutionContext::Fold(Scev* scev) if (op1->OperIs(ScevOper::Constant)) { ScevConstant* cns = (ScevConstant*)op1; - return NewConstant(unop->Type, unop->OperIs(ScevOper::ZeroExtend) ? (uint64_t)(int32_t)cns->Value : (int64_t)(int32_t)cns->Value); + return NewConstant(unop->Type, unop->OperIs(ScevOper::ZeroExtend) ? (uint64_t)(int32_t)cns->Value + : (int64_t)(int32_t)cns->Value); } // Folding these requires some proof that it is ok. - //if (op1->OperIs(ScevOper::AddRec)) + // if (op1->OperIs(ScevOper::AddRec)) //{ // return op1; //} @@ -573,8 +595,8 @@ Scev* ScalarEvolutionContext::Fold(Scev* scev) case ScevOper::Lsh: { ScevBinop* binop = (ScevBinop*)scev; - Scev* op1 = Fold(binop->Op1); - Scev* op2 = Fold(binop->Op2); + Scev* op1 = Fold(binop->Op1); + Scev* op2 = Fold(binop->Op2); if (binop->OperIs(ScevOper::Add, ScevOper::Mul)) { @@ -594,9 +616,11 @@ Scev* ScalarEvolutionContext::Fold(Scev* scev) { // + x => // * x => - ScevAddRec* addRec = (ScevAddRec*)op1; - Scev* newStart = Fold(NewBinop(binop->Oper, addRec->Start, op2)); - Scev* newStep = scev->OperIs(ScevOper::Mul, ScevOper::Lsh) ? Fold(NewBinop(binop->Oper, addRec->Step, op2)) : addRec->Step; + ScevAddRec* addRec = (ScevAddRec*)op1; + Scev* newStart = Fold(NewBinop(binop->Oper, addRec->Start, op2)); + Scev* newStep = scev->OperIs(ScevOper::Mul, ScevOper::Lsh) + ? 
Fold(NewBinop(binop->Oper, addRec->Step, op2)) + : addRec->Step; return NewAddRec(addRec->Loop, newStart, newStep); } @@ -607,7 +631,8 @@ Scev* ScalarEvolutionContext::Fold(Scev* scev) int64_t newValue; if (binop->TypeIs(TYP_INT)) { - newValue = FoldArith(binop->Oper, static_cast(cns1->Value), static_cast(cns2->Value)); + newValue = FoldArith(binop->Oper, static_cast(cns1->Value), + static_cast(cns2->Value)); } else { @@ -623,8 +648,8 @@ Scev* ScalarEvolutionContext::Fold(Scev* scev) case ScevOper::AddRec: { ScevAddRec* addRec = (ScevAddRec*)scev; - Scev* start = Fold(addRec->Start); - Scev* step = Fold(addRec->Step); + Scev* start = Fold(addRec->Start); + Scev* step = Fold(addRec->Step); return (start == addRec->Start) && (step == addRec->Step) ? addRec : NewAddRec(addRec->Loop, start, step); } default: @@ -647,14 +672,16 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) { if (!loop->ContainsBlock(predEdge->getSourceBlock())) { - JITDUMP(" Cannot safely sink widened version of V%02u into exit " FMT_BB " of " FMT_LP "; it has a non-loop pred " FMT_BB "\n", lclNum, exit->bbNum, loop->GetIndex(), predEdge->getSourceBlock()->bbNum); + JITDUMP(" Cannot safely sink widened version of V%02u into exit " FMT_BB " of " FMT_LP + "; it has a non-loop pred " FMT_BB "\n", + lclNum, exit->bbNum, loop->GetIndex(), predEdge->getSourceBlock()->bbNum); return BasicBlockVisit::Abort; } } JITDUMP(" V%02u is live into exit " FMT_BB "; will sink the widened value\n", lclNum, exit->bbNum); return BasicBlockVisit::Continue; - }); + }); return result == BasicBlockVisit::Continue; } @@ -665,6 +692,7 @@ bool Compiler::optIsIVWideningProfitable(unsigned lclNum, ScevAddRec* addRec, Fl { private: unsigned m_lclNum; + public: enum { @@ -699,11 +727,11 @@ bool Compiler::optIsIVWideningProfitable(unsigned lclNum, ScevAddRec* addRec, Fl }; const weight_t ExtensionCost = 2; - const int ExtensionSize = 3; + const int ExtensionSize = 3; CountZeroExtensionsVisitor visitor(this, lclNum); - weight_t savedCost = 0; - int savedSize = 0; + weight_t savedCost = 0; + int savedSize = 0; loop->VisitLoopBlocks([&](BasicBlock* block) { visitor.NumExtensions = 0; @@ -716,7 +744,7 @@ bool Compiler::optIsIVWideningProfitable(unsigned lclNum, ScevAddRec* addRec, Fl savedSize += (int)visitor.NumExtensions * ExtensionSize; savedCost += visitor.NumExtensions * block->getBBWeight(this) * ExtensionCost; return BasicBlockVisit::Continue; - }); + }); if (!addRec->Start->OperIs(ScevOper::Constant)) { @@ -740,10 +768,10 @@ bool Compiler::optIsIVWideningProfitable(unsigned lclNum, ScevAddRec* addRec, Fl savedCost -= exit->getBBWeight(this) * ExtensionCost; } return BasicBlockVisit::Continue; - }); + }); const weight_t ALLOWED_SIZE_REGRESSION_PER_CYCLE_IMPROVEMENT = 2; - weight_t cycleImprovementPerInvoc = savedCost / fgFirstBB->getBBWeight(this); + weight_t cycleImprovementPerInvoc = savedCost / fgFirstBB->getBBWeight(this); JITDUMP(" Estimated cycle improvement: " FMT_WT " cycles per invocation\n", cycleImprovementPerInvoc); JITDUMP(" Estimated size improvement: %d bytes\n", savedSize); @@ -769,24 +797,24 @@ bool Compiler::optIsIVWideningProfitable(unsigned lclNum, ScevAddRec* addRec, Fl bool Compiler::optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNaturalLoop* loop) { - bool anySunk = false; - LclVarDsc* dsc = lvaGetDesc(lclNum); + bool anySunk = false; + LclVarDsc* dsc = lvaGetDesc(lclNum); loop->VisitAllExitBlocks([=, &anySunk](BasicBlock* exit) { if (!VarSetOps::IsMember(this, exit->bbLiveIn, 
dsc->lvVarIndex)) { return BasicBlockVisit::Continue; } - GenTree* narrowing = gtNewCastNode(TYP_INT, gtNewLclvNode(newLclNum, TYP_LONG), false, TYP_INT); - GenTree* store = gtNewStoreLclVarNode(lclNum, narrowing); - Statement* newStmt = fgNewStmtFromTree(store); + GenTree* narrowing = gtNewCastNode(TYP_INT, gtNewLclvNode(newLclNum, TYP_LONG), false, TYP_INT); + GenTree* store = gtNewStoreLclVarNode(lclNum, narrowing); + Statement* newStmt = fgNewStmtFromTree(store); JITDUMP("Narrow IV local V%02u live into exit block " FMT_BB "; sinking a narrowing\n", lclNum, exit->bbNum); DISPSTMT(newStmt); fgInsertStmtAtBeg(exit, newStmt); anySunk = true; return BasicBlockVisit::Continue; - }); + }); return anySunk; } @@ -807,7 +835,8 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statemen DoPreOrder = true, }; - ReplaceVisitor(Compiler* comp, unsigned lclNum, unsigned newLclNum) : GenTreeVisitor(comp), m_lclNum(lclNum), m_newLclNum(newLclNum) + ReplaceVisitor(Compiler* comp, unsigned lclNum, unsigned newLclNum) + : GenTreeVisitor(comp), m_lclNum(lclNum), m_newLclNum(newLclNum) { } @@ -822,30 +851,32 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statemen GenTree* op = cast->CastOp(); if (op->OperIs(GT_LCL_VAR) && (op->AsLclVarCommon()->GetLclNum() == m_lclNum)) { - *use = m_compiler->gtNewLclvNode(m_newLclNum, TYP_LONG); + *use = m_compiler->gtNewLclvNode(m_newLclNum, TYP_LONG); MadeChanges = true; return fgWalkResult::WALK_SKIP_SUBTREES; } } } - else if (node->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_STORE_LCL_VAR, GT_STORE_LCL_FLD) && (node->AsLclVarCommon()->GetLclNum() == m_lclNum)) + else if (node->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_STORE_LCL_VAR, GT_STORE_LCL_FLD) && + (node->AsLclVarCommon()->GetLclNum() == m_lclNum)) { switch (node->OperGet()) { - case GT_LCL_VAR: - node->AsLclVarCommon()->SetLclNum(m_newLclNum); - // No cast needed -- the backend allows TYP_INT uses of TYP_LONG locals. - break; - case GT_LCL_FLD: - case GT_STORE_LCL_FLD: // TODO: Do we need to skip widening if we have one of these? - node->AsLclFld()->SetLclNum(m_newLclNum); - m_compiler->lvaSetVarDoNotEnregister(m_newLclNum DEBUGARG(DoNotEnregisterReason::LocalField)); - break; - case GT_STORE_LCL_VAR: - node->AsLclVarCommon()->SetLclNum(m_newLclNum); - node->AsLclVarCommon()->gtType = TYP_LONG; - node->AsLclVarCommon()->Data() = m_compiler->gtNewCastNode(TYP_LONG, node->AsLclVarCommon()->Data(), true, TYP_LONG); - break; + case GT_LCL_VAR: + node->AsLclVarCommon()->SetLclNum(m_newLclNum); + // No cast needed -- the backend allows TYP_INT uses of TYP_LONG locals. + break; + case GT_LCL_FLD: + case GT_STORE_LCL_FLD: // TODO: Do we need to skip widening if we have one of these? 
+ node->AsLclFld()->SetLclNum(m_newLclNum); + m_compiler->lvaSetVarDoNotEnregister(m_newLclNum DEBUGARG(DoNotEnregisterReason::LocalField)); + break; + case GT_STORE_LCL_VAR: + node->AsLclVarCommon()->SetLclNum(m_newLclNum); + node->AsLclVarCommon()->gtType = TYP_LONG; + node->AsLclVarCommon()->Data() = + m_compiler->gtNewCastNode(TYP_LONG, node->AsLclVarCommon()->Data(), true, TYP_LONG); + break; } MadeChanges = true; @@ -860,7 +891,7 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statemen if (visitor.MadeChanges) { compCurStmt = stmt; - //stmt->SetRootNode(fgMorphTree(stmt->GetRootNode())); + // stmt->SetRootNode(fgMorphTree(stmt->GetRootNode())); gtSetStmtInfo(stmt); fgSetStmtSeq(stmt); JITDUMP("New tree:\n", dspTreeID(stmt->GetRootNode())); @@ -996,7 +1027,7 @@ PhaseStatus Compiler::optInductionVariables() continue; } - changed = true; + changed = true; unsigned newLclNum = lvaGrabTemp(false DEBUGARG("Widened primary induction variable")); JITDUMP(" Replacing V%02u with a widened version V%02u\n", lcl->GetLclNum(), newLclNum); @@ -1004,18 +1035,21 @@ PhaseStatus Compiler::optInductionVariables() if (addRec->Start->OperIs(ScevOper::Constant)) { ScevConstant* cns = (ScevConstant*)addRec->Start; - initVal = gtNewIconNode((int64_t)(uint32_t)(((ScevConstant*)addRec->Start)->Value), TYP_LONG); + initVal = gtNewIconNode((int64_t)(uint32_t)(((ScevConstant*)addRec->Start)->Value), TYP_LONG); } else { LclVarDsc* lclDsc = lvaGetDesc(lcl); - initVal = gtNewCastNode(TYP_LONG, gtNewLclvNode(lcl->GetLclNum(), lclDsc->lvNormalizeOnLoad() ? lclDsc->TypeGet() : TYP_INT), true, TYP_LONG); + initVal = + gtNewCastNode(TYP_LONG, gtNewLclvNode(lcl->GetLclNum(), + lclDsc->lvNormalizeOnLoad() ? lclDsc->TypeGet() : TYP_INT), + true, TYP_LONG); } JITDUMP("Adding initialization of new widened local to preheader:\n"); - GenTree* widenStore = gtNewTempStore(newLclNum, initVal); - BasicBlock* preheader = loop->EntryEdge(0)->getSourceBlock(); - Statement* initStmt = fgNewStmtFromTree(widenStore); + GenTree* widenStore = gtNewTempStore(newLclNum, initVal); + BasicBlock* preheader = loop->EntryEdge(0)->getSourceBlock(); + Statement* initStmt = fgNewStmtFromTree(widenStore); fgInsertStmtAtEnd(preheader, initStmt); DISPSTMT(initStmt); JITDUMP("\n"); @@ -1025,14 +1059,15 @@ PhaseStatus Compiler::optInductionVariables() compCurBB = block; for (Statement* stmt : block->NonPhiStatements()) { - JITDUMP("Replacing V%02u -> V%02u in [%06u]\n", lcl->GetLclNum(), newLclNum, dspTreeID(stmt->GetRootNode())); + JITDUMP("Replacing V%02u -> V%02u in [%06u]\n", lcl->GetLclNum(), newLclNum, + dspTreeID(stmt->GetRootNode())); DISPSTMT(stmt); JITDUMP("\n"); optReplaceWidenedIV(lcl->GetLclNum(), newLclNum, stmt); } return BasicBlockVisit::Continue; - }); + }); changed |= optSinkWidenedIV(lcl->GetLclNum(), newLclNum, loop); } From 0592111a3627fad71f86538319cbe971e352e7a3 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 2 Feb 2024 13:22:44 +0100 Subject: [PATCH 10/64] Clean up --- src/coreclr/jit/inductionvariableopts.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 3c146619905618..3ad683155cec8b 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -582,11 +582,12 @@ Scev* ScalarEvolutionContext::Fold(Scev* scev) : (int64_t)(int32_t)cns->Value); } - // Folding these requires some proof that it is ok. 
- // if (op1->OperIs(ScevOper::AddRec)) - //{ - // return op1; - //} + if (op1->OperIs(ScevOper::AddRec)) + { + // TODO: This requires some proof that it is ok, but currently + // we do not rely on this. + return op1; + } return (op1 == unop->Op1) ? unop : NewExtension(unop->Oper, unop->Type, op1); } @@ -953,8 +954,6 @@ PhaseStatus Compiler::optInductionVariables() if (scev != nullptr) { JITDUMP("[%06u] => ", dspTreeID(node)); - DBEXEC(verbose, DumpScev(scev)); - JITDUMP("\n => ", dspTreeID(node)); Scev* folded = scevContext.Fold(scev); DBEXEC(verbose, DumpScev(folded)); JITDUMP("\n"); From 2a02a48d8ef9856711c2463e212c1687d7f8f842 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 2 Feb 2024 13:35:50 +0100 Subject: [PATCH 11/64] Fix linux build --- src/coreclr/jit/inductionvariableopts.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 3ad683155cec8b..cdfb67570f7004 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -878,6 +878,8 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statemen node->AsLclVarCommon()->Data() = m_compiler->gtNewCastNode(TYP_LONG, node->AsLclVarCommon()->Data(), true, TYP_LONG); break; + default: + break; } MadeChanges = true; From 99cca89570722d3793464c8b10ff391689f8b5cb Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 2 Feb 2024 14:04:25 +0100 Subject: [PATCH 12/64] Clean up --- src/coreclr/jit/inductionvariableopts.cpp | 61 +---------------------- 1 file changed, 1 insertion(+), 60 deletions(-) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index cdfb67570f7004..25191fc5e1523f 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -183,7 +183,6 @@ class ScalarEvolutionContext GenTree* stepDefData); Scev* CreateSimpleInvariantScev(GenTree* tree); Scev* CreateScevForConstant(GenTreeIntConCommon* tree); - bool TrackedLocalVariesInLoop(unsigned lclNum); public: ScalarEvolutionContext(Compiler* comp) : m_comp(comp), m_cache(comp->getAllocator(CMK_LoopScalarEvolution)) @@ -231,7 +230,6 @@ class ScalarEvolutionContext } Scev* Analyze(BasicBlock* block, GenTree* tree); - Scev* Fold(Scev* scev); }; @@ -256,24 +254,6 @@ Scev* ScalarEvolutionContext::CreateSimpleInvariantScev(GenTree* tree) return nullptr; } -bool ScalarEvolutionContext::TrackedLocalVariesInLoop(unsigned lclNum) -{ - for (Statement* stmt : m_loop->GetHeader()->Statements()) - { - if (!stmt->IsPhiDefnStmt()) - { - break; - } - - if (stmt->GetRootNode()->AsLclVarCommon()->GetLclNum() == lclNum) - { - return true; - } - } - - return false; -} - Scev* ScalarEvolutionContext::CreateScevForConstant(GenTreeIntConCommon* tree) { if (tree->IsIconHandle() || !tree->TypeIs(TYP_INT, TYP_LONG)) @@ -893,8 +873,6 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statemen visitor.WalkTree(stmt->GetRootNodePointer(), nullptr); if (visitor.MadeChanges) { - compCurStmt = stmt; - // stmt->SetRootNode(fgMorphTree(stmt->GetRootNode())); gtSetStmtInfo(stmt); fgSetStmtSeq(stmt); JITDUMP("New tree:\n", dspTreeID(stmt->GetRootNode())); @@ -932,46 +910,10 @@ PhaseStatus Compiler::optInductionVariables() } #endif - m_blockToLoop = BlockToNaturalLoopMap::Build(m_loops); - ScalarEvolutionContext scevContext(this); - - for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder()) - { - JITDUMP("Analyzing 
scalar evolution in "); - DBEXEC(verbose, FlowGraphNaturalLoop::Dump(loop)); - scevContext.ResetForLoop(loop); - - loop->VisitLoopBlocksReversePostOrder([=, &scevContext](BasicBlock* block) { - DBEXEC(verbose, block->dspBlockHeader(this)); - JITDUMP("\n"); - - for (Statement* stmt : block->Statements()) - { - DISPSTMT(stmt); - JITDUMP("\n"); - - for (GenTree* node : stmt->TreeList()) - { - Scev* scev = scevContext.Analyze(block, node); - if (scev != nullptr) - { - JITDUMP("[%06u] => ", dspTreeID(node)); - Scev* folded = scevContext.Fold(scev); - DBEXEC(verbose, DumpScev(folded)); - JITDUMP("\n"); - } - } - - JITDUMP("\n"); - } - - return BasicBlockVisit::Continue; - }); - } - bool changed = false; #ifdef TARGET_64BIT + ScalarEvolutionContext scevContext(this); JITDUMP("Widening primary induction variables:\n"); for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder()) { @@ -1057,7 +999,6 @@ PhaseStatus Compiler::optInductionVariables() loop->VisitLoopBlocks([=](BasicBlock* block) { - compCurBB = block; for (Statement* stmt : block->NonPhiStatements()) { JITDUMP("Replacing V%02u -> V%02u in [%06u]\n", lcl->GetLclNum(), newLclNum, From 0df9a039b6b6c05af859386e58a168db46ab42e1 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 3 Feb 2024 17:39:37 +0100 Subject: [PATCH 13/64] Do not swap addrecs unnecessarily when folding --- src/coreclr/jit/inductionvariableopts.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 25191fc5e1523f..81feb602d11708 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -582,7 +582,7 @@ Scev* ScalarEvolutionContext::Fold(Scev* scev) if (binop->OperIs(ScevOper::Add, ScevOper::Mul)) { // Normalize addrecs to the left - if (op2->OperIs(ScevOper::AddRec)) + if (op2->OperIs(ScevOper::AddRec) && !op1->OperIs(ScevOper::AddRec)) { std::swap(op1, op2); } From cf31ff68f3258c8ea783019468700100aea3120d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 3 Feb 2024 19:52:16 +0100 Subject: [PATCH 14/64] Add some todos --- src/coreclr/jit/inductionvariableopts.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 81feb602d11708..bfdd2ac5bf02ff 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -827,6 +827,7 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statemen if (node->OperIs(GT_CAST)) { GenTreeCast* cast = node->AsCast(); + // TODO: Overflows if ((cast->gtCastType == TYP_LONG) && cast->IsUnsigned()) { GenTree* op = cast->CastOp(); @@ -939,6 +940,8 @@ PhaseStatus Compiler::optInductionVariables() continue; } + // TODO: Skip DNERs? + Scev* scev = scevContext.Analyze(loop->GetHeader(), stmt->GetRootNode()); if (scev == nullptr) { From c498884a6994aa6d175bc9031a1bb571e831938b Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 3 Feb 2024 14:35:56 +0100 Subject: [PATCH 15/64] JIT: Support more scaled addressing modes on arm64 We currently support scaled addressing modes when the index also needs an extension through contained `BFIZ` nodes. However, we did not support scaled addressing modes if the index was 64 bits. This adds that support as a natural extension to the `GT_LEA`. 
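As a rough illustration (hypothetical C-style source and simplified AArch64
output; the exact instructions depend on the surrounding IR and on register
allocation):

    // 64-bit index 'i' feeding a natural-scale (8-byte) load:
    uint64_t v = *(uint64_t*)((uint8_t*)data + i * 8);

    // previously:       lsl x2, x1, #3
    //                   ldr x0, [x0, x2]
    // with this change: ldr x0, [x0, x1, lsl #3]

The scale must still match the size of the containing indirection, so a
non-natural scale (or a mismatched access size) continues to be computed
outside the addressing mode.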
--- src/coreclr/jit/codegen.h | 10 ++- src/coreclr/jit/codegencommon.cpp | 100 +++++++++++++++-------------- src/coreclr/jit/codegeninterface.h | 1 + src/coreclr/jit/compiler.hpp | 44 ++++++++++++- src/coreclr/jit/gentree.cpp | 27 ++++---- src/coreclr/jit/lower.cpp | 43 ++++++------- 6 files changed, 137 insertions(+), 88 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 18a27b41aff49e..0f91f798bb9468 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -31,8 +31,14 @@ class CodeGen final : public CodeGenInterface // TODO-Cleanup: Abstract out the part of this that finds the addressing mode, and // move it to Lower - virtual bool genCreateAddrMode( - GenTree* addr, bool fold, bool* revPtr, GenTree** rv1Ptr, GenTree** rv2Ptr, unsigned* mulPtr, ssize_t* cnsPtr); + virtual bool genCreateAddrMode(GenTree* addr, + bool fold, + unsigned naturalMul, + bool* revPtr, + GenTree** rv1Ptr, + GenTree** rv2Ptr, + unsigned* mulPtr, + ssize_t* cnsPtr); #ifdef LATE_DISASM virtual const char* siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs); diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 44de5c336ad518..c468473067b6e1 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -965,7 +965,7 @@ void CodeGen::genDefineInlineTempLabel(BasicBlock* label) // Notes: // This only makes an adjustment if !FEATURE_FIXED_OUT_ARGS, if there is no frame pointer, // and if 'block' is a throw helper block with a non-zero stack level. - +// void CodeGen::genAdjustStackLevel(BasicBlock* block) { #if !FEATURE_FIXED_OUT_ARGS @@ -1003,35 +1003,33 @@ void CodeGen::genAdjustStackLevel(BasicBlock* block) #endif // !FEATURE_FIXED_OUT_ARGS } -/***************************************************************************** - * - * Take an address expression and try to find the best set of components to - * form an address mode; returns non-zero if this is successful. - * - * TODO-Cleanup: The RyuJIT backend never uses this to actually generate code. - * Refactor this code so that the underlying analysis can be used in - * the RyuJIT Backend to do lowering, instead of having to call this method with the - * option to not generate the code. - * - * 'fold' specifies if it is OK to fold the array index which hangs off - * a GT_NOP node. - * - * If successful, the parameters will be set to the following values: - * - * *rv1Ptr ... base operand - * *rv2Ptr ... optional operand - * *revPtr ... true if rv2 is before rv1 in the evaluation order - * *mulPtr ... optional multiplier (2/4/8) for rv2 - * Note that for [reg1 + reg2] and [reg1 + reg2 + icon], *mulPtr == 0. - * *cnsPtr ... integer constant [optional] - * - * IMPORTANT NOTE: This routine doesn't generate any code, it merely - * identifies the components that might be used to - * form an address mode later on. - */ - -bool CodeGen::genCreateAddrMode( - GenTree* addr, bool fold, bool* revPtr, GenTree** rv1Ptr, GenTree** rv2Ptr, unsigned* mulPtr, ssize_t* cnsPtr) +//------------------------------------------------------------------------ +// genCreateAddrMode: +// Take an address expression and try to find the best set of components to +// form an address mode; returns true if this is successful. +// +// Parameters: +// addr - Tree that potentially computes an address +// fold - Secifies if it is OK to fold the array index which hangs off a GT_NOP node. +// naturalMul - For arm64 specifies the natural multiplier for the address mode (i.e. 
the size of the parent +// indirection). +// revPtr - [out] True if rv2 is before rv1 in the evaluation order +// rv1Ptr - [out] Base operand +// rv2Ptr - [out] Optional operand +// mulPtr - [out] Optional multiplier for rv2. If non-zero and naturalMul is non-zero, it must match naturalMul. +// cnsPtr - [out] Integer constant [optional] +// +// Returns: +// True if some address mode components were extracted. +// +bool CodeGen::genCreateAddrMode(GenTree* addr, + bool fold, + unsigned naturalMul, + bool* revPtr, + GenTree** rv1Ptr, + GenTree** rv2Ptr, + unsigned* mulPtr, + ssize_t* cnsPtr) { /* The following indirections are valid address modes on x86/x64: @@ -1171,8 +1169,7 @@ bool CodeGen::genCreateAddrMode( goto AGAIN; -#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) - // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index. + // TODO-ARM-CQ: For now we don't try to create a scaled index. case GT_MUL: if (op1->gtOverflow()) { @@ -1182,10 +1179,11 @@ bool CodeGen::genCreateAddrMode( FALLTHROUGH; case GT_LSH: - - mul = op1->GetScaledIndex(); - if (mul) + { + unsigned mulCandidate = op1->GetScaledIndex(); + if (jitIsScaleIndexMul(mulCandidate, naturalMul)) { + mul = mulCandidate; /* We can use "[mul*rv2 + icon]" */ rv1 = nullptr; @@ -1194,7 +1192,7 @@ bool CodeGen::genCreateAddrMode( goto FOUND_AM; } break; -#endif // !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) + } default: break; @@ -1215,8 +1213,8 @@ bool CodeGen::genCreateAddrMode( switch (op1->gtOper) { -#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) - // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index. +#ifdef TARGET_XARCH + // TODO-ARM-CQ: For now we don't try to create a scaled index. case GT_ADD: if (op1->gtOverflow()) @@ -1237,6 +1235,7 @@ bool CodeGen::genCreateAddrMode( } } break; +#endif // TARGET_XARCH case GT_MUL: @@ -1248,11 +1247,12 @@ bool CodeGen::genCreateAddrMode( FALLTHROUGH; case GT_LSH: - - mul = op1->GetScaledIndex(); - if (mul) + { + unsigned mulCandidate = op1->GetScaledIndex(); + if (jitIsScaleIndexMul(mulCandidate, naturalMul)) { /* 'op1' is a scaled value */ + mul = mulCandidate; rv1 = op2; rv2 = op1->AsOp()->gtOp1; @@ -1260,7 +1260,7 @@ bool CodeGen::genCreateAddrMode( int argScale; while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0) { - if (jitIsScaleIndexMul(argScale * mul)) + if (jitIsScaleIndexMul(argScale * mul, naturalMul)) { mul = mul * argScale; rv2 = rv2->AsOp()->gtOp1; @@ -1277,7 +1277,7 @@ bool CodeGen::genCreateAddrMode( goto FOUND_AM; } break; -#endif // !TARGET_ARMARCH && !TARGET_LOONGARCH64 && !TARGET_RISCV64 + } case GT_COMMA: @@ -1291,7 +1291,7 @@ bool CodeGen::genCreateAddrMode( noway_assert(op2); switch (op2->gtOper) { -#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) +#ifdef TARGET_XARCH // TODO-ARM64-CQ, TODO-ARM-CQ: For now we only handle MUL and LSH because // arm doesn't support both scale and offset at the same. Offset is handled // at the emitter as a peephole optimization. 
@@ -1315,6 +1315,7 @@ bool CodeGen::genCreateAddrMode( goto AGAIN; } break; +#endif // TARGET_XARCH case GT_MUL: @@ -1326,16 +1327,17 @@ bool CodeGen::genCreateAddrMode( FALLTHROUGH; case GT_LSH: - - mul = op2->GetScaledIndex(); - if (mul) + { + unsigned mulCandidate = op2->GetScaledIndex(); + if (jitIsScaleIndexMul(mulCandidate, naturalMul)) { + mul = mulCandidate; // 'op2' is a scaled value...is it's argument also scaled? int argScale; rv2 = op2->AsOp()->gtOp1; while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0) { - if (jitIsScaleIndexMul(argScale * mul)) + if (jitIsScaleIndexMul(argScale * mul, naturalMul)) { mul = mul * argScale; rv2 = rv2->AsOp()->gtOp1; @@ -1351,7 +1353,7 @@ bool CodeGen::genCreateAddrMode( goto FOUND_AM; } break; -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64 + } case GT_COMMA: diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index c42ddd7d36d4dd..a28a60b50ca8cd 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -113,6 +113,7 @@ class CodeGenInterface // move it to Lower virtual bool genCreateAddrMode(GenTree* addr, bool fold, + unsigned naturalMul, bool* revPtr, GenTree** rv1Ptr, GenTree** rv2Ptr, diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 216a0c82d51d74..bc1fe8acc5a305 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -196,10 +196,45 @@ inline unsigned __int64 BitsBetween(unsigned __int64 value, unsigned __int64 end (end - 1); // Ones to the right of set bit in the end mask. } -/*****************************************************************************/ - +//------------------------------------------------------------------------------ +// jitIsScaleIndexMul: Check if the value is a valid addressing mode multiplier +// amount for x64. +// +// Parameters: +// val - The multiplier +// +// Returns: +// True if value is 1, 2, 4 or 8. +// inline bool jitIsScaleIndexMul(size_t val) { + // TODO-Cleanup: On arm64 the scale that can be used in addressing modes + // depends on the containing indirection, so this function should be reevaluated. + switch (val) + { + case 1: + case 2: + case 4: + case 8: + return true; + default: + return false; + } +} + +//------------------------------------------------------------------------------ +// jitIsScaleIndexMul: Check if the value is a valid addressing mode multiplier amount. +// +// Parameters: +// val - The multiplier +// naturalMul - For arm64, the natural multiplier (size of containing indirection) +// +// Returns: +// True if the multiplier can be used in an address mode. +// +inline bool jitIsScaleIndexMul(size_t val, unsigned naturalMul) +{ +#if defined(TARGET_XARCH) switch (val) { case 1: @@ -211,6 +246,11 @@ inline bool jitIsScaleIndexMul(size_t val) default: return false; } +#elif defined(TARGET_ARM64) + return val == naturalMul; +#else + return false; +#endif } // Returns "tree" iff "val" is a valid addressing mode scale shift amount on diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index b23eb5d0eb432f..19e0b76047458a 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -4538,20 +4538,21 @@ bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_typ GenTree* base; // This is the base of the address. GenTree* idx; // This is the index. 
- if (codeGen->genCreateAddrMode(addr, false /*fold*/, &rev, &base, &idx, &mul, &cns)) - { + unsigned naturalMul = 0; +#ifdef TARGET_ARM64 + // Multiplier should be a "natural-scale" power of two number which is equal to target's width. + // + // *(ulong*)(data + index * 8); - can be optimized + // *(ulong*)(data + index * 7); - can not be optimized + // *(int*)(data + index * 2); - can not be optimized + // + naturalMul = genTypeSize(type); +#endif -#ifdef TARGET_ARMARCH - // Multiplier should be a "natural-scale" power of two number which is equal to target's width. - // - // *(ulong*)(data + index * 8); - can be optimized - // *(ulong*)(data + index * 7); - can not be optimized - // *(int*)(data + index * 2); - can not be optimized - // - if ((mul > 0) && (genTypeSize(type) != mul)) - { - return false; - } + if (codeGen->genCreateAddrMode(addr, false /*fold*/, naturalMul, &rev, &base, &idx, &mul, &cns)) + { +#ifdef TARGET_ARM64 + assert((mul == 0) || (mul == 1) || (mul == naturalMul)); #endif // We can form a complex addressing mode, so mark each of the interior diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index c859bbe8761351..1cec2935f7922b 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -6135,14 +6135,28 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, GenTree* par ssize_t offset = 0; bool rev = false; + var_types targetType = parent->OperIsIndir() ? parent->TypeGet() : TYP_UNDEF; + + unsigned naturalMul = 0; +#ifdef TARGET_ARM64 + // Multiplier should be a "natural-scale" power of two number which is equal to target's width. + // + // *(ulong*)(data + index * 8); - can be optimized + // *(ulong*)(data + index * 7); - can not be optimized + // *(int*)(data + index * 2); - can not be optimized + // + naturalMul = genTypeSize(targetType); +#endif + // Find out if an addressing mode can be constructed - bool doAddrMode = comp->codeGen->genCreateAddrMode(addr, // address - true, // fold - &rev, // reverse ops - &base, // base addr - &index, // index val - &scale, // scaling - &offset); // displacement + bool doAddrMode = comp->codeGen->genCreateAddrMode(addr, // address + true, // fold + naturalMul, // natural multiplier + &rev, // reverse ops + &base, // base addr + &index, // index val + &scale, // scaling + &offset); // displacement #ifdef TARGET_ARM64 if (parent->OperIsIndir() && parent->AsIndir()->IsVolatile()) @@ -6158,21 +6172,6 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, GenTree* par } #endif - var_types targetType = parent->OperIsIndir() ? parent->TypeGet() : TYP_UNDEF; - -#ifdef TARGET_ARMARCH - // Multiplier should be a "natural-scale" power of two number which is equal to target's width. 
- // - // *(ulong*)(data + index * 8); - can be optimized - // *(ulong*)(data + index * 7); - can not be optimized - // *(int*)(data + index * 2); - can not be optimized - // - if ((scale > 0) && (genTypeSize(targetType) != scale)) - { - return false; - } -#endif - if (scale == 0) { scale = 1; From 39b2a6d027dce2b62933488b0366cafd9b97a1e6 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 5 Feb 2024 13:34:08 +0100 Subject: [PATCH 16/64] Move phase later; compute loops; address some TODOs --- src/coreclr/jit/compiler.cpp | 14 +++++------ src/coreclr/jit/inductionvariableopts.cpp | 30 +++++++++++++++-------- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index d648a9f46c71ad..3e2077747ca862 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4940,13 +4940,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl DoPhase(this, PHASE_EARLY_PROP, &Compiler::optEarlyProp); } - if (doOptimizeIVs) - { - // Simplify and optimize induction variables used in natural loops - // - DoPhase(this, PHASE_OPTIMIZE_INDUCTION_VARIABLES, &Compiler::optInductionVariables); - } - if (doValueNum) { // Value number the trees @@ -5009,6 +5002,13 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl DoPhase(this, PHASE_OPTIMIZE_INDEX_CHECKS, &Compiler::rangeCheckPhase); } + if (doOptimizeIVs) + { + // Simplify and optimize induction variables used in natural loops + // + DoPhase(this, PHASE_OPTIMIZE_INDUCTION_VARIABLES, &Compiler::optInductionVariables); + } + if (doVNBasedDeadStoreRemoval) { // Note: this invalidates SSA and value numbers on tree nodes. diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index bfdd2ac5bf02ff..0bc3ce255e5c1d 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -848,17 +848,16 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statemen node->AsLclVarCommon()->SetLclNum(m_newLclNum); // No cast needed -- the backend allows TYP_INT uses of TYP_LONG locals. break; - case GT_LCL_FLD: - case GT_STORE_LCL_FLD: // TODO: Do we need to skip widening if we have one of these? - node->AsLclFld()->SetLclNum(m_newLclNum); - m_compiler->lvaSetVarDoNotEnregister(m_newLclNum DEBUGARG(DoNotEnregisterReason::LocalField)); - break; case GT_STORE_LCL_VAR: node->AsLclVarCommon()->SetLclNum(m_newLclNum); node->AsLclVarCommon()->gtType = TYP_LONG; node->AsLclVarCommon()->Data() = m_compiler->gtNewCastNode(TYP_LONG, node->AsLclVarCommon()->Data(), true, TYP_LONG); break; + case GT_LCL_FLD: + case GT_STORE_LCL_FLD: + assert(!"Unexpected field use for local not marked as DNER"); + break; default: break; } @@ -913,6 +912,16 @@ PhaseStatus Compiler::optInductionVariables() bool changed = false; + m_dfsTree = fgComputeDfs(); + m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + if (optCanonicalizeLoops()) + { + fgInvalidateDfsTree(); + m_dfsTree = fgComputeDfs(); + m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + changed = true; + } + #ifdef TARGET_64BIT ScalarEvolutionContext scevContext(this); JITDUMP("Widening primary induction variables:\n"); @@ -940,7 +949,11 @@ PhaseStatus Compiler::optInductionVariables() continue; } - // TODO: Skip DNERs? 
+ if (lvaGetDesc(lcl)->lvDoNotEnregister) + { + JITDUMP(" V%02u is marked DNER\n", lcl->GetLclNum()); + continue; + } Scev* scev = scevContext.Analyze(loop->GetHeader(), stmt->GetRootNode()); if (scev == nullptr) @@ -1019,10 +1032,7 @@ PhaseStatus Compiler::optInductionVariables() } #endif - if (changed) - { - fgSsaBuild(); - } + fgInvalidateDfsTree(); return changed ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } From 6f114257d3c6505317d6700551564e8a841865bc Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 5 Feb 2024 14:43:16 +0100 Subject: [PATCH 17/64] Add docs, enhance optIsIVWideningProfitable when zero extensions are sources of stores --- src/coreclr/jit/inductionvariableopts.cpp | 458 +++++++++++++++++----- 1 file changed, 350 insertions(+), 108 deletions(-) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 0bc3ce255e5c1d..f44f729530b6dc 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -1,8 +1,21 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +// This file contains code to analyze how the value of induction variables +// evolve (scalar evolution analysis) and to do optimizations based on it. +// Currently the only optimization done is IV widening. +// The scalar evolution analysis is inspired by "Michael Wolfe. 1992. Beyond +// induction variables." and also by LLVM's scalar evolution. + #include "jitpch.h" +// Evolving values are described using a small IR based around the following +// possible operations. At the core is ScevOper::AddRec, which represents a +// value that evolves by an add recurrence. In dumps it is described by where "loop" is the loop the value is evolving in, "start" is +// the initial value and "step" is the step by which the value evolves in every +// iteration. +// enum class ScevOper { Constant, @@ -28,8 +41,8 @@ static bool ScevOperIs(ScevOper oper, ScevOper operFirst, Args... operTail) struct Scev { - ScevOper Oper; - var_types Type; + const ScevOper Oper; + const var_types Type; Scev(ScevOper oper, var_types type) : Oper(oper), Type(type) { @@ -63,8 +76,8 @@ struct ScevLocal : Scev { } - unsigned LclNum; - unsigned SsaNum; + const unsigned LclNum; + const unsigned SsaNum; }; struct ScevUnop : Scev @@ -73,7 +86,7 @@ struct ScevUnop : Scev { } - Scev* Op1; + Scev* const Op1; }; struct ScevBinop : ScevUnop @@ -82,27 +95,157 @@ struct ScevBinop : ScevUnop { } - Scev* Op2; + Scev* const Op2; }; // Represents a value that evolves by an add recurrence. // The value at iteration N is Start + N * Step. -// "Step" is guaranteed to be invariant in "Loop". +// "Start" and "Step" are guaranteed to be invariant in "Loop". struct ScevAddRec : Scev { - ScevAddRec(var_types type, FlowGraphNaturalLoop* loop, Scev* start, Scev* step) - : Scev(ScevOper::AddRec, type), Loop(loop), Start(start), Step(step) + ScevAddRec(var_types type, Scev* start, Scev* step) + : Scev(ScevOper::AddRec, type), Start(start), Step(step) { } - FlowGraphNaturalLoop* Loop; - Scev* Start; - Scev* Step; + Scev* const Start; + Scev* const Step; }; typedef JitHashTable, Scev*> ScalarEvolutionMap; -static void DumpScev(Scev* scev) +// Scalar evolution is analyzed in the context of a single loop, and are +// computed on-demand by the use of the "Analyze" method on this class, which +// also maintains a cache. 
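+//
+// A minimal usage sketch (hypothetical driver code mirroring the use in
+// optInductionVariables; assumes "loop" is a natural loop and "tree" is a
+// node inside "block", a block of that loop):
+//
+//   ScalarEvolutionContext scevContext(this); // "this" is the Compiler
+//   scevContext.ResetForLoop(loop);
+//   Scev* scev = scevContext.Analyze(block, tree);
+//   if (scev != nullptr)
+//   {
+//       scev = scevContext.Simplify(scev); // fold constants, normalize add recs
+//   }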
+class ScalarEvolutionContext +{ + Compiler* m_comp; + FlowGraphNaturalLoop* m_loop = nullptr; + ScalarEvolutionMap m_cache; + + Scev* AnalyzeNew(BasicBlock* block, GenTree* tree); + Scev* CreateSimpleAddRec(GenTreeLclVarCommon* headerStore, + Scev* start, + BasicBlock* stepDefBlock, + GenTree* stepDefData); + Scev* CreateSimpleInvariantScev(GenTree* tree); + Scev* CreateScevForConstant(GenTreeIntConCommon* tree); + +public: + ScalarEvolutionContext(Compiler* comp) : m_comp(comp), m_cache(comp->getAllocator(CMK_LoopScalarEvolution)) + { + } + + void DumpScev(Scev* scev); + + //------------------------------------------------------------------------ + // ResetForLoop: Reset the internal cache in preparation of scalar + // evolution analysis inside a new loop. + // + // Parameters: + // loop - The loop. + // + void ResetForLoop(FlowGraphNaturalLoop* loop) + { + m_loop = loop; + m_cache.RemoveAll(); + } + + //------------------------------------------------------------------------ + // NewConstant: Create a SCEV node that represents a constant. + // + // Returns: + // The new node. + // + ScevConstant* NewConstant(var_types type, int64_t value) + { + ScevConstant* constant = new (m_comp, CMK_LoopScalarEvolution) ScevConstant(type, value); + return constant; + } + + //------------------------------------------------------------------------ + // NewLocal: Create a SCEV node that represents an invariant local (i.e. a + // use of an SSA def from outside the loop). + // + // Parameters: + // lclNum - The local + // ssaNum - The SSA number of the def outside the loop that is being used. + // + // Returns: + // The new node. + // + ScevLocal* NewLocal(unsigned lclNum, unsigned ssaNum) + { + var_types type = genActualType(m_comp->lvaGetDesc(lclNum)); + ScevLocal* invariantLocal = new (m_comp, CMK_LoopScalarEvolution) ScevLocal(type, lclNum, ssaNum); + return invariantLocal; + } + + //------------------------------------------------------------------------ + // NewExtension: Create a SCEV node that represents a zero or sign extension. + // + // Parameters: + // oper - The operation (ScevOper::ZeroExtend or ScevOper::SignExtend) + // targetType - The target type of the extension + // op - The operand being extended. + // + // Returns: + // The new node. + // + ScevUnop* NewExtension(ScevOper oper, var_types targetType, Scev* op) + { + assert(op != nullptr); + ScevUnop* ext = new (m_comp, CMK_LoopScalarEvolution) ScevUnop(oper, targetType, op); + return ext; + } + + //------------------------------------------------------------------------ + // NewBinop: Create a SCEV node that represents a binary operation. + // + // Parameters: + // oper - The operation + // op1 - First operand + // op2 - Second operand + // + // Returns: + // The new node. + // + ScevBinop* NewBinop(ScevOper oper, Scev* op1, Scev* op2) + { + assert((op1 != nullptr) && (op2 != nullptr)); + ScevBinop* binop = new (m_comp, CMK_LoopScalarEvolution) ScevBinop(oper, op1->Type, op1, op2); + return binop; + } + + //------------------------------------------------------------------------ + // NewAddRec: Create a SCEV node that represents a new add recurrence. + // + // Parameters: + // start - Value of the recurrence at the first iteration + // step - Step value of the recurrence + // + // Returns: + // The new node. 
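+    //
+    // Remarks:
+    //   The new add recurrence implicitly describes a value that evolves in
+    //   the loop currently being analyzed (m_loop); the loop is not stored
+    //   in the node itself.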
+ // + ScevAddRec* NewAddRec(Scev* start, Scev* step) + { + assert((start != nullptr) && (step != nullptr)); + ScevAddRec* addRec = new (m_comp, CMK_LoopScalarEvolution) ScevAddRec(start->Type, start, step); + return addRec; + } + + Scev* Analyze(BasicBlock* block, GenTree* tree); + Scev* Simplify(Scev* scev); +}; + +#ifdef DEBUG +//------------------------------------------------------------------------ +// DumpScev: Print a scev node to stdout. +// +// Parameters: +// scev - The scev node. +// +void ScalarEvolutionContext::DumpScev(Scev* scev) { switch (scev->Oper) { @@ -157,7 +300,7 @@ static void DumpScev(Scev* scev) case ScevOper::AddRec: { ScevAddRec* addRec = (ScevAddRec*)scev; - printf("<" FMT_LP, addRec->Loop->GetIndex()); + printf("<" FMT_LP, m_loop->GetIndex()); printf(", "); DumpScev(addRec->Start); printf(", "); @@ -169,70 +312,18 @@ static void DumpScev(Scev* scev) unreached(); } } +#endif -class ScalarEvolutionContext -{ - Compiler* m_comp; - FlowGraphNaturalLoop* m_loop = nullptr; - ScalarEvolutionMap m_cache; - - Scev* AnalyzeNew(BasicBlock* block, GenTree* tree); - Scev* CreateSimpleAddRec(GenTreeLclVarCommon* headerStore, - Scev* start, - BasicBlock* stepDefBlock, - GenTree* stepDefData); - Scev* CreateSimpleInvariantScev(GenTree* tree); - Scev* CreateScevForConstant(GenTreeIntConCommon* tree); - -public: - ScalarEvolutionContext(Compiler* comp) : m_comp(comp), m_cache(comp->getAllocator(CMK_LoopScalarEvolution)) - { - } - - void ResetForLoop(FlowGraphNaturalLoop* loop) - { - m_loop = loop; - m_cache.RemoveAll(); - } - - ScevConstant* NewConstant(var_types type, int64_t value) - { - ScevConstant* constant = new (m_comp, CMK_LoopScalarEvolution) ScevConstant(type, value); - return constant; - } - - ScevLocal* NewLocal(unsigned lclNum, unsigned ssaNum) - { - var_types type = genActualType(m_comp->lvaGetDesc(lclNum)); - ScevLocal* invariantLocal = new (m_comp, CMK_LoopScalarEvolution) ScevLocal(type, lclNum, ssaNum); - return invariantLocal; - } - - ScevUnop* NewExtension(ScevOper oper, var_types targetType, Scev* op) - { - assert(op != nullptr); - ScevUnop* ext = new (m_comp, CMK_LoopScalarEvolution) ScevUnop(oper, targetType, op); - return ext; - } - - ScevBinop* NewBinop(ScevOper oper, Scev* op1, Scev* op2) - { - assert((op1 != nullptr) && (op2 != nullptr)); - ScevBinop* binop = new (m_comp, CMK_LoopScalarEvolution) ScevBinop(oper, op1->Type, op1, op2); - return binop; - } - - ScevAddRec* NewAddRec(FlowGraphNaturalLoop* loop, Scev* start, Scev* step) - { - assert((start != nullptr) && (step != nullptr)); - ScevAddRec* addRec = new (m_comp, CMK_LoopScalarEvolution) ScevAddRec(start->Type, loop, start, step); - return addRec; - } - - Scev* Analyze(BasicBlock* block, GenTree* tree); - Scev* Fold(Scev* scev); -}; - +//------------------------------------------------------------------------ +// CreateSimpleInvariantScev: Create a "simple invariant" SCEV node for a tree: +// either an invariant local use or a constant. +// +// Parameters: +// tree - The tree +// +// Returns: +// SCEV node or nullptr if the tree is not a simple invariant. +// Scev* ScalarEvolutionContext::CreateSimpleInvariantScev(GenTree* tree) { if (tree->OperIs(GT_CNS_INT, GT_CNS_LNG)) @@ -254,6 +345,15 @@ Scev* ScalarEvolutionContext::CreateSimpleInvariantScev(GenTree* tree) return nullptr; } +//------------------------------------------------------------------------ +// CreateScevForConstant: Given an integer constant, create a SCEV node for it. 
+// +// Parameters: +// tree - The integer constant +// +// Returns: +// SCEV node or nullptr if the integer constant is not representable (e.g. a handle). +// Scev* ScalarEvolutionContext::CreateScevForConstant(GenTreeIntConCommon* tree) { if (tree->IsIconHandle() || !tree->TypeIs(TYP_INT, TYP_LONG)) @@ -264,6 +364,18 @@ Scev* ScalarEvolutionContext::CreateScevForConstant(GenTreeIntConCommon* tree) return NewConstant(tree->TypeGet(), tree->AsIntConCommon()->IntegralValue()); } +//------------------------------------------------------------------------ +// AnalyzeNew: Analyze the specified tree in the specified block, without going +// through the cache. +// +// Parameters: +// block - Block containing the tree +// tree - Tree node +// +// Returns: +// SCEV node if the tree was analyzable; otherwise nullptr if the value is +// cannot be described. +// Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) { switch (tree->OperGet()) @@ -466,6 +578,20 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) } } +//------------------------------------------------------------------------ +// CreateSimpleAddRec: Create a "simple" add-recurrence. This handles the most +// common patterns for primary induction variables where we see a store like +// "i = i + 1". +// +// Parameters: +// headerStore - Phi definition of the candidate primary induction variable +// enterScev - SCEV describing start value of the primary induction variable +// stepDefBlock - Block containing the def of the step value +// stepDefData - Value of the def of the step value +// +// Returns: +// SCEV node if this is a simple addrec shape. Otherwise nullptr. +// Scev* ScalarEvolutionContext::CreateSimpleAddRec(GenTreeLclVarCommon* headerStore, Scev* enterScev, BasicBlock* stepDefBlock, @@ -500,9 +626,20 @@ Scev* ScalarEvolutionContext::CreateSimpleAddRec(GenTreeLclVarCommon* headerStor return nullptr; } - return NewAddRec(m_loop, enterScev, stepScev); + return NewAddRec(enterScev, stepScev); } +//------------------------------------------------------------------------ +// Analyze: Analyze the specified tree in the specified block. +// +// Parameters: +// block - Block containing the tree +// tree - Tree node +// +// Returns: +// SCEV node if the tree was analyzable; otherwise nullptr if the value is +// cannot be described. +// Scev* ScalarEvolutionContext::Analyze(BasicBlock* block, GenTree* tree) { Scev* result; @@ -515,8 +652,22 @@ Scev* ScalarEvolutionContext::Analyze(BasicBlock* block, GenTree* tree) return result; } +//------------------------------------------------------------------------ +// FoldBinop: Fold simple binops. +// +// Type parameters: +// T - Type that the binop is being evaluated in +// +// Parameters: +// oper - Binary operation +// op1 - First operand +// op2 - Second operand +// +// Returns: +// Folded value. +// template -static T FoldArith(ScevOper oper, T op1, T op2) +static T FoldBinop(ScevOper oper, T op1, T op2) { switch (oper) { @@ -531,7 +682,23 @@ static T FoldArith(ScevOper oper, T op1, T op2) } } -Scev* ScalarEvolutionContext::Fold(Scev* scev) +//------------------------------------------------------------------------ +// Simplify: Try to simplify a SCEV node by folding and canonicalization. +// +// Parameters: +// scev - The node +// +// Returns: +// Simplified node. +// +// Remarks: +// Canonicalization is done for binops; constants are moved to the right and +// addrecs are moved to the left. +// +// Simple unops/binops on constants are folded. 
Operands are distributed into +// add recs whenever possible. +// +Scev* ScalarEvolutionContext::Simplify(Scev* scev) { switch (scev->Oper) { @@ -546,7 +713,7 @@ Scev* ScalarEvolutionContext::Fold(Scev* scev) ScevUnop* unop = (ScevUnop*)scev; assert(genTypeSize(unop->Type) >= genTypeSize(unop->Op1->Type)); - Scev* op1 = Fold(unop->Op1); + Scev* op1 = Simplify(unop->Op1); if (unop->Type == op1->Type) { @@ -576,8 +743,8 @@ Scev* ScalarEvolutionContext::Fold(Scev* scev) case ScevOper::Lsh: { ScevBinop* binop = (ScevBinop*)scev; - Scev* op1 = Fold(binop->Op1); - Scev* op2 = Fold(binop->Op2); + Scev* op1 = Simplify(binop->Op1); + Scev* op2 = Simplify(binop->Op2); if (binop->OperIs(ScevOper::Add, ScevOper::Mul)) { @@ -598,11 +765,11 @@ Scev* ScalarEvolutionContext::Fold(Scev* scev) // + x => // * x => ScevAddRec* addRec = (ScevAddRec*)op1; - Scev* newStart = Fold(NewBinop(binop->Oper, addRec->Start, op2)); + Scev* newStart = Simplify(NewBinop(binop->Oper, addRec->Start, op2)); Scev* newStep = scev->OperIs(ScevOper::Mul, ScevOper::Lsh) - ? Fold(NewBinop(binop->Oper, addRec->Step, op2)) + ? Simplify(NewBinop(binop->Oper, addRec->Step, op2)) : addRec->Step; - return NewAddRec(addRec->Loop, newStart, newStep); + return NewAddRec(newStart, newStep); } if (op1->OperIs(ScevOper::Constant) && op2->OperIs(ScevOper::Constant)) @@ -612,13 +779,13 @@ Scev* ScalarEvolutionContext::Fold(Scev* scev) int64_t newValue; if (binop->TypeIs(TYP_INT)) { - newValue = FoldArith(binop->Oper, static_cast(cns1->Value), + newValue = FoldBinop(binop->Oper, static_cast(cns1->Value), static_cast(cns2->Value)); } else { assert(binop->TypeIs(TYP_LONG)); - newValue = FoldArith(binop->Oper, cns1->Value, cns2->Value); + newValue = FoldBinop(binop->Oper, cns1->Value, cns2->Value); } return NewConstant(binop->Type, newValue); @@ -629,15 +796,38 @@ Scev* ScalarEvolutionContext::Fold(Scev* scev) case ScevOper::AddRec: { ScevAddRec* addRec = (ScevAddRec*)scev; - Scev* start = Fold(addRec->Start); - Scev* step = Fold(addRec->Step); - return (start == addRec->Start) && (step == addRec->Step) ? addRec : NewAddRec(addRec->Loop, start, step); + Scev* start = Simplify(addRec->Start); + Scev* step = Simplify(addRec->Step); + return (start == addRec->Start) && (step == addRec->Step) ? addRec : NewAddRec(start, step); } default: unreached(); } } +//------------------------------------------------------------------------ +// optCanSinkWidenedIV: Check to see if we are able to sink a store to the old +// local into the exits of a loop if we decide to widen. +// +// Parameters: +// lclNum - The primary induction variable +// loop - The loop +// +// Returns: +// True if we can sink a store to the old local after widening. +// +// Remarks: +// This handles the situation where the primary induction variable is used +// after the loop. In those cases we need to store the widened local back +// into the old one in the exits where the IV variable is live. +// +// We are able to sink when none of the exits are critical blocks, in the +// sense that all their predecessors must come from inside the loop. +// +// TODO-CQ: If we canonicalize loop exits we can guarantee this property for +// regular exits; that will allow us to always sink except when the loop is +// enclosed in a try region whose handler also uses the IV variable. 
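+//
+// Concretely, sinking means each such exit block gets a new statement that
+// stores the widened IV back into the narrow one, conceptually
+// "V01 = (int)V02" for a narrow local V01 that was widened into V02.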
+// bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) { LclVarDsc* dsc = lvaGetDesc(lclNum); @@ -667,6 +857,27 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) return result == BasicBlockVisit::Continue; } +//------------------------------------------------------------------------ +// optIsIVWideningProfitable: Check to see if IV widening is profitable. +// +// Parameters: +// lclNum - The primary induction variable +// addRec - Value of the induction variable +// loop - The loop +// +// Returns: +// True if IV widening is profitable. +// +// Remarks: +// IV widening is generally profitable when it allows us to remove casts +// inside the loop. However, it may also introduce other reg-reg moves: +// 1. We may need to store the narrow IV into the wide one in the +// preheader. This is necessary when the start value is not constant. If +// the start value _is_ constant then we assume that the constant store to +// the narrow local will be a DCE'd. +// 2. We need to store the wide IV back into the narrow one in each of +// the exits where the narrow IV is live-in. +// bool Compiler::optIsIVWideningProfitable(unsigned lclNum, ScevAddRec* addRec, FlowGraphNaturalLoop* loop) { struct CountZeroExtensionsVisitor : GenTreeVisitor @@ -689,21 +900,32 @@ bool Compiler::optIsIVWideningProfitable(unsigned lclNum, ScevAddRec* addRec, Fl fgWalkResult PreOrderVisit(GenTree** use, GenTree* parent) { GenTree* node = *use; - if (node->OperIs(GT_CAST)) + if (!node->OperIs(GT_CAST)) { - GenTreeCast* cast = node->AsCast(); - if ((cast->gtCastType == TYP_LONG) && cast->IsUnsigned()) - { - GenTree* op = cast->CastOp(); - if (op->OperIs(GT_LCL_VAR) && (op->AsLclVarCommon()->GetLclNum() == m_lclNum)) - { - NumExtensions++; - return WALK_SKIP_SUBTREES; - } - } + return WALK_CONTINUE; + } + + GenTreeCast* cast = node->AsCast(); + if ((cast->gtCastType != TYP_LONG) || !cast->IsUnsigned()) + { + return WALK_CONTINUE; } - return WALK_CONTINUE; + GenTree* op = cast->CastOp(); + if (!op->OperIs(GT_LCL_VAR) || (op->AsLclVarCommon()->GetLclNum() != m_lclNum)) + { + return WALK_CONTINUE; + } + + // If this is already the source of a store then it is going to be + // free in our backends regardless. + if ((parent != nullptr) && parent->OperIs(GT_STORE_LCL_VAR)) + { + return WALK_CONTINUE; + } + + NumExtensions++; + return WALK_SKIP_SUBTREES; } }; @@ -735,9 +957,8 @@ bool Compiler::optIsIVWideningProfitable(unsigned lclNum, ScevAddRec* addRec, Fl } else { - // If this is a constant then we are likely going to save the cost of - // initializing the narrow local which will balance out initializing - // the widened local. + // If this is a constant then we make the assumption that we will be + // able to DCE the constant initialization of the narrow local. } // Now account for the cost of sinks. @@ -776,6 +997,18 @@ bool Compiler::optIsIVWideningProfitable(unsigned lclNum, ScevAddRec* addRec, Fl return false; } +//------------------------------------------------------------------------ +// optSinkWidenedIV: Create stores back to the narrow IV in the exits where +// that is necessary. +// +// Parameters: +// lclNum - Narrow version of primary induction variable +// newLclNum - Wide version of primary induction variable +// loop - The loop +// +// Returns: +// True if any store was created in any exit block. 
+// bool Compiler::optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNaturalLoop* loop) { bool anySunk = false; @@ -800,6 +1033,15 @@ bool Compiler::optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNa return anySunk; } +//------------------------------------------------------------------------ +// optReplaceWidenedIV: Replace uses of the narrow IV with the wide IV in the +// specified statement. +// +// Parameters: +// lclNum - Narrow version of primary induction variable +// newLclNum - Wide version of primary induction variable +// stmt - The statement to replace uses in. +// void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statement* stmt) { struct ReplaceVisitor : GenTreeVisitor @@ -962,9 +1204,9 @@ PhaseStatus Compiler::optInductionVariables() continue; } - scev = scevContext.Fold(scev); + scev = scevContext.Simplify(scev); JITDUMP(" => "); - DBEXEC(verbose, DumpScev(scev)); + DBEXEC(verbose, scevContext.DumpScev(scev)); JITDUMP("\n"); if (!scev->OperIs(ScevOper::AddRec)) { From 04c3652d663a24c32aa7b1c344888e995f86b233 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 5 Feb 2024 14:43:41 +0100 Subject: [PATCH 18/64] Run jit-format --- src/coreclr/jit/inductionvariableopts.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index f44f729530b6dc..0ef03d3f23d9df 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -5,7 +5,7 @@ // evolve (scalar evolution analysis) and to do optimizations based on it. // Currently the only optimization done is IV widening. // The scalar evolution analysis is inspired by "Michael Wolfe. 1992. Beyond -// induction variables." and also by LLVM's scalar evolution. +// induction variables." and also by LLVM's scalar evolution. #include "jitpch.h" @@ -103,8 +103,7 @@ struct ScevBinop : ScevUnop // "Start" and "Step" are guaranteed to be invariant in "Loop". 
struct ScevAddRec : Scev { - ScevAddRec(var_types type, Scev* start, Scev* step) - : Scev(ScevOper::AddRec, type), Start(start), Step(step) + ScevAddRec(var_types type, Scev* start, Scev* step) : Scev(ScevOper::AddRec, type), Start(start), Step(step) { } @@ -1155,13 +1154,13 @@ PhaseStatus Compiler::optInductionVariables() bool changed = false; m_dfsTree = fgComputeDfs(); - m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); if (optCanonicalizeLoops()) { fgInvalidateDfsTree(); m_dfsTree = fgComputeDfs(); - m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); - changed = true; + m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + changed = true; } #ifdef TARGET_64BIT From b16aa2e4558a5809893c6dd59f7e5f11007c5ca6 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 5 Feb 2024 14:51:13 +0100 Subject: [PATCH 19/64] Remove unnecessary config var --- src/coreclr/jit/inductionvariableopts.cpp | 5 ----- src/coreclr/jit/jitconfigvalues.h | 2 -- 2 files changed, 7 deletions(-) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 0ef03d3f23d9df..76424ae6a9c1f8 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -1136,11 +1136,6 @@ PhaseStatus Compiler::optInductionVariables() { JITDUMP("*************** In optInductionVariables()\n"); - if (JitConfig.JitEnableInductionVariableOpts() == 0) - { - return PhaseStatus::MODIFIED_NOTHING; - } - #ifdef DEBUG static ConfigMethodRange s_range; s_range.EnsureInit(JitConfig.JitEnableInductionVariableOptsRange()); diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 767ff46276cebe..7c6e95bbb88a3e 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -695,8 +695,6 @@ CONFIG_INTEGER(JitEnablePhysicalPromotion, W("JitEnablePhysicalPromotion"), 1) // Enable cross-block local assertion prop CONFIG_INTEGER(JitEnableCrossBlockLocalAssertionProp, W("JitEnableCrossBlockLocalAssertionProp"), 1) -CONFIG_INTEGER(JitEnableInductionVariableOpts, W("JitEnableInductionVariableOpts"), 1) - #if defined(DEBUG) // JitFunctionFile: Name of a file that contains a list of functions. If the currently compiled function is in the // file, certain other JIT config variables will be active. If the currently compiled function is not in the file, From 666bcf36f28dc6864abcc9d81af09972f61d5fb7 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 7 Feb 2024 15:12:21 +0100 Subject: [PATCH 20/64] JIT: Canonicalize loop exits This adds another canonicalization requirement for loops during the optimization phases, namely that all regular loop exit blocks have only loop predecessors. This gives a natural place to insert IR that we want to run only when we know the loop was entered. Exceptional loop exit blocks can still have non-loop predecessors, so these must still be accounted for by optimizations. 
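For example, consider an exit block E that is reached both from a block
B1 inside the loop and from a block B2 outside it. Canonicalization
inserts a new empty block E' that branches directly to E, and redirects
the loop's edge through it:

  B1 (in loop) -> E' -> E <- B2 (outside loop)

IR inserted into E' then runs only when the loop was actually entered
and exited.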
--- src/coreclr/jit/compiler.cpp | 2 +- src/coreclr/jit/compiler.h | 15 ++- src/coreclr/jit/compiler.hpp | 45 +++++++ src/coreclr/jit/fgdiagnostic.cpp | 10 +- src/coreclr/jit/loopcloning.cpp | 46 +++++++- src/coreclr/jit/optimizer.cpp | 193 ++++++++++++++++++++++++++----- 6 files changed, 275 insertions(+), 36 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 5fac5c8e0704c6..d2e98f4afaa315 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5035,7 +5035,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl } } - optLoopsRequirePreHeaders = false; + optLoopsCanonical = false; #ifdef DEBUG DoPhase(this, PHASE_STRESS_SPLIT_TREE, &Compiler::StressSplitTree); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 48bd2523cd03eb..6113f8af235855 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2184,6 +2184,9 @@ class FlowGraphNaturalLoop template BasicBlockVisit VisitLoopBlocksLexical(TFunc func); + template + BasicBlockVisit VisitRegularExitBlocks(TFunc func); + BasicBlock* GetLexicallyTopMostBlock(); BasicBlock* GetLexicallyBottomMostBlock(); @@ -4971,7 +4974,10 @@ class Compiler FlowGraphDominatorTree* m_domTree; BlockReachabilitySets* m_reachabilitySets; - bool optLoopsRequirePreHeaders; // Do we require that all loops (in m_loops) have pre-headers? + // Do we require loops to be in canonical form? + // 1. All loops have preheaders (single entry blocks that always enter the loop) + // 2. All regular loop exits have only loop predecessors + bool optLoopsCanonical; unsigned optNumNaturalLoopsFound; // Number of natural loops found in the loop finding phase bool fgBBVarSetsInited; @@ -5906,7 +5912,7 @@ class Compiler PhaseStatus fgCanonicalizeFirstBB(); - void fgSetEHRegionForNewPreheader(BasicBlock* preheader); + void fgSetEHRegionForNewPreheaderOrExit(BasicBlock* preheader); void fgUnreachableBlock(BasicBlock* block); @@ -6785,6 +6791,8 @@ class Compiler void optFindLoops(); bool optCanonicalizeLoops(); + void optFindAndCanonicalizeLoops(); + void optCompactLoops(); void optCompactLoop(FlowGraphNaturalLoop* loop); BasicBlock* optFindLoopCompactionInsertionPoint(FlowGraphNaturalLoop* loop, BasicBlock* top); @@ -6792,6 +6800,9 @@ class Compiler bool optCreatePreheader(FlowGraphNaturalLoop* loop); void optSetPreheaderWeight(FlowGraphNaturalLoop* loop, BasicBlock* preheader); + bool optCanonicalizeExits(FlowGraphNaturalLoop* loop); + bool optCanonicalizeExit(FlowGraphNaturalLoop* loop, BasicBlock* exit); + PhaseStatus optCloneLoops(); void optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context); PhaseStatus optUnrollLoops(); // Unrolls loops (needs to have cost info) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index d664a57bd24143..422eb741847e7f 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -4969,6 +4969,51 @@ BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocksLexical(TFunc func) return BasicBlockVisit::Continue; } +//------------------------------------------------------------------------------ +// FlowGraphNaturalLoop::VisitRegularExitBlocks: Visit blocks that are outside +// the loop but that may have regular predecessors inside the loop. +// +// Type parameters: +// TFunc - Callback functor type +// +// Arguments: +// func - Callback functor that takes a BasicBlock* and returns a +// BasicBlockVisit. 
+// +// Returns: +// BasicBlockVisit that indicated whether the visit was aborted by the +// callback or whether all blocks were visited. +// +template +BasicBlockVisit FlowGraphNaturalLoop::VisitRegularExitBlocks(TFunc func) +{ + Compiler* comp = m_dfsTree->GetCompiler(); + + BitVecTraits traits = m_dfsTree->PostOrderTraits(); + BitVec visited(BitVecOps::MakeEmpty(&traits)); + + for (FlowEdge* edge : ExitEdges()) + { + BasicBlockVisit result = edge->getSourceBlock()->VisitRegularSuccs(comp, [&](BasicBlock* succ) { + assert(m_dfsTree->Contains(succ)); + if (!ContainsBlock(succ) && BitVecOps::TryAddElemD(&traits, visited, succ->bbPostorderNum) && + (func(succ) == BasicBlockVisit::Abort)) + { + return BasicBlockVisit::Abort; + } + + return BasicBlockVisit::Continue; + }); + + if (result == BasicBlockVisit::Abort) + { + return BasicBlockVisit::Abort; + } + } + + return BasicBlockVisit::Continue; +} + /*****************************************************************************/ #endif //_COMPILER_HPP_ /*****************************************************************************/ diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index ed3f4252bed5ab..ff5cf82ae4456e 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -4719,12 +4719,20 @@ void Compiler::fgDebugCheckLoops() { return; } - if (optLoopsRequirePreHeaders) + if (optLoopsCanonical) { for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder()) { assert(loop->EntryEdges().size() == 1); assert(loop->EntryEdge(0)->getSourceBlock()->KindIs(BBJ_ALWAYS)); + + loop->VisitRegularExitBlocks([=](BasicBlock* exit) { + for (BasicBlock* pred : exit->PredBlocks()) + { + assert(loop->ContainsBlock(pred)); + } + return BasicBlockVisit::Continue; + }); } } } diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 23848154bcff15..834e9e3f9d27cf 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -2057,6 +2057,41 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex assert(condLast->NextIs(fastPreheader)); condLast->SetFalseTarget(fastPreheader); fgAddRefPred(fastPreheader, condLast); + + //// Now canonicalize exits for both the cold and hot loops. + // ArrayStack exitBlocks(getAllocator(CMK_LoopClone)); + // loop->VisitRegularExitBlocks([&exitBlocks](BasicBlock* exit) { + // exitBlocks.Push(exit); + // return BasicBlockVisit::Continue; + //}); + + // for (int i = 0; i < exitBlocks.Height(); i++) + //{ + // BasicBlock* exit = exitBlocks.Bottom(i); + // // Canonicalization should have already ensured this. 
+ // assert(!exit->KindIs(BBJ_CALLFINALLY)); + + // BasicBlock* coldExit = fgNewBBbefore(BBJ_ALWAYS, exit, false, exit); + // coldExit->SetFlags(BBF_NONE_QUIRK | BBF_INTERNAL); + // coldExit->bbCodeOffs = exit->bbCodeOffs; + // fgSetEHRegionForNewPreheader(coldExit); + // fgAddRefPred(exit, coldExit); + + // BasicBlock* hotExit = fgNewBBbefore(BBJ_ALWAYS, exit, false, exit); + // hotExit->SetFlags(BBF_NONE_QUIRK | BBF_INTERNAL); + // hotExit->bbCodeOffs = exit->bbCodeOffs; + // fgSetEHRegionForNewPreheader(hotExit); + // fgAddRefPred(exit, hotExit); + + // for (BasicBlock* pred : exit->PredBlocks()) + // { + // if (loop->ContainsBlock(pred)) + // { + // fgReplaceJumpTarget(pred, exit, hotExit); + // fgReplaceJumpTarget((*blockMap)[pred], exit, coldExit); + // } + // } + //} } //------------------------------------------------------------------------- @@ -2969,11 +3004,18 @@ PhaseStatus Compiler::optCloneLoops() if (optLoopsCloned > 0) { - fgRenumberBlocks(); - fgInvalidateDfsTree(); m_dfsTree = fgComputeDfs(); m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + + if (optCanonicalizeLoops()) + { + fgInvalidateDfsTree(); + m_dfsTree = fgComputeDfs(); + m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + } + + fgRenumberBlocks(); } #ifdef DEBUG diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 05eb5c7d3b9fb1..5e53f26082649a 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -21,8 +21,8 @@ void Compiler::optInit() { fgHasLoops = false; - optLoopsRequirePreHeaders = false; - optNumNaturalLoopsFound = 0; + optLoopsCanonical = false; + optNumNaturalLoopsFound = 0; #ifdef DEBUG loopAlignCandidates = 0; @@ -1319,6 +1319,13 @@ PhaseStatus Compiler::optUnrollLoops() fgDfsBlocksAndRemove(); m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + if (optCanonicalizeLoops()) + { + fgInvalidateDfsTree(); + m_dfsTree = fgComputeDfs(); + m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + } + fgRenumberBlocks(); DBEXEC(verbose, fgDispBasicBlocks()); @@ -2680,8 +2687,8 @@ void Compiler::optFindLoops() fgRenumberBlocks(); - // Starting now, we require all loops to have pre-headers. - optLoopsRequirePreHeaders = true; + // Starting now we require all loops to be in canonical form. + optLoopsCanonical = true; // Leave a bread crumb for future phases like loop alignment about whether // looking for loops makes sense. We generally do not expect phases to @@ -2709,11 +2716,28 @@ void Compiler::optFindLoops() bool Compiler::optCanonicalizeLoops() { bool changed = false; + for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder()) { changed |= optCreatePreheader(loop); } + // At this point we've created preheaders. That means we are working with + // stale loop and DFS data. However, we can do exit canonicalization even + // on the stale data; this relies on the fact that exiting blocks do not + // change as a result of creating preheaders. On the other hand the exit + // blocks themselves may have changed (previously it may have been another + // loop's header, now it might be its preheader instead). Exit + // canonicalization stil works even with this. + // + // The exit canonicalization needs to be done in post order (inner -> outer + // loops) so that inner exits that also exit outer loops have proper exit + // blocks created for each loop. 
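+    // As an example, if a block exits both an inner loop and its enclosing
+    // loop, then canonicalizing the inner loop first gives it a dedicated
+    // exit block, which the enclosing loop then sees (and canonicalizes) as
+    // one of its own exits.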
+ for (FlowGraphNaturalLoop* loop : m_loops->InPostOrder()) + { + changed |= optCanonicalizeExits(loop); + } + return changed; } @@ -2948,7 +2972,7 @@ bool Compiler::optCreatePreheader(FlowGraphNaturalLoop* loop) BasicBlock* preheader = fgNewBBbefore(BBJ_ALWAYS, insertBefore, false, header); preheader->SetFlags(BBF_INTERNAL); - fgSetEHRegionForNewPreheader(preheader); + fgSetEHRegionForNewPreheaderOrExit(preheader); if (preheader->NextIs(header)) { @@ -3103,6 +3127,119 @@ void Compiler::optSetPreheaderWeight(FlowGraphNaturalLoop* loop, BasicBlock* pre edgeFromPreheader->setEdgeWeights(preheader->bbWeight, preheader->bbWeight, loop->GetHeader()); } +//----------------------------------------------------------------------------- +// optCanonicalizeExits: Canonicalize all regular exits of the loop so that +// they have only loop predecessors. +// +// Parameters: +// loop - The loop +// +// Returns: +// True if any flow graph modifications were made. +// +bool Compiler::optCanonicalizeExits(FlowGraphNaturalLoop* loop) +{ + bool changed = false; + + for (FlowEdge* edge : loop->ExitEdges()) + { + // Find all blocks outside the loop from this exiting block. Those + // blocks are exits. Note that we may see preheaders created by + // previous canonicalization here, which are not part of the DFS tree + // or properly maintained in a parent loop. The canonicalization here + // works despite this. + edge->getSourceBlock()->VisitRegularSuccs(this, [=, &changed](BasicBlock* succ) { + if (!loop->ContainsBlock(succ)) + { + changed |= optCanonicalizeExit(loop, succ); + } + + return BasicBlockVisit::Continue; + }); + } + + return changed; +} + +//----------------------------------------------------------------------------- +// optCanonicalizeExit: Canonicalize a single exit block +// they have only loop predecessors. +// +// Parameters: +// loop - The loop +// +// Returns: +// True if any flow graph modifications were made. +// +bool Compiler::optCanonicalizeExit(FlowGraphNaturalLoop* loop, BasicBlock* exit) +{ + assert(!loop->ContainsBlock(exit)); + + bool allLoopPreds = true; + for (BasicBlock* pred : exit->PredBlocks()) + { + if (!loop->ContainsBlock(pred)) + { + allLoopPreds = false; + break; + } + } + + if (allLoopPreds) + { + // Already canonical + JITDUMP("All preds of exit " FMT_BB " of " FMT_LP " are already in the loop, no exit canonicalization needed\n", + exit->bbNum, loop->GetIndex()); + return false; + } + + BasicBlock* newExit; + + if (exit->KindIs(BBJ_CALLFINALLY)) + { + // Branches to a BBJ_CALLFINALLY _must_ come from inside its associated + // try region. First try to see if the lexically bottom most block is + // part of the try; if so, inserting after that is a good choice. + BasicBlock* finallyBlock = exit->GetTarget(); + assert(finallyBlock->hasHndIndex()); + BasicBlock* bottom = loop->GetLexicallyBottomMostBlock(); + if (bottom->hasTryIndex() && (bottom->getTryIndex() == finallyBlock->getHndIndex()) && !bottom->hasHndIndex()) + { + newExit = fgNewBBafter(BBJ_ALWAYS, bottom, true, exit); + } + else + { + // Otherwise just do the heavy-handed thing and insert it anywhere in the right region. 
+ newExit = fgNewBBinRegion(BBJ_ALWAYS, finallyBlock->bbHndIndex, 0, nullptr, exit, false, false, true); + } + } + else + { + newExit = fgNewBBbefore(BBJ_ALWAYS, exit, false, exit); + newExit->SetFlags(BBF_NONE_QUIRK); + fgSetEHRegionForNewPreheaderOrExit(newExit); + } + + newExit->SetFlags(BBF_INTERNAL); + + fgAddRefPred(exit, newExit); + + newExit->bbCodeOffs = exit->bbCodeOffs; + + JITDUMP("Created new exit " FMT_BB " to replace " FMT_BB " for " FMT_LP "\n", newExit->bbNum, exit->bbNum, + loop->GetIndex()); + + for (BasicBlock* pred : exit->PredBlocks()) + { + if (loop->ContainsBlock(pred)) + { + fgReplaceJumpTarget(pred, exit, newExit); + } + } + + return true; +} + /***************************************************************************** * * See if the given tree can be computed in the given precision (which must @@ -5083,43 +5220,39 @@ bool Compiler::optVNIsLoopInvariant(ValueNum vn, FlowGraphNaturalLoop* loop, VNS } //------------------------------------------------------------------------------ -// fgSetEHRegionForNewPreheader: Set the EH region for a newly inserted -// preheader. -// -// In which EH region should the header live? +// fgSetEHRegionForNewPreheaderOrExit: Set the EH region for a newly inserted +// preheader or exit block. // -// The preheader block is expected to have been added immediately before a -// block `next` in the loop that is also in the same EH region as the header. -// This is usually the lexically first block of the loop, but may also be the -// header itself. +// In which EH region should the block live? // -// If the `next` block is NOT the first block of a `try` region, the preheader -// can simply extend the header block's EH region. +// If the `next` block is NOT the first block of a `try` region, the new block +// can simply extend the next block's EH region. // // If the `next` block IS the first block of a `try`, we find its parent region // and use that. For mutual-protect regions, we need to find the actual parent, // as the block stores the most "nested" mutual region. For non-mutual-protect // regions, due to EH canonicalization, we are guaranteed that no other EH // regions begin on the same block, so looking to just the parent is -// sufficient. Note that we can't just extend the EH region of the header to -// the preheader, because the header will still be the target of backward -// branches from within the loop. If those backward branches come from outside -// the `try` (say, only the top half of the loop is a `try` region), then we -// can't branch to a non-first `try` region block (you always must enter the -// `try` in the first block). +// sufficient. +// Note that we can't just extend the EH region of the next block to the new +// block, because it may still be the target of other branches. If those +// branches come from outside the `try` then we can't branch to a non-first +// `try` region block (you always must enter the `try` in the first block). For +// example, for the preheader we can have backedges that come from outside the +// `try` (if, say, only the top half of the loop is a `try` region). For exits, +// we could similarly have branches to the old exit block from outside the `try`. // // Note that hoisting any code out of a try region, for example, to a preheader // block in a different EH region, needs to ensure that no exceptions will be -// thrown. +// thrown. Similar considerations are required for exits. 
// // Arguments: -// preheader - the new preheader block, which has already been added to the -// block list before a block inside the loop that shares EH -// region with the header. +// block - the new block, which has already been added to the +// block list. // -void Compiler::fgSetEHRegionForNewPreheader(BasicBlock* preheader) +void Compiler::fgSetEHRegionForNewPreheaderOrExit(BasicBlock* block) { - BasicBlock* next = preheader->Next(); + BasicBlock* next = block->Next(); if (bbIsTryBeg(next)) { @@ -5129,15 +5262,15 @@ void Compiler::fgSetEHRegionForNewPreheader(BasicBlock* preheader) if (newTryIndex == EHblkDsc::NO_ENCLOSING_INDEX) { // No EH try index. - preheader->clearTryIndex(); + block->clearTryIndex(); } else { - preheader->setTryIndex(newTryIndex); + block->setTryIndex(newTryIndex); } // What handler region to use? Use the same handler region as `next`. - preheader->copyHndIndex(next); + block->copyHndIndex(next); } else { From 30bbdd83c067d5e7cadd1e8110e5d2ca8eaf664e Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 7 Feb 2024 15:16:03 +0100 Subject: [PATCH 21/64] Remove some manual canonicalization code --- src/coreclr/jit/loopcloning.cpp | 35 --------------------------------- 1 file changed, 35 deletions(-) diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 834e9e3f9d27cf..ca4c2572fa41d9 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -2057,41 +2057,6 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex assert(condLast->NextIs(fastPreheader)); condLast->SetFalseTarget(fastPreheader); fgAddRefPred(fastPreheader, condLast); - - //// Now canonicalize exits for both the cold and hot loops. - // ArrayStack exitBlocks(getAllocator(CMK_LoopClone)); - // loop->VisitRegularExitBlocks([&exitBlocks](BasicBlock* exit) { - // exitBlocks.Push(exit); - // return BasicBlockVisit::Continue; - //}); - - // for (int i = 0; i < exitBlocks.Height(); i++) - //{ - // BasicBlock* exit = exitBlocks.Bottom(i); - // // Canonicalization should have already ensured this. 
- // assert(!exit->KindIs(BBJ_CALLFINALLY)); - - // BasicBlock* coldExit = fgNewBBbefore(BBJ_ALWAYS, exit, false, exit); - // coldExit->SetFlags(BBF_NONE_QUIRK | BBF_INTERNAL); - // coldExit->bbCodeOffs = exit->bbCodeOffs; - // fgSetEHRegionForNewPreheader(coldExit); - // fgAddRefPred(exit, coldExit); - - // BasicBlock* hotExit = fgNewBBbefore(BBJ_ALWAYS, exit, false, exit); - // hotExit->SetFlags(BBF_NONE_QUIRK | BBF_INTERNAL); - // hotExit->bbCodeOffs = exit->bbCodeOffs; - // fgSetEHRegionForNewPreheader(hotExit); - // fgAddRefPred(exit, hotExit); - - // for (BasicBlock* pred : exit->PredBlocks()) - // { - // if (loop->ContainsBlock(pred)) - // { - // fgReplaceJumpTarget(pred, exit, hotExit); - // fgReplaceJumpTarget((*blockMap)[pred], exit, coldExit); - // } - // } - //} } //------------------------------------------------------------------------- From 1bb370ca07a3f17db88cfa6bb2cc84e2fe044168 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 7 Feb 2024 15:18:10 +0100 Subject: [PATCH 22/64] Remove unused declaration --- src/coreclr/jit/compiler.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 6113f8af235855..9fbb16047876c6 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6791,7 +6791,6 @@ class Compiler void optFindLoops(); bool optCanonicalizeLoops(); - void optFindAndCanonicalizeLoops(); void optCompactLoops(); void optCompactLoop(FlowGraphNaturalLoop* loop); From bb7462d06d8a69269f1c98ecf455eb43036071aa Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 7 Feb 2024 15:20:16 +0100 Subject: [PATCH 23/64] Clean up --- src/coreclr/jit/optimizer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 5e53f26082649a..06240e362befb3 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -3210,7 +3210,8 @@ bool Compiler::optCanonicalizeExit(FlowGraphNaturalLoop* loop, BasicBlock* exit) else { // Otherwise just do the heavy-handed thing and insert it anywhere in the right region. - newExit = fgNewBBinRegion(BBJ_ALWAYS, finallyBlock->bbHndIndex, 0, nullptr, exit, false, false, true); + newExit = fgNewBBinRegion(BBJ_ALWAYS, finallyBlock->bbHndIndex, 0, nullptr, exit, /* putInFilter */ false, + /* runRarely */ false, /* insertAtEnd */ true); } } else From 5af119519ab850c68d0758f701e476ece8744661 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 7 Feb 2024 17:02:41 +0100 Subject: [PATCH 24/64] Fix function header --- src/coreclr/jit/optimizer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 06240e362befb3..1cfdcfa6107985 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -3162,8 +3162,8 @@ bool Compiler::optCanonicalizeExits(FlowGraphNaturalLoop* loop) } //----------------------------------------------------------------------------- -// optCanonicalizeExit: Canonicalize a single exit block -// they have only loop predecessors. +// optCanonicalizeExit: Canonicalize a single exit block to have only loop +// predecessors. 
// // Parameters: // loop - The loop From 58ebfb8141421d13e45686b3a33a0a781aecb5d4 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 7 Feb 2024 18:30:06 +0100 Subject: [PATCH 25/64] Handle x86; skip regular exit blocks that are handlers --- src/coreclr/jit/compiler.h | 5 +++-- src/coreclr/jit/compiler.hpp | 19 ++++++++++++++----- src/coreclr/jit/optimizer.cpp | 12 ++++++++++-- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 9fbb16047876c6..257afdde43ba92 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -4974,9 +4974,10 @@ class Compiler FlowGraphDominatorTree* m_domTree; BlockReachabilitySets* m_reachabilitySets; - // Do we require loops to be in canonical form? + // Do we require loops to be in canonical form? The canonical form ensures that: // 1. All loops have preheaders (single entry blocks that always enter the loop) - // 2. All regular loop exits have only loop predecessors + // 2. All loop exits where bbIsHandlerBeg(exit) is false have only loop predecessors. + // bool optLoopsCanonical; unsigned optNumNaturalLoopsFound; // Number of natural loops found in the loop finding phase diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 422eb741847e7f..7a979c94ed3bbf 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -4970,8 +4970,8 @@ BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocksLexical(TFunc func) } //------------------------------------------------------------------------------ -// FlowGraphNaturalLoop::VisitRegularExitBlocks: Visit blocks that are outside -// the loop but that may have regular predecessors inside the loop. +// FlowGraphNaturalLoop::VisitRegularExitBlocks: Visit non-handler blocks that +// are outside the loop but that may have regular predecessors inside the loop. // // Type parameters: // TFunc - Callback functor type @@ -4981,8 +4981,16 @@ BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocksLexical(TFunc func) // BasicBlockVisit. // // Returns: -// BasicBlockVisit that indicated whether the visit was aborted by the -// callback or whether all blocks were visited. +// BasicBlockVisit that indicated whether the visit was aborted by the +// callback or whether all blocks were visited. +// +// Remarks: +// Note that no handler begins are visited by this function, even if they +// have regular predecessors inside the loop (for example, finally handlers +// can have regular BBJ_CALLFINALLY predecessors inside the loop). This +// choice is motivated by the fact that such handlers will also show up as +// exceptional exit blocks that must always be handled specially by client +// code regardless. 
// template BasicBlockVisit FlowGraphNaturalLoop::VisitRegularExitBlocks(TFunc func) @@ -4996,7 +5004,8 @@ BasicBlockVisit FlowGraphNaturalLoop::VisitRegularExitBlocks(TFunc func) { BasicBlockVisit result = edge->getSourceBlock()->VisitRegularSuccs(comp, [&](BasicBlock* succ) { assert(m_dfsTree->Contains(succ)); - if (!ContainsBlock(succ) && BitVecOps::TryAddElemD(&traits, visited, succ->bbPostorderNum) && + if (!comp->bbIsHandlerBeg(succ) && !ContainsBlock(succ) && + BitVecOps::TryAddElemD(&traits, visited, succ->bbPostorderNum) && (func(succ) == BasicBlockVisit::Abort)) { return BasicBlockVisit::Abort; diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 1cfdcfa6107985..00877dadbb3d74 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -3175,6 +3175,11 @@ bool Compiler::optCanonicalizeExit(FlowGraphNaturalLoop* loop, BasicBlock* exit) { assert(!loop->ContainsBlock(exit)); + if (bbIsHandlerBeg(exit)) + { + return false; + } + bool allLoopPreds = true; for (BasicBlock* pred : exit->PredBlocks()) { @@ -3195,11 +3200,13 @@ bool Compiler::optCanonicalizeExit(FlowGraphNaturalLoop* loop, BasicBlock* exit) BasicBlock* newExit; +#if FEATURE_EH_CALLFINALLY_THUNKS if (exit->KindIs(BBJ_CALLFINALLY)) { // Branches to a BBJ_CALLFINALLY _must_ come from inside its associated - // try region. First try to see if the lexically bottom most block is - // part of the try; if so, inserting after that is a good choice. + // try region, and when we have callfinally thunks the BBJ_CALLFINALLY + // is outside it. First try to see if the lexically bottom most block + // is part of the try; if so, inserting after that is a good choice. BasicBlock* finallyBlock = exit->GetTarget(); assert(finallyBlock->hasHndIndex()); BasicBlock* bottom = loop->GetLexicallyBottomMostBlock(); @@ -3215,6 +3222,7 @@ bool Compiler::optCanonicalizeExit(FlowGraphNaturalLoop* loop, BasicBlock* exit) } } else +#endif { newExit = fgNewBBbefore(BBJ_ALWAYS, exit, false, exit); newExit->SetFlags(BBF_NONE_QUIRK); From 3304ae35496b87e41afdadbc64585f2b1a911e18 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 9 Feb 2024 12:38:23 +0100 Subject: [PATCH 26/64] Set proper weight of exit --- src/coreclr/jit/compiler.h | 2 + src/coreclr/jit/optimizer.cpp | 143 ++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 257afdde43ba92..7a1e8e29dff1c2 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6802,6 +6802,8 @@ class Compiler bool optCanonicalizeExits(FlowGraphNaturalLoop* loop); bool optCanonicalizeExit(FlowGraphNaturalLoop* loop, BasicBlock* exit); + weight_t optEstimateEdgeLikelihood(BasicBlock* from, BasicBlock* to, bool* fromProfile); + void optSetExitWeight(FlowGraphNaturalLoop* loop, BasicBlock* exit); PhaseStatus optCloneLoops(); void optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context); diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 00877dadbb3d74..180d8ecb1f0034 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -3246,9 +3246,152 @@ bool Compiler::optCanonicalizeExit(FlowGraphNaturalLoop* loop, BasicBlock* exit) } } + optSetExitWeight(loop, newExit); return true; } +//----------------------------------------------------------------------------- +// optEstimateEdgeLikelihood: Given a block "from" that may transfer control to +// "to", estimate the likelihood that this will happen 
taking profile into +// account if available. +// +// Parameters: +// from - From block +// to - To block +// fromProfile - [out] Whether or not the estimate is based on profile data +// +// Returns: +// Estimated likelihood of the edge being taken. +// +weight_t Compiler::optEstimateEdgeLikelihood(BasicBlock* from, BasicBlock* to, bool* fromProfile) +{ + *fromProfile = (from->HasFlag(BBF_PROF_WEIGHT) != BBF_EMPTY) && (to->HasFlag(BBF_PROF_WEIGHT) != BBF_EMPTY); + if (!fgIsUsingProfileWeights() || !from->HasFlag(BBF_PROF_WEIGHT) || !to->HasFlag(BBF_PROF_WEIGHT) || + from->KindIs(BBJ_ALWAYS)) + { + return 1.0 / from->NumSucc(this); + } + + bool useEdgeWeights = fgHaveValidEdgeWeights; + + weight_t takenCount = 0; + weight_t notTakenCount = 0; + + if (useEdgeWeights) + { + from->VisitRegularSuccs(this, [&, to](BasicBlock* succ) { + *fromProfile &= succ->hasProfileWeight(); + FlowEdge* edge = fgGetPredForBlock(succ, from); + weight_t edgeWeight = (edge->edgeWeightMin() + edge->edgeWeightMax()) / 2.0; + + if (succ == to) + { + takenCount += edgeWeight; + } + else + { + notTakenCount += edgeWeight; + } + return BasicBlockVisit::Continue; + }); + + // Watch out for cases where edge weights were not properly maintained + // so that it appears no profile flow goes to 'to'. + // + useEdgeWeights = !fgProfileWeightsConsistent(takenCount, BB_ZERO_WEIGHT); + } + + if (!useEdgeWeights) + { + takenCount = 0; + notTakenCount = 0; + + from->VisitRegularSuccs(this, [&, to](BasicBlock* succ) { + *fromProfile &= succ->hasProfileWeight(); + if (succ == to) + { + takenCount += succ->bbWeight; + } + else + { + notTakenCount += succ->bbWeight; + } + + return BasicBlockVisit::Continue; + }); + } + + if (!*fromProfile) + { + return 1.0 / from->NumSucc(this); + } + + if (fgProfileWeightsConsistent(takenCount, BB_ZERO_WEIGHT)) + { + return 0; + } + + weight_t likelihood = takenCount / (takenCount + notTakenCount); + return likelihood; +} + +//----------------------------------------------------------------------------- +// optSetExitWeight: Set the weight of a newly created exit, after it +// has been added to the flowgraph. +// +// Parameters: +// loop - The loop +// preheader - The new exit block +// +void Compiler::optSetExitWeight(FlowGraphNaturalLoop* loop, BasicBlock* exit) +{ + bool hasProfWeight = true; + + // Inherit first estimate from the exit target; optEstimateEdgeLikelihood + // may use it in its estimate if we do not have edge weights to estimate + // from (we also assume the exiting -> exit edges already inherited their + // edge weights from the previous edge). 
+ exit->inheritWeight(exit->GetTarget()); + + weight_t exitWeight = BB_ZERO_WEIGHT; + for (FlowEdge* exitEdge : exit->PredEdges()) + { + BasicBlock* exiting = exitEdge->getSourceBlock(); + + bool fromProfile = false; + weight_t likelihood = optEstimateEdgeLikelihood(exiting, exit, &fromProfile); + hasProfWeight &= fromProfile; + + weight_t contribution = exiting->bbWeight * likelihood; + JITDUMP(" Estimated likelihood " FMT_BB " -> " FMT_BB " to be " FMT_WT " (contribution: " FMT_WT ")\n", + exiting->bbNum, exit->bbNum, likelihood, contribution); + + exitWeight += contribution; + + // Normalize exiting -> new exit weight + exitEdge->setEdgeWeights(contribution, contribution, exit); + } + + exit->RemoveFlags(BBF_PROF_WEIGHT | BBF_RUN_RARELY); + + exit->bbWeight = exitWeight; + if (hasProfWeight) + { + exit->SetFlags(BBF_PROF_WEIGHT); + } + + if (exitWeight == BB_ZERO_WEIGHT) + { + exit->SetFlags(BBF_RUN_RARELY); + return; + } + + // Normalize new exit -> old exit weight + FlowEdge* const edgeFromNewExit = fgGetPredForBlock(exit->GetTarget(), exit); + assert(edgeFromNewExit != nullptr); + edgeFromNewExit->setEdgeWeights(exit->bbWeight, exit->bbWeight, exit->GetTarget()); +} + /***************************************************************************** * * See if the given tree can be computed in the given precision (which must From 056a56a0d83396b56dbafcf0727e7ea6416dac34 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 9 Feb 2024 13:09:03 +0100 Subject: [PATCH 27/64] Add stats --- src/coreclr/jit/inductionvariableopts.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 76424ae6a9c1f8..a55b8f19f5187b 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -804,6 +804,10 @@ Scev* ScalarEvolutionContext::Simplify(Scev* scev) } } +unsigned buckets[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0 }; +static Histogram s_widening(buckets); +static DumpOnShutdown sd("Num widenings", &s_widening); + //------------------------------------------------------------------------ // optCanSinkWidenedIV: Check to see if we are able to sink a store to the old // local into the exits of a loop if we decide to widen. 
@@ -1159,6 +1163,7 @@ PhaseStatus Compiler::optInductionVariables() } #ifdef TARGET_64BIT + unsigned numWidenings = 0; ScalarEvolutionContext scevContext(this); JITDUMP("Widening primary induction variables:\n"); for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder()) @@ -1264,8 +1269,11 @@ PhaseStatus Compiler::optInductionVariables() }); changed |= optSinkWidenedIV(lcl->GetLclNum(), newLclNum, loop); + numWidenings++; } } + + s_widening.record(numWidenings); #endif fgInvalidateDfsTree(); From bdab205d1f7be971244ae3cce20068ed12527a49 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 12 Feb 2024 12:49:54 +0100 Subject: [PATCH 28/64] Remove stats --- src/coreclr/jit/inductionvariableopts.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index a55b8f19f5187b..76424ae6a9c1f8 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -804,10 +804,6 @@ Scev* ScalarEvolutionContext::Simplify(Scev* scev) } } -unsigned buckets[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0 }; -static Histogram s_widening(buckets); -static DumpOnShutdown sd("Num widenings", &s_widening); - //------------------------------------------------------------------------ // optCanSinkWidenedIV: Check to see if we are able to sink a store to the old // local into the exits of a loop if we decide to widen. @@ -1163,7 +1159,6 @@ PhaseStatus Compiler::optInductionVariables() } #ifdef TARGET_64BIT - unsigned numWidenings = 0; ScalarEvolutionContext scevContext(this); JITDUMP("Widening primary induction variables:\n"); for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder()) @@ -1269,11 +1264,8 @@ PhaseStatus Compiler::optInductionVariables() }); changed |= optSinkWidenedIV(lcl->GetLclNum(), newLclNum, loop); - numWidenings++; } } - - s_widening.record(numWidenings); #endif fgInvalidateDfsTree(); From f10665940ef27df326b2f648ed074bcbab3e6167 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 12 Feb 2024 16:40:13 +0100 Subject: [PATCH 29/64] Only do IV widening on x64 --- src/coreclr/jit/inductionvariableopts.cpp | 37 ++++++++++++++++++----- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 76424ae6a9c1f8..23f489e780d872 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -899,6 +899,7 @@ bool Compiler::optIsIVWideningProfitable(unsigned lclNum, ScevAddRec* addRec, Fl fgWalkResult PreOrderVisit(GenTree** use, GenTree* parent) { GenTree* node = *use; + if (!node->OperIs(GT_CAST)) { return WALK_CONTINUE; @@ -936,15 +937,20 @@ bool Compiler::optIsIVWideningProfitable(unsigned lclNum, ScevAddRec* addRec, Fl int savedSize = 0; loop->VisitLoopBlocks([&](BasicBlock* block) { - visitor.NumExtensions = 0; - for (Statement* stmt : block->NonPhiStatements()) { visitor.WalkTree(stmt->GetRootNodePointer(), nullptr); + + if (visitor.NumExtensions > 0) + { + JITDUMP(" Found %u zero extensions in " FMT_STMT "\n", visitor.NumExtensions, stmt->GetID()); + + savedSize += (int)visitor.NumExtensions * ExtensionSize; + savedCost += visitor.NumExtensions * block->getBBWeight(this) * ExtensionCost; + visitor.NumExtensions = 0; + } } - savedSize += (int)visitor.NumExtensions * ExtensionSize; - savedCost += visitor.NumExtensions * block->getBBWeight(this) * ExtensionCost; return BasicBlockVisit::Continue; }); @@ -1090,11 
+1096,26 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statemen // No cast needed -- the backend allows TYP_INT uses of TYP_LONG locals. break; case GT_STORE_LCL_VAR: + { node->AsLclVarCommon()->SetLclNum(m_newLclNum); - node->AsLclVarCommon()->gtType = TYP_LONG; - node->AsLclVarCommon()->Data() = - m_compiler->gtNewCastNode(TYP_LONG, node->AsLclVarCommon()->Data(), true, TYP_LONG); + node->gtType = TYP_LONG; + GenTree* data = node->AsLclVarCommon()->Data(); + if (data->OperIs(GT_ADD) && (data->gtGetOp1()->OperIs(GT_LCL_VAR) && (data->gtGetOp1()->AsLclVarCommon()->GetLclNum() == m_lclNum)) && + data->gtGetOp2()->OperIs(GT_CNS_INT)) + { + data->gtType = TYP_LONG; + data->gtGetOp1()->AsLclVarCommon()->SetLclNum(m_newLclNum); + data->gtGetOp1()->gtType = TYP_LONG; + data->gtGetOp2()->gtType = TYP_LONG; + return fgWalkResult::WALK_SKIP_SUBTREES; + } + else + { + node->AsLclVarCommon()->Data() = + m_compiler->gtNewCastNode(TYP_LONG, node->AsLclVarCommon()->Data(), true, TYP_LONG); + } break; + } case GT_LCL_FLD: case GT_STORE_LCL_FLD: assert(!"Unexpected field use for local not marked as DNER"); @@ -1148,6 +1169,7 @@ PhaseStatus Compiler::optInductionVariables() bool changed = false; +#if defined(TARGET_64BIT) && defined(TARGET_XARCH) m_dfsTree = fgComputeDfs(); m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); if (optCanonicalizeLoops()) @@ -1158,7 +1180,6 @@ PhaseStatus Compiler::optInductionVariables() changed = true; } -#ifdef TARGET_64BIT ScalarEvolutionContext scevContext(this); JITDUMP("Widening primary induction variables:\n"); for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder()) From 35813bf749f86ae175027249ddfa5d0d53f44435 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 12 Feb 2024 17:47:22 +0100 Subject: [PATCH 30/64] Clean up --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/inductionvariableopts.cpp | 70 ++++++++++------------- 2 files changed, 31 insertions(+), 41 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index fc8b6e9bdc9edb..718dc335b1be9b 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -4956,7 +4956,7 @@ class Compiler unsigned fgBBcountAtCodegen; // # of BBs in the method at the start of codegen jitstd::vector* fgBBOrder; // ordered vector of BBs #endif - // Used as a quick check for whether loop alignment should look for natural loops. + // Used as a quick check for whether phases downstream of loop finding should look for natural loops. // If true: there may or may not be any natural loops in the flow graph, so try to find them // If false: there's definitely not any natural loops in the flow graph bool fgMightHaveNaturalLoops; diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 23f489e780d872..f7a87195b373f9 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -730,8 +730,8 @@ Scev* ScalarEvolutionContext::Simplify(Scev* scev) if (op1->OperIs(ScevOper::AddRec)) { - // TODO: This requires some proof that it is ok, but currently - // we do not rely on this. + // TODO-Cleanup: This requires some proof that it is ok, but + // currently we do not rely on this. return op1; } @@ -821,11 +821,14 @@ Scev* ScalarEvolutionContext::Simplify(Scev* scev) // into the old one in the exits where the IV variable is live. 
// // We are able to sink when none of the exits are critical blocks, in the -// sense that all their predecessors must come from inside the loop. +// sense that all their predecessors must come from inside the loop. Loop +// exit canonicalization guarantees this for regular exit blocks, but +// exceptional exits may still have non-loop predecessors. // -// TODO-CQ: If we canonicalize loop exits we can guarantee this property for -// regular exits; that will allow us to always sink except when the loop is -// enclosed in a try region whose handler also uses the IV variable. +// Note that there may be natural loops that have not had their regular exits +// canonicalized at the time when IV opts run, in particular if RBO/assertion +// prop makes a previously unnatural loop natural. This function accounts for +// and rejects these cases. // bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) { @@ -1086,7 +1089,7 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statemen } } } - else if (node->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_STORE_LCL_VAR, GT_STORE_LCL_FLD) && + else if (node->OperIs(GT_LCL_VAR, GT_STORE_LCL_VAR, GT_LCL_FLD, GT_STORE_LCL_FLD) && (node->AsLclVarCommon()->GetLclNum() == m_lclNum)) { switch (node->OperGet()) @@ -1099,21 +1102,8 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statemen { node->AsLclVarCommon()->SetLclNum(m_newLclNum); node->gtType = TYP_LONG; - GenTree* data = node->AsLclVarCommon()->Data(); - if (data->OperIs(GT_ADD) && (data->gtGetOp1()->OperIs(GT_LCL_VAR) && (data->gtGetOp1()->AsLclVarCommon()->GetLclNum() == m_lclNum)) && - data->gtGetOp2()->OperIs(GT_CNS_INT)) - { - data->gtType = TYP_LONG; - data->gtGetOp1()->AsLclVarCommon()->SetLclNum(m_newLclNum); - data->gtGetOp1()->gtType = TYP_LONG; - data->gtGetOp2()->gtType = TYP_LONG; - return fgWalkResult::WALK_SKIP_SUBTREES; - } - else - { - node->AsLclVarCommon()->Data() = - m_compiler->gtNewCastNode(TYP_LONG, node->AsLclVarCommon()->Data(), true, TYP_LONG); - } + node->AsLclVarCommon()->Data() = + m_compiler->gtNewCastNode(TYP_LONG, node->AsLclVarCommon()->Data(), true, TYP_LONG); break; } case GT_LCL_FLD: @@ -1167,18 +1157,21 @@ PhaseStatus Compiler::optInductionVariables() } #endif + if (!fgMightHaveNaturalLoops) + { + JITDUMP(" Skipping since this method has no natural loops\n"); + return PhaseStatus::MODIFIED_NOTHING; + } + bool changed = false; -#if defined(TARGET_64BIT) && defined(TARGET_XARCH) + // Currently we only do IV widening which is only profitable for x64 + // because arm64 addressing modes can include the zero/sign-extension of + // the index for free. 
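    // For intuition, the saving being modeled here (the assembly is
    // illustrative, not exact JIT output). Take a 32-bit IV used as a
    // 64-bit index:
    //
    //     long long Sum(long long* arr, int n)
    //     {
    //         long long sum = 0;
    //         for (int i = 0; i < n; i++)
    //             sum += arr[i]; // 32-bit 'i' feeds a 64-bit address
    //         return sum;
    //     }
    //
    // On x64 each 64-bit address computation first needs the narrow IV
    // sign/zero-extended (e.g. a movsxd per use), and widening the IV to
    // TYP_LONG removes those instructions. On arm64 the extension typically
    // folds into the load's addressing mode, e.g.
    //
    //     ldr x8, [x0, w9, sxtw #3]
    //
    // so there is usually nothing to save there.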
+ CLANG_FORMAT_COMMENT_ANCHOR; +#if defined(TARGET_XARCH) && defined(TARGET_64BIT) m_dfsTree = fgComputeDfs(); m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); - if (optCanonicalizeLoops()) - { - fgInvalidateDfsTree(); - m_dfsTree = fgComputeDfs(); - m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); - changed = true; - } ScalarEvolutionContext scevContext(this); JITDUMP("Widening primary induction variables:\n"); @@ -1200,13 +1193,14 @@ PhaseStatus Compiler::optInductionVariables() DISPSTMT(stmt); GenTreeLclVarCommon* lcl = stmt->GetRootNode()->AsLclVarCommon(); - if (genActualType(lcl) != TYP_INT) + LclVarDsc* lclDsc = lvaGetDesc(lcl); + if (lclDsc->TypeGet() != TYP_INT) { - JITDUMP(" Type is %s, no widening to be done\n", varTypeName(genActualType(lcl))); + JITDUMP(" Type is %s, no widening to be done\n", varTypeName(lclDsc->TypeGet())); continue; } - if (lvaGetDesc(lcl)->lvDoNotEnregister) + if (lclDsc->lvDoNotEnregister) { JITDUMP(" V%02u is marked DNER\n", lcl->GetLclNum()); continue; @@ -1251,15 +1245,11 @@ PhaseStatus Compiler::optInductionVariables() if (addRec->Start->OperIs(ScevOper::Constant)) { ScevConstant* cns = (ScevConstant*)addRec->Start; - initVal = gtNewIconNode((int64_t)(uint32_t)(((ScevConstant*)addRec->Start)->Value), TYP_LONG); + initVal = gtNewIconNode((int64_t)(uint32_t)cns->Value, TYP_LONG); } else { - LclVarDsc* lclDsc = lvaGetDesc(lcl); - initVal = - gtNewCastNode(TYP_LONG, gtNewLclvNode(lcl->GetLclNum(), - lclDsc->lvNormalizeOnLoad() ? lclDsc->TypeGet() : TYP_INT), - true, TYP_LONG); + initVal = gtNewCastNode(TYP_LONG, gtNewLclvNode(lcl->GetLclNum(), TYP_INT), true, TYP_LONG); } JITDUMP("Adding initialization of new widened local to preheader:\n"); @@ -1287,9 +1277,9 @@ PhaseStatus Compiler::optInductionVariables() changed |= optSinkWidenedIV(lcl->GetLclNum(), newLclNum, loop); } } -#endif fgInvalidateDfsTree(); +#endif return changed ? 
PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } From 0453ee6e1230a66db75e82989f3b0528130c10ed Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 12 Feb 2024 17:47:54 +0100 Subject: [PATCH 31/64] Run jit-format --- src/coreclr/jit/inductionvariableopts.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index f7a87195b373f9..58aa08901275a3 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -1192,8 +1192,8 @@ PhaseStatus Compiler::optInductionVariables() DISPSTMT(stmt); - GenTreeLclVarCommon* lcl = stmt->GetRootNode()->AsLclVarCommon(); - LclVarDsc* lclDsc = lvaGetDesc(lcl); + GenTreeLclVarCommon* lcl = stmt->GetRootNode()->AsLclVarCommon(); + LclVarDsc* lclDsc = lvaGetDesc(lcl); if (lclDsc->TypeGet() != TYP_INT) { JITDUMP(" Type is %s, no widening to be done\n", varTypeName(lclDsc->TypeGet())); From 1fc2f42fd6d8eefe75fee812bff0199299050e66 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 14 Feb 2024 10:04:50 +0100 Subject: [PATCH 32/64] Only look at regular exits --- src/coreclr/jit/compiler.h | 3 -- src/coreclr/jit/compiler.hpp | 56 ++--------------------- src/coreclr/jit/inductionvariableopts.cpp | 44 ++++++++++++++---- 3 files changed, 38 insertions(+), 65 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 89f0bd7ab61e7a..25407bf4825011 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2184,9 +2184,6 @@ class FlowGraphNaturalLoop template BasicBlockVisit VisitLoopBlocksLexical(TFunc func); - template - BasicBlockVisit VisitAllExitBlocks(TFunc func); - template BasicBlockVisit VisitRegularExitBlocks(TFunc func); diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index f803a05af679ac..6a8a5f0814355a 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -4970,45 +4970,6 @@ BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocksLexical(TFunc func) return BasicBlockVisit::Continue; } -//------------------------------------------------------------------------------ -// FlowGraphNaturalLoop::VisitAllExitBlocks: Visit all blocks that are outside -// the loop but that may have predecessors inside the loop. This includes -// handler blocks. -// -// Type parameters: -// TFunc - Callback functor type -// -// Arguments: -// func - Callback functor that takes a BasicBlock* and returns a -// BasicBlockVisit. -// -// Returns: -// BasicBlockVisit that indicated whether the visit was aborted by the -// callback or whether all blocks were visited. 
-// -template -BasicBlockVisit FlowGraphNaturalLoop::VisitAllExitBlocks(TFunc func) -{ - Compiler* comp = m_dfsTree->GetCompiler(); - - BitVecTraits traits = m_dfsTree->PostOrderTraits(); - BitVec visited(BitVecOps::MakeEmpty(&traits)); - - BasicBlockVisit result = VisitLoopBlocksReversePostOrder([&, comp](BasicBlock* block) { - return block->VisitAllSuccs(comp, [&](BasicBlock* succ) { - if (!ContainsBlock(succ) && BitVecOps::TryAddElemD(&traits, visited, succ->bbPostorderNum) && - (func(succ) == BasicBlockVisit::Abort)) - { - return BasicBlockVisit::Abort; - } - - return BasicBlockVisit::Continue; - }); - }); - - return result; -} - //------------------------------------------------------------------------------ // FlowGraphNaturalLoop::VisitRegularExitBlocks: Visit non-handler blocks that // are outside the loop but that may have regular predecessors inside the loop. @@ -5042,19 +5003,10 @@ BasicBlockVisit FlowGraphNaturalLoop::VisitRegularExitBlocks(TFunc func) for (FlowEdge* edge : ExitEdges()) { - BasicBlockVisit result = edge->getSourceBlock()->VisitRegularSuccs(comp, [&](BasicBlock* succ) { - assert(m_dfsTree->Contains(succ)); - if (!comp->bbIsHandlerBeg(succ) && !ContainsBlock(succ) && - BitVecOps::TryAddElemD(&traits, visited, succ->bbPostorderNum) && - (func(succ) == BasicBlockVisit::Abort)) - { - return BasicBlockVisit::Abort; - } - - return BasicBlockVisit::Continue; - }); - - if (result == BasicBlockVisit::Abort) + BasicBlock* exit = edge->getDestinationBlock(); + assert(m_dfsTree->Contains(exit) && !ContainsBlock(exit)); + if (!comp->bbIsHandlerBeg(exit) && BitVecOps::TryAddElemD(&traits, visited, exit->bbPostorderNum) && + (func(exit) == BasicBlockVisit::Abort)) { return BasicBlockVisit::Abort; } diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 58aa08901275a3..3dc5ad229a7ce1 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -822,8 +822,10 @@ Scev* ScalarEvolutionContext::Simplify(Scev* scev) // // We are able to sink when none of the exits are critical blocks, in the // sense that all their predecessors must come from inside the loop. Loop -// exit canonicalization guarantees this for regular exit blocks, but -// exceptional exits may still have non-loop predecessors. +// exit canonicalization guarantees this for regular exit blocks. It is not +// guaranteed for exceptional exits, but we do not expect to widen IVs that +// are live into exceptional exits since those are marked DNER which makes it +// unprofitable anyway. 
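// (Concretely, "sinking" replaces in-loop maintenance of the narrow local
// with one store per such exit, e.g.
//
//   exit:
//       V01 = (int)V02;   // V02 is the widened copy of V01
//
// which is only valid when every predecessor of the exit comes from inside
// the loop.)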
// // Note that there may be natural loops that have not had their regular exits // canonicalized at the time when IV opts run, in particular if RBO/assertion @@ -834,29 +836,51 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) { LclVarDsc* dsc = lvaGetDesc(lclNum); - BasicBlockVisit result = loop->VisitAllExitBlocks([=](BasicBlock* exit) { + BasicBlockVisit result = loop->VisitRegularExitBlocks([=](BasicBlock* exit) { + if (!VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex)) { JITDUMP(" Exit " FMT_BB " does not need a sink; V%02u is not live-in\n", exit->bbNum, lclNum); return BasicBlockVisit::Continue; } - for (FlowEdge* predEdge = BlockPredsWithEH(exit); predEdge != nullptr; predEdge = predEdge->getNextPredEdge()) + for (BasicBlock* pred : exit->PredBlocks()) { - if (!loop->ContainsBlock(predEdge->getSourceBlock())) + if (!loop->ContainsBlock(pred)) { JITDUMP(" Cannot safely sink widened version of V%02u into exit " FMT_BB " of " FMT_LP "; it has a non-loop pred " FMT_BB "\n", - lclNum, exit->bbNum, loop->GetIndex(), predEdge->getSourceBlock()->bbNum); + lclNum, exit->bbNum, loop->GetIndex(), pred->bbNum); return BasicBlockVisit::Abort; } } - JITDUMP(" V%02u is live into exit " FMT_BB "; will sink the widened value\n", lclNum, exit->bbNum); return BasicBlockVisit::Continue; }); - return result == BasicBlockVisit::Continue; +#ifdef DEBUG + // We currently do not expect to ever widen IVs that are live into + // exceptional exits. Such IVs are expected to have been marked DNER + // previously (EH write-thru is only for single def locals) which makes it + // unprofitable. If this ever changes we need some more expansive handling + // here. + loop->VisitLoopBlocks([=](BasicBlock* block) { + + block->VisitAllSuccs(this, [=](BasicBlock* succ) { + if (!loop->ContainsBlock(succ) && bbIsHandlerBeg(succ)) + { + assert(!VarSetOps::IsMember(this, succ->bbLiveIn, dsc->lvVarIndex) && + "Candidate IV for widening is live into exceptional exit"); + } + + return BasicBlockVisit::Continue; + }); + + return BasicBlockVisit::Continue; + }); +#endif + + return result != BasicBlockVisit::Abort; } //------------------------------------------------------------------------ @@ -971,7 +995,7 @@ bool Compiler::optIsIVWideningProfitable(unsigned lclNum, ScevAddRec* addRec, Fl // Now account for the cost of sinks. 
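// (One store of the widened value back into the narrow local is needed in
// every exit where the narrow IV is live-in; each such store charges the
// same size/weighted-cost unit against the savings computed above.)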
LclVarDsc* dsc = lvaGetDesc(lclNum); - loop->VisitAllExitBlocks([&](BasicBlock* exit) { + loop->VisitRegularExitBlocks([&](BasicBlock* exit) { if (VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex)) { savedSize -= ExtensionSize; @@ -1021,7 +1045,7 @@ bool Compiler::optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNa { bool anySunk = false; LclVarDsc* dsc = lvaGetDesc(lclNum); - loop->VisitAllExitBlocks([=, &anySunk](BasicBlock* exit) { + loop->VisitRegularExitBlocks([=, &anySunk](BasicBlock* exit) { if (!VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex)) { return BasicBlockVisit::Continue; From b2e201c2fd06cfe1f158679cc838ffae4fe2cfb4 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 14 Feb 2024 11:32:51 +0100 Subject: [PATCH 33/64] Add depth limit; small clean ups --- src/coreclr/jit/inductionvariableopts.cpp | 72 ++++++++++++++++++----- 1 file changed, 56 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 3dc5ad229a7ce1..857308d7c222eb 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -122,7 +122,8 @@ class ScalarEvolutionContext FlowGraphNaturalLoop* m_loop = nullptr; ScalarEvolutionMap m_cache; - Scev* AnalyzeNew(BasicBlock* block, GenTree* tree); + Scev* Analyze(BasicBlock* block, GenTree* tree, int depth); + Scev* AnalyzeNew(BasicBlock* block, GenTree* tree, int depth); Scev* CreateSimpleAddRec(GenTreeLclVarCommon* headerStore, Scev* start, BasicBlock* stepDefBlock, @@ -375,7 +376,7 @@ Scev* ScalarEvolutionContext::CreateScevForConstant(GenTreeIntConCommon* tree) // SCEV node if the tree was analyzable; otherwise nullptr if the value is // cannot be described. // -Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) +Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree, int depth) { switch (tree->OperGet()) { @@ -402,9 +403,8 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) GenTreeLclVarCommon* def = ssaDsc->GetDefNode(); if ((def != nullptr) && def->Data()->OperIs(GT_CNS_INT, GT_CNS_LNG)) { - // For constant definitions from outside the loop we prefer to inline the constant. - // TODO: Maybe we shouldn't but should just do it when we dump the scev? - + // For constant definitions from outside the loop we prefer + // to inline the constant. 
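// (For example, with "int i = 0;" defined before the loop, a use of that
// def analyzes to the SCEV constant 0 rather than an opaque local, which
// lets the start value of the resulting add recurrence fold.)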
return CreateScevForConstant(def->Data()->AsIntConCommon()); } @@ -424,7 +424,7 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) return nullptr; } - return Analyze(ssaDsc->GetBlock(), ssaDsc->GetDefNode()); + return Analyze(ssaDsc->GetBlock(), ssaDsc->GetDefNode(), depth + 1); } case GT_STORE_LCL_VAR: { @@ -432,7 +432,7 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) GenTree* data = store->Data(); if (!data->OperIs(GT_PHI)) { - return Analyze(block, data); + return Analyze(block, data, depth + 1); } if (block != m_loop->GetHeader()) @@ -526,7 +526,7 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) return nullptr; } - Scev* op = Analyze(block, cast->CastOp()); + Scev* op = Analyze(block, cast->CastOp(), depth + 1); if (op == nullptr) { return nullptr; @@ -538,11 +538,11 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) case GT_MUL: case GT_LSH: { - Scev* op1 = Analyze(block, tree->gtGetOp1()); + Scev* op1 = Analyze(block, tree->gtGetOp1(), depth + 1); if (op1 == nullptr) return nullptr; - Scev* op2 = Analyze(block, tree->gtGetOp2()); + Scev* op2 = Analyze(block, tree->gtGetOp2(), depth + 1); if (op2 == nullptr) return nullptr; @@ -566,11 +566,11 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree) } case GT_COMMA: { - return Analyze(block, tree->gtGetOp2()); + return Analyze(block, tree->gtGetOp2(), depth + 1); } case GT_ARR_ADDR: { - return Analyze(block, tree->AsArrAddr()->Addr()); + return Analyze(block, tree->AsArrAddr()->Addr(), depth + 1); } default: return nullptr; @@ -640,11 +640,48 @@ Scev* ScalarEvolutionContext::CreateSimpleAddRec(GenTreeLclVarCommon* headerStor // cannot be described. // Scev* ScalarEvolutionContext::Analyze(BasicBlock* block, GenTree* tree) +{ + return Analyze(block, tree, 0); +} + +// Since the analysis follows SSA defs we have no upper bound on the potential +// depth of the analysis performed. We put an artificial limit on this for two +// reasons: +// 1. The analysis is recursive, and we should not stack overflow regardless of +// the input program. +// 2. If we produced arbitrarily deep SCEV trees then all algorithms over their +// structure would similarly be at risk of stack overflows if they were +// recursive. However, these algorithms are generally much more elegant when +// they make use of recursion. +const int SCALAR_EVOLUTION_ANALYSIS_MAX_DEPTH = 64; + +static Counter s_numDepthHit; +DumpOnShutdown ds("ScalarEvolutionContext::Analyze depth hit", &s_numDepthHit); + +//------------------------------------------------------------------------ +// Analyze: Analyze the specified tree in the specified block. +// +// Parameters: +// block - Block containing the tree +// tree - Tree node +// depth - Current analysis depth. +// +// Returns: +// SCEV node if the tree was analyzable; otherwise nullptr if the value is +// cannot be described. 
+// +Scev* ScalarEvolutionContext::Analyze(BasicBlock* block, GenTree* tree, int depth) { Scev* result; if (!m_cache.Lookup(tree, &result)) { - result = AnalyzeNew(block, tree); + if (depth >= SCALAR_EVOLUTION_ANALYSIS_MAX_DEPTH) + { + s_numDepthHit.Value++; + return nullptr; + } + + result = AnalyzeNew(block, tree, depth); m_cache.Set(tree, result); } @@ -1189,9 +1226,9 @@ PhaseStatus Compiler::optInductionVariables() bool changed = false; - // Currently we only do IV widening which is only profitable for x64 - // because arm64 addressing modes can include the zero/sign-extension of - // the index for free. + // Currently we only do IV widening which generally is only profitable for + // x64 because arm64 addressing modes can include the zero/sign-extension + // of the index for free. CLANG_FORMAT_COMMENT_ANCHOR; #if defined(TARGET_XARCH) && defined(TARGET_64BIT) m_dfsTree = fgComputeDfs(); @@ -1224,6 +1261,9 @@ PhaseStatus Compiler::optInductionVariables() continue; } + // If the IV is not enregisterable then uses/defs are going to go + // to stack regardless. This check also filters out IVs that may be + // live into exceptional exits since those are always marked DNER. if (lclDsc->lvDoNotEnregister) { JITDUMP(" V%02u is marked DNER\n", lcl->GetLclNum()); From c63baf037e680f9b459bb6e26995f1df8d8e3677 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 14 Feb 2024 14:55:27 +0100 Subject: [PATCH 34/64] Revert unnecessary change --- src/coreclr/jit/redundantbranchopts.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/redundantbranchopts.cpp b/src/coreclr/jit/redundantbranchopts.cpp index 0f155eaca8916d..3ea6142de4cd3f 100644 --- a/src/coreclr/jit/redundantbranchopts.cpp +++ b/src/coreclr/jit/redundantbranchopts.cpp @@ -11,6 +11,7 @@ // PhaseStatus Compiler::optRedundantBranches() { + #if DEBUG if (verbose) { From d946974be2da217c2566070e5b135e022aa13acd Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 14 Feb 2024 16:17:17 +0100 Subject: [PATCH 35/64] Remove counter --- src/coreclr/jit/inductionvariableopts.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 857308d7c222eb..0e295ad7dc739e 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -655,9 +655,6 @@ Scev* ScalarEvolutionContext::Analyze(BasicBlock* block, GenTree* tree) // they make use of recursion. const int SCALAR_EVOLUTION_ANALYSIS_MAX_DEPTH = 64; -static Counter s_numDepthHit; -DumpOnShutdown ds("ScalarEvolutionContext::Analyze depth hit", &s_numDepthHit); - //------------------------------------------------------------------------ // Analyze: Analyze the specified tree in the specified block. 
// @@ -677,7 +674,6 @@ Scev* ScalarEvolutionContext::Analyze(BasicBlock* block, GenTree* tree, int dept { if (depth >= SCALAR_EVOLUTION_ANALYSIS_MAX_DEPTH) { - s_numDepthHit.Value++; return nullptr; } From 64bad73c32ae921ea6a5134ad630e8d4a036994c Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 14 Feb 2024 18:40:13 +0100 Subject: [PATCH 36/64] Add more info for LclVarDsc reason --- src/coreclr/jit/inductionvariableopts.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 0e295ad7dc739e..d0b680321aae00 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -1298,7 +1298,7 @@ PhaseStatus Compiler::optInductionVariables() } changed = true; - unsigned newLclNum = lvaGrabTemp(false DEBUGARG("Widened primary induction variable")); + unsigned newLclNum = lvaGrabTemp(false DEBUGARG(printfAlloc("Widened IV V%02u", lcl->GetLclNum()))); JITDUMP(" Replacing V%02u with a widened version V%02u\n", lcl->GetLclNum(), newLclNum); GenTree* initVal; From de391a60a280a144fe64adb273204a97263c7baf Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 15 Feb 2024 14:54:05 +0100 Subject: [PATCH 37/64] Share widened IVs with same reaching def from outside the loop --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/inductionvariableopts.cpp | 188 +++++++++++++++++----- src/coreclr/scripts/superpmi.py | 1 + 3 files changed, 148 insertions(+), 43 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 023776268a4705..e0da9bd166c5bc 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7407,7 +7407,7 @@ class Compiler PhaseStatus optInductionVariables(); bool optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop); - bool optIsIVWideningProfitable(unsigned lclNum, struct ScevAddRec* addRec, FlowGraphNaturalLoop* loop); + bool optIsIVWideningProfitable(unsigned lclNum, bool needsInitialization, FlowGraphNaturalLoop* loop); void optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statement* stmt); bool optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNaturalLoop* loop); diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index d0b680321aae00..1170594791fc65 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -58,6 +58,8 @@ struct Scev { return Type == type; } + + bool GetConstantValue(Compiler* comp, int64_t* cns); }; struct ScevConstant : Scev @@ -78,6 +80,31 @@ struct ScevLocal : Scev const unsigned LclNum; const unsigned SsaNum; + + //------------------------------------------------------------------------ + // GetConstantValue: If this SSA use refers to a constant, then fetch that + // constant. + // + // Parameters: + // comp - Compiler instance + // cns - [out] Constant value; only valid if this function returns true. 
+ // + // Returns: + // True if this SSA use refers to a constant; otherwise false, + // + bool GetConstantValue(Compiler* comp, int64_t* cns) + { + LclVarDsc* dsc = comp->lvaGetDesc(LclNum); + LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(SsaNum); + GenTreeLclVarCommon* defNode = ssaDsc->GetDefNode(); + if ((defNode != nullptr) && defNode->Data()->OperIs(GT_CNS_INT, GT_CNS_LNG)) + { + *cns = defNode->Data()->AsIntConCommon()->IntegralValue(); + return true; + } + + return false; + } }; struct ScevUnop : Scev @@ -111,6 +138,34 @@ struct ScevAddRec : Scev Scev* const Step; }; +//------------------------------------------------------------------------ +// Scev::GetConstantValue: If this SCEV is always a constant (i.e. either an +// inline constant or an SSA use referring to a constant) then obtain that +// constant. +// +// Parameters: +// comp - Compiler instance +// cns - [out] Constant value; only valid if this function returns true. +// +// Returns: +// True if a constant could be extracted. +// +bool Scev::GetConstantValue(Compiler* comp, int64_t* cns) +{ + if (OperIs(ScevOper::Constant)) + { + *cns = ((ScevConstant*)this)->Value; + return true; + } + + if (OperIs(ScevOper::Local)) + { + return ((ScevLocal*)this)->GetConstantValue(comp, cns); + } + + return false; +} + typedef JitHashTable, Scev*> ScalarEvolutionMap; // Scalar evolution is analyzed in the context of a single loop, and are @@ -125,7 +180,7 @@ class ScalarEvolutionContext Scev* Analyze(BasicBlock* block, GenTree* tree, int depth); Scev* AnalyzeNew(BasicBlock* block, GenTree* tree, int depth); Scev* CreateSimpleAddRec(GenTreeLclVarCommon* headerStore, - Scev* start, + ScevLocal* start, BasicBlock* stepDefBlock, GenTree* stepDefData); Scev* CreateSimpleInvariantScev(GenTree* tree); @@ -259,6 +314,12 @@ void ScalarEvolutionContext::DumpScev(Scev* scev) { ScevLocal* invariantLocal = (ScevLocal*)scev; printf("V%02u.%u", invariantLocal->LclNum, invariantLocal->SsaNum); + + int64_t cns; + if (invariantLocal->GetConstantValue(m_comp, &cns)) + { + printf(" (%lld)", (long long)cns); + } break; } case ScevOper::ZeroExtend: @@ -399,15 +460,6 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree, int d if ((ssaDsc->GetBlock() == nullptr) || !m_loop->ContainsBlock(ssaDsc->GetBlock())) { - // Invariant local - GenTreeLclVarCommon* def = ssaDsc->GetDefNode(); - if ((def != nullptr) && def->Data()->OperIs(GT_CNS_INT, GT_CNS_LNG)) - { - // For constant definitions from outside the loop we prefer - // to inline the constant. - return CreateScevForConstant(def->Data()->AsIntConCommon()); - } - return NewLocal(tree->AsLclVarCommon()->GetLclNum(), tree->AsLclVarCommon()->GetSsaNum()); } @@ -465,12 +517,7 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree, int d return nullptr; } - Scev* enterScev = Analyze(block, enterSsa); - - if (enterScev == nullptr) - { - return nullptr; - } + ScevLocal* enterScev = NewLocal(enterSsa->GetLclNum(), enterSsa->GetSsaNum()); LclVarDsc* dsc = m_comp->lvaGetDesc(store); LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(backedgeSsa->GetSsaNum()); @@ -592,7 +639,7 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree, int d // SCEV node if this is a simple addrec shape. Otherwise nullptr. 
// Scev* ScalarEvolutionContext::CreateSimpleAddRec(GenTreeLclVarCommon* headerStore, - Scev* enterScev, + ScevLocal* enterScev, BasicBlock* stepDefBlock, GenTree* stepDefData) { @@ -920,9 +967,9 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) // optIsIVWideningProfitable: Check to see if IV widening is profitable. // // Parameters: -// lclNum - The primary induction variable -// addRec - Value of the induction variable -// loop - The loop +// lclNum - The primary induction variable +// needsInitialization - Whether or not the widened IV will need explicit initialization +// loop - The loop // // Returns: // True if IV widening is profitable. @@ -937,7 +984,7 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) // 2. We need to store the wide IV back into the narrow one in each of // the exits where the narrow IV is live-in. // -bool Compiler::optIsIVWideningProfitable(unsigned lclNum, ScevAddRec* addRec, FlowGraphNaturalLoop* loop) +bool Compiler::optIsIVWideningProfitable(unsigned lclNum, bool needsInitialization, FlowGraphNaturalLoop* loop) { struct CountZeroExtensionsVisitor : GenTreeVisitor { @@ -1014,9 +1061,9 @@ bool Compiler::optIsIVWideningProfitable(unsigned lclNum, ScevAddRec* addRec, Fl return BasicBlockVisit::Continue; }); - if (!addRec->Start->OperIs(ScevOper::Constant)) + if (needsInitialization) { - // Need to insert a move from the narrow local in the preheader. + // Need to insert a move from the narrow local. savedSize -= ExtensionSize; savedCost -= loop->EntryEdge(0)->getSourceBlock()->getBBWeight(this) * ExtensionCost; } @@ -1231,6 +1278,19 @@ PhaseStatus Compiler::optInductionVariables() m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); ScalarEvolutionContext scevContext(this); + struct WidenedIV + { + unsigned LclNum; + unsigned InitSsaNum; + unsigned NewLclNum; + + WidenedIV(unsigned lclNum, unsigned initSsaNum, unsigned newLclNum) + : LclNum(lclNum), InitSsaNum(initSsaNum), NewLclNum(newLclNum) + { + } + }; + ArrayStack widenedIVs(getAllocator(CMK_LoopScalarEvolution)); + JITDUMP("Widening primary induction variables:\n"); for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder()) { @@ -1292,33 +1352,77 @@ PhaseStatus Compiler::optInductionVariables() continue; } - if (!optIsIVWideningProfitable(lcl->GetLclNum(), addRec, loop)) + // Start value should always be an SSA use from outside the loop + // since we only widen primary IVs. + assert(addRec->Start->OperIs(ScevOper::Local)); + ScevLocal* startLocal = (ScevLocal*)addRec->Start; + unsigned newLclNum = BAD_VAR_NUM; + for (int i = 0; i < widenedIVs.Height(); i++) { - continue; + WidenedIV& wiv = widenedIVs.BottomRef(i); + if ((wiv.LclNum == startLocal->LclNum) && (wiv.InitSsaNum == startLocal->SsaNum)) + { + newLclNum = wiv.NewLclNum; + break; + } } - changed = true; - unsigned newLclNum = lvaGrabTemp(false DEBUGARG(printfAlloc("Widened IV V%02u", lcl->GetLclNum()))); - JITDUMP(" Replacing V%02u with a widened version V%02u\n", lcl->GetLclNum(), newLclNum); - - GenTree* initVal; - if (addRec->Start->OperIs(ScevOper::Constant)) + int64_t startConstant = 0; + bool initToConstant = startLocal->GetConstantValue(this, &startConstant); + // Even with constants we need explicit initialization, but we + // assume that the old local will be DCE'd in which case it + // balances out. 
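// (In the profitability check this means: a reused widened copy adds no new
// initialization at all, and a constant start is assumed to balance against
// DCE'ing the narrow local's constant init, so only the remaining case is
// charged for an explicit wide = (long)narrow store.)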
+ bool needsExplicitInitialization = (newLclNum == BAD_VAR_NUM) && !initToConstant; + if (!optIsIVWideningProfitable(lcl->GetLclNum(), needsExplicitInitialization, loop)) { - ScevConstant* cns = (ScevConstant*)addRec->Start; - initVal = gtNewIconNode((int64_t)(uint32_t)cns->Value, TYP_LONG); + continue; } - else + + changed = true; + + if (newLclNum == BAD_VAR_NUM) { - initVal = gtNewCastNode(TYP_LONG, gtNewLclvNode(lcl->GetLclNum(), TYP_INT), true, TYP_LONG); + newLclNum = lvaGrabTemp(false DEBUGARG(printfAlloc("Widened IV V%02u", lcl->GetLclNum()))); + assert(startLocal->LclNum == lcl->GetLclNum()); + lclDsc = lvaGetDesc(lcl); + LclSsaVarDsc* startSsaDsc = lclDsc->GetPerSsaData(startLocal->SsaNum); + + BasicBlock* initBlock; + if (startSsaDsc->GetBlock() != nullptr) + { + initBlock = startSsaDsc->GetBlock(); + JITDUMP( + "Adding initialization of new widened local to same block as reaching def outside loop, " FMT_BB + "\n", + initBlock->bbNum); + + // Any other use of this IV can reuse the same wide local/initialization. + widenedIVs.Emplace(startLocal->LclNum, startLocal->SsaNum, newLclNum); + } + else + { + initBlock = loop->EntryEdge(0)->getSourceBlock(); + JITDUMP("Adding initialization of new widened local to preheader " FMT_BB "\n", initBlock->bbNum); + } + + GenTree* initVal; + if (initToConstant) + { + initVal = gtNewIconNode((int64_t)(uint32_t)startConstant, TYP_LONG); + } + else + { + initVal = gtNewCastNode(TYP_LONG, gtNewLclvNode(lcl->GetLclNum(), TYP_INT), true, TYP_LONG); + } + + GenTree* widenStore = gtNewTempStore(newLclNum, initVal); + Statement* initStmt = fgNewStmtFromTree(widenStore); + fgInsertStmtNearEnd(initBlock, initStmt); + DISPSTMT(initStmt); + JITDUMP("\n"); } - JITDUMP("Adding initialization of new widened local to preheader:\n"); - GenTree* widenStore = gtNewTempStore(newLclNum, initVal); - BasicBlock* preheader = loop->EntryEdge(0)->getSourceBlock(); - Statement* initStmt = fgNewStmtFromTree(widenStore); - fgInsertStmtAtEnd(preheader, initStmt); - DISPSTMT(initStmt); - JITDUMP("\n"); + JITDUMP(" Replacing uses of V%02u with widened version V%02u\n", lcl->GetLclNum(), newLclNum); loop->VisitLoopBlocks([=](BasicBlock* block) { diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py index baca765929ce4c..1814b5c95cb15f 100644 --- a/src/coreclr/scripts/superpmi.py +++ b/src/coreclr/scripts/superpmi.py @@ -2671,6 +2671,7 @@ def pick_contexts_to_disassemble(self, diffs): if self.coreclr_args.metrics is not None: contexts = diffs examples = [] + contexts = [r for r in diffs if int(r["Diff size"]) > int(r["Base size"])] else: # In the default case we have size improvements/regressions # available without needing to disassemble all, so pick a subset of From 6febef6774a8123f9c2073a5e33a44b5ab1d3229 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 15 Feb 2024 16:47:53 +0100 Subject: [PATCH 38/64] Revert regressions only change --- src/coreclr/scripts/superpmi.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py index 1814b5c95cb15f..baca765929ce4c 100644 --- a/src/coreclr/scripts/superpmi.py +++ b/src/coreclr/scripts/superpmi.py @@ -2671,7 +2671,6 @@ def pick_contexts_to_disassemble(self, diffs): if self.coreclr_args.metrics is not None: contexts = diffs examples = [] - contexts = [r for r in diffs if int(r["Diff size"]) > int(r["Base size"])] else: # In the default case we have size improvements/regressions # available without needing to disassemble all, so pick a 
subset of From 2577046b126ea20e24bb971b20b77b28e24e1f1b Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 16 Feb 2024 11:11:14 +0100 Subject: [PATCH 39/64] Also try replacing uses before the loop --- src/coreclr/jit/compiler.h | 5 +- src/coreclr/jit/inductionvariableopts.cpp | 181 +++++++++++++++++----- 2 files changed, 144 insertions(+), 42 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index e0da9bd166c5bc..07cadbb8eec6ec 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7407,8 +7407,9 @@ class Compiler PhaseStatus optInductionVariables(); bool optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop); - bool optIsIVWideningProfitable(unsigned lclNum, bool needsInitialization, FlowGraphNaturalLoop* loop); - void optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statement* stmt); + bool optIsIVWideningProfitable(unsigned lclNum, bool reusedIV, BasicBlock* initBlock, bool initedToConstant, FlowGraphNaturalLoop* loop); + void optBestEffortReplaceNarrowIVUsesWith(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, BasicBlock* block, Statement* firstStmt); + void optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, Statement* stmt); bool optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNaturalLoop* loop); // Redundant branch opts diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 1170594791fc65..fa68bfc46081ae 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -924,18 +924,20 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) return BasicBlockVisit::Continue; } - for (BasicBlock* pred : exit->PredBlocks()) - { - if (!loop->ContainsBlock(pred)) - { - JITDUMP(" Cannot safely sink widened version of V%02u into exit " FMT_BB " of " FMT_LP - "; it has a non-loop pred " FMT_BB "\n", - lclNum, exit->bbNum, loop->GetIndex(), pred->bbNum); - return BasicBlockVisit::Abort; - } - } - - return BasicBlockVisit::Continue; + return BasicBlockVisit::Abort; + + //for (BasicBlock* pred : exit->PredBlocks()) + //{ + // if (!loop->ContainsBlock(pred)) + // { + // JITDUMP(" Cannot safely sink widened version of V%02u into exit " FMT_BB " of " FMT_LP + // "; it has a non-loop pred " FMT_BB "\n", + // lclNum, exit->bbNum, loop->GetIndex(), pred->bbNum); + // return BasicBlockVisit::Abort; + // } + //} + + //return BasicBlockVisit::Continue; }); #ifdef DEBUG @@ -984,7 +986,7 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) // 2. We need to store the wide IV back into the narrow one in each of // the exits where the narrow IV is live-in. // -bool Compiler::optIsIVWideningProfitable(unsigned lclNum, bool needsInitialization, FlowGraphNaturalLoop* loop) +bool Compiler::optIsIVWideningProfitable(unsigned lclNum, bool reusedIV, BasicBlock* initBlock, bool initedToConstant, FlowGraphNaturalLoop* loop) { struct CountZeroExtensionsVisitor : GenTreeVisitor { @@ -1061,16 +1063,21 @@ bool Compiler::optIsIVWideningProfitable(unsigned lclNum, bool needsInitializati return BasicBlockVisit::Continue; }); - if (needsInitialization) + if (reusedIV) { - // Need to insert a move from the narrow local. 
- savedSize -= ExtensionSize; - savedCost -= loop->EntryEdge(0)->getSourceBlock()->getBBWeight(this) * ExtensionCost; + // If we are reusing the IV then widening is going to remove a use of + // the original narrow local, and we assume this leads to more DCE. + savedSize += ExtensionSize; + savedCost += initBlock->getBBWeight(this) * ExtensionCost; } - else + else if (!initedToConstant) { - // If this is a constant then we make the assumption that we will be - // able to DCE the constant initialization of the narrow local. + // We will need to store the narrow IV into the wide one in the init + // block. We only cost this when init value is not a constant since + // otherwise we assume that constant initialization of the narrow local + // will be DCE'd. + savedSize -= ExtensionSize; + savedCost -= initBlock->getBBWeight(this) * ExtensionCost; } // Now account for the cost of sinks. @@ -1154,14 +1161,19 @@ bool Compiler::optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNa // newLclNum - Wide version of primary induction variable // stmt - The statement to replace uses in. // -void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statement* stmt) +void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, Statement* stmt) { struct ReplaceVisitor : GenTreeVisitor { private: unsigned m_lclNum; + unsigned m_ssaNum; unsigned m_newLclNum; + bool IsLocal(GenTreeLclVarCommon* tree) + { + return (tree->GetLclNum() == m_lclNum) && ((m_ssaNum == SsaConfig::RESERVED_SSA_NUM) || (tree->GetSsaNum() == m_ssaNum)); + } public: bool MadeChanges = false; @@ -1170,8 +1182,8 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statemen DoPreOrder = true, }; - ReplaceVisitor(Compiler* comp, unsigned lclNum, unsigned newLclNum) - : GenTreeVisitor(comp), m_lclNum(lclNum), m_newLclNum(newLclNum) + ReplaceVisitor(Compiler* comp, unsigned lclNum, unsigned ssaNum, unsigned newLclNum) + : GenTreeVisitor(comp), m_lclNum(lclNum), m_ssaNum(ssaNum), m_newLclNum(newLclNum) { } @@ -1185,7 +1197,7 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statemen if ((cast->gtCastType == TYP_LONG) && cast->IsUnsigned()) { GenTree* op = cast->CastOp(); - if (op->OperIs(GT_LCL_VAR) && (op->AsLclVarCommon()->GetLclNum() == m_lclNum)) + if (op->OperIs(GT_LCL_VAR) && IsLocal(op->AsLclVarCommon())) { *use = m_compiler->gtNewLclvNode(m_newLclNum, TYP_LONG); MadeChanges = true; @@ -1194,7 +1206,7 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statemen } } else if (node->OperIs(GT_LCL_VAR, GT_STORE_LCL_VAR, GT_LCL_FLD, GT_STORE_LCL_FLD) && - (node->AsLclVarCommon()->GetLclNum() == m_lclNum)) + IsLocal(node->AsLclVarCommon())) { switch (node->OperGet()) { @@ -1225,7 +1237,7 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statemen } }; - ReplaceVisitor visitor(this, lclNum, newLclNum); + ReplaceVisitor visitor(this, lclNum, ssaNum, newLclNum); visitor.WalkTree(stmt->GetRootNodePointer(), nullptr); if (visitor.MadeChanges) { @@ -1241,6 +1253,41 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statemen } } +//------------------------------------------------------------------------ +// optBestEffortReplaceNarrowIVUsesWith: Try to find and replace uses of the specified +// SSA def with a new local. 
+// +// Parameters: +// lclNum - Previous local +// ssaNum - Previous local SSA num +// newLclNum - New local to replace with +// block - Block to replace in +// firstStmt - First statement in "block" to start replacing in +// +void Compiler::optBestEffortReplaceNarrowIVUsesWith(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, BasicBlock* block, Statement* firstStmt) +{ + JITDUMP(" Replacing V%02u -> V%02u in " FMT_BB " starting at " FMT_STMT "\n", lclNum, newLclNum, block->bbNum, firstStmt == nullptr ? 0 : firstStmt->GetID()); + + for (Statement* stmt = firstStmt; stmt != nullptr; stmt = stmt->GetNextStmt()) + { + JITDUMP(" Replacing V%02u -> V%02u in [%06u]\n", lclNum, newLclNum, + dspTreeID(stmt->GetRootNode())); + DISPSTMT(stmt); + JITDUMP("\n"); + + optReplaceWidenedIV(lclNum, ssaNum, newLclNum, stmt); + } + + block->VisitRegularSuccs(this, [=](BasicBlock* succ) { + if (succ->GetUniquePred(this) == block) + { + optBestEffortReplaceNarrowIVUsesWith(lclNum, ssaNum, newLclNum, succ, succ->firstStmt()); + } + + return BasicBlockVisit::Continue; + }); +} + //------------------------------------------------------------------------ // optInductionVariables: Try and optimize induction variables in the method. // @@ -1276,6 +1323,9 @@ PhaseStatus Compiler::optInductionVariables() #if defined(TARGET_XARCH) && defined(TARGET_64BIT) m_dfsTree = fgComputeDfs(); m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + //m_domTree = FlowGraphDominatorTree::Build(m_dfsTree); + + fgDumpFlowGraph(PHASE_OPTIMIZE_INDUCTION_VARIABLES, PhasePosition::PostPhase); ScalarEvolutionContext scevContext(this); struct WidenedIV @@ -1363,34 +1413,73 @@ PhaseStatus Compiler::optInductionVariables() if ((wiv.LclNum == startLocal->LclNum) && (wiv.InitSsaNum == startLocal->SsaNum)) { newLclNum = wiv.NewLclNum; + JITDUMP(" Reusing previously widened version with initial value V%02u.%u, new local V%02u\n", + wiv.LclNum, wiv.InitSsaNum, wiv.NewLclNum); break; } } int64_t startConstant = 0; bool initToConstant = startLocal->GetConstantValue(this, &startConstant); - // Even with constants we need explicit initialization, but we - // assume that the old local will be DCE'd in which case it - // balances out. 
- bool needsExplicitInitialization = (newLclNum == BAD_VAR_NUM) && !initToConstant; - if (!optIsIVWideningProfitable(lcl->GetLclNum(), needsExplicitInitialization, loop)) + LclSsaVarDsc* startSsaDsc = lclDsc->GetPerSsaData(startLocal->SsaNum); + + BasicBlock* preheader = loop->EntryEdge(0)->getSourceBlock(); + BasicBlock* initBlock = preheader; + if (newLclNum != BAD_VAR_NUM) + { + assert(startSsaDsc->GetBlock() != nullptr); + initBlock = startSsaDsc->GetBlock(); + } + else + { + if (startSsaDsc->GetBlock() != nullptr) + { + initBlock = startSsaDsc->GetBlock(); + } + } + + bool reusedIV = newLclNum != BAD_VAR_NUM; + if (!optIsIVWideningProfitable(lcl->GetLclNum(), reusedIV, initBlock, initToConstant, loop)) { continue; } changed = true; + Statement* narrowInitStmt = nullptr; + if ((initBlock != preheader) && (startSsaDsc->GetDefNode() != nullptr)) + { + GenTree* narrowInitRoot = startSsaDsc->GetDefNode(); + while (true) + { + GenTree* parent = narrowInitRoot->gtGetParent(nullptr); + if (parent == nullptr) + break; + + narrowInitRoot = parent; + } + + for (Statement* stmt : initBlock->Statements()) + { + if (stmt->GetRootNode() == narrowInitRoot) + { + narrowInitStmt = stmt; + break; + } + } + + assert(narrowInitStmt != nullptr); + } + + Statement* initStmt = nullptr; if (newLclNum == BAD_VAR_NUM) { newLclNum = lvaGrabTemp(false DEBUGARG(printfAlloc("Widened IV V%02u", lcl->GetLclNum()))); + INDEBUG(lclDsc = nullptr); assert(startLocal->LclNum == lcl->GetLclNum()); - lclDsc = lvaGetDesc(lcl); - LclSsaVarDsc* startSsaDsc = lclDsc->GetPerSsaData(startLocal->SsaNum); - BasicBlock* initBlock; - if (startSsaDsc->GetBlock() != nullptr) + if (initBlock != preheader) { - initBlock = startSsaDsc->GetBlock(); JITDUMP( "Adding initialization of new widened local to same block as reaching def outside loop, " FMT_BB "\n", @@ -1401,7 +1490,6 @@ PhaseStatus Compiler::optInductionVariables() } else { - initBlock = loop->EntryEdge(0)->getSourceBlock(); JITDUMP("Adding initialization of new widened local to preheader " FMT_BB "\n", initBlock->bbNum); } @@ -1416,14 +1504,27 @@ PhaseStatus Compiler::optInductionVariables() } GenTree* widenStore = gtNewTempStore(newLclNum, initVal); - Statement* initStmt = fgNewStmtFromTree(widenStore); - fgInsertStmtNearEnd(initBlock, initStmt); + initStmt = fgNewStmtFromTree(widenStore); + if (narrowInitStmt != nullptr) + { + fgInsertStmtAfter(initBlock, narrowInitStmt, initStmt); + } + else + { + fgInsertStmtNearEnd(initBlock, initStmt); + } + DISPSTMT(initStmt); JITDUMP("\n"); } JITDUMP(" Replacing uses of V%02u with widened version V%02u\n", lcl->GetLclNum(), newLclNum); + if (initStmt != nullptr) + { + optBestEffortReplaceNarrowIVUsesWith(lcl->GetLclNum(), startLocal->SsaNum, newLclNum, initBlock, initStmt->GetNextStmt()); + } + loop->VisitLoopBlocks([=](BasicBlock* block) { for (Statement* stmt : block->NonPhiStatements()) @@ -1432,7 +1533,7 @@ PhaseStatus Compiler::optInductionVariables() dspTreeID(stmt->GetRootNode())); DISPSTMT(stmt); JITDUMP("\n"); - optReplaceWidenedIV(lcl->GetLclNum(), newLclNum, stmt); + optReplaceWidenedIV(lcl->GetLclNum(), SsaConfig::RESERVED_SSA_NUM, newLclNum, stmt); } return BasicBlockVisit::Continue; From 5d7e15ffbc2e985f9bb2b3efa6ffc0651fbe0e54 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 16 Feb 2024 11:11:52 +0100 Subject: [PATCH 40/64] Skip cases where the same local is the IV in multiple loops --- src/coreclr/jit/inductionvariableopts.cpp | 25 +++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git 
a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index fa68bfc46081ae..5c27b4fe3cfc6b 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -1376,6 +1376,31 @@ PhaseStatus Compiler::optInductionVariables() continue; } + bool hasOtherUses = false; + for (FlowGraphNaturalLoop* otherLoop : m_loops->InReversePostOrder()) + { + if (otherLoop == loop) + continue; + + for (Statement* stmt : otherLoop->GetHeader()->Statements()) + { + if (!stmt->IsPhiDefnStmt()) + break; + + if (stmt->GetRootNode()->AsLclVarCommon()->GetLclNum() == lcl->GetLclNum()) + { + hasOtherUses = true; + JITDUMP(" V%02u has a phi [%06u] in " FMT_LP "'s header " FMT_BB "\n", lcl->GetLclNum(), dspTreeID(stmt->GetRootNode()), otherLoop->GetIndex(), otherLoop->GetHeader()->bbNum); + break; + } + } + } + + if (hasOtherUses) + { + continue; + } + Scev* scev = scevContext.Analyze(loop->GetHeader(), stmt->GetRootNode()); if (scev == nullptr) { From 251a75446290c355aba2293c26f3f559f76d35aa Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 16 Feb 2024 15:27:56 +0100 Subject: [PATCH 41/64] More experimenting --- src/coreclr/jit/compiler.h | 5 +- src/coreclr/jit/inductionvariableopts.cpp | 360 +++++----------------- 2 files changed, 80 insertions(+), 285 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 07cadbb8eec6ec..023776268a4705 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7407,9 +7407,8 @@ class Compiler PhaseStatus optInductionVariables(); bool optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop); - bool optIsIVWideningProfitable(unsigned lclNum, bool reusedIV, BasicBlock* initBlock, bool initedToConstant, FlowGraphNaturalLoop* loop); - void optBestEffortReplaceNarrowIVUsesWith(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, BasicBlock* block, Statement* firstStmt); - void optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, Statement* stmt); + bool optIsIVWideningProfitable(unsigned lclNum, struct ScevAddRec* addRec, FlowGraphNaturalLoop* loop); + void optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statement* stmt); bool optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNaturalLoop* loop); // Redundant branch opts diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 5c27b4fe3cfc6b..51a0da0910bc8f 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -58,8 +58,6 @@ struct Scev { return Type == type; } - - bool GetConstantValue(Compiler* comp, int64_t* cns); }; struct ScevConstant : Scev @@ -80,31 +78,6 @@ struct ScevLocal : Scev const unsigned LclNum; const unsigned SsaNum; - - //------------------------------------------------------------------------ - // GetConstantValue: If this SSA use refers to a constant, then fetch that - // constant. - // - // Parameters: - // comp - Compiler instance - // cns - [out] Constant value; only valid if this function returns true. 
- // - // Returns: - // True if this SSA use refers to a constant; otherwise false, - // - bool GetConstantValue(Compiler* comp, int64_t* cns) - { - LclVarDsc* dsc = comp->lvaGetDesc(LclNum); - LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(SsaNum); - GenTreeLclVarCommon* defNode = ssaDsc->GetDefNode(); - if ((defNode != nullptr) && defNode->Data()->OperIs(GT_CNS_INT, GT_CNS_LNG)) - { - *cns = defNode->Data()->AsIntConCommon()->IntegralValue(); - return true; - } - - return false; - } }; struct ScevUnop : Scev @@ -138,34 +111,6 @@ struct ScevAddRec : Scev Scev* const Step; }; -//------------------------------------------------------------------------ -// Scev::GetConstantValue: If this SCEV is always a constant (i.e. either an -// inline constant or an SSA use referring to a constant) then obtain that -// constant. -// -// Parameters: -// comp - Compiler instance -// cns - [out] Constant value; only valid if this function returns true. -// -// Returns: -// True if a constant could be extracted. -// -bool Scev::GetConstantValue(Compiler* comp, int64_t* cns) -{ - if (OperIs(ScevOper::Constant)) - { - *cns = ((ScevConstant*)this)->Value; - return true; - } - - if (OperIs(ScevOper::Local)) - { - return ((ScevLocal*)this)->GetConstantValue(comp, cns); - } - - return false; -} - typedef JitHashTable, Scev*> ScalarEvolutionMap; // Scalar evolution is analyzed in the context of a single loop, and are @@ -180,7 +125,7 @@ class ScalarEvolutionContext Scev* Analyze(BasicBlock* block, GenTree* tree, int depth); Scev* AnalyzeNew(BasicBlock* block, GenTree* tree, int depth); Scev* CreateSimpleAddRec(GenTreeLclVarCommon* headerStore, - ScevLocal* start, + Scev* start, BasicBlock* stepDefBlock, GenTree* stepDefData); Scev* CreateSimpleInvariantScev(GenTree* tree); @@ -314,12 +259,6 @@ void ScalarEvolutionContext::DumpScev(Scev* scev) { ScevLocal* invariantLocal = (ScevLocal*)scev; printf("V%02u.%u", invariantLocal->LclNum, invariantLocal->SsaNum); - - int64_t cns; - if (invariantLocal->GetConstantValue(m_comp, &cns)) - { - printf(" (%lld)", (long long)cns); - } break; } case ScevOper::ZeroExtend: @@ -460,6 +399,15 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree, int d if ((ssaDsc->GetBlock() == nullptr) || !m_loop->ContainsBlock(ssaDsc->GetBlock())) { + // Invariant local + GenTreeLclVarCommon* def = ssaDsc->GetDefNode(); + if ((def != nullptr) && def->Data()->OperIs(GT_CNS_INT, GT_CNS_LNG)) + { + // For constant definitions from outside the loop we prefer + // to inline the constant. + return CreateScevForConstant(def->Data()->AsIntConCommon()); + } + return NewLocal(tree->AsLclVarCommon()->GetLclNum(), tree->AsLclVarCommon()->GetSsaNum()); } @@ -517,7 +465,12 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree, int d return nullptr; } - ScevLocal* enterScev = NewLocal(enterSsa->GetLclNum(), enterSsa->GetSsaNum()); + Scev* enterScev = Analyze(block, enterSsa); + + if (enterScev == nullptr) + { + return nullptr; + } LclVarDsc* dsc = m_comp->lvaGetDesc(store); LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(backedgeSsa->GetSsaNum()); @@ -639,7 +592,7 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree, int d // SCEV node if this is a simple addrec shape. Otherwise nullptr. 
// Scev* ScalarEvolutionContext::CreateSimpleAddRec(GenTreeLclVarCommon* headerStore, - ScevLocal* enterScev, + Scev* enterScev, BasicBlock* stepDefBlock, GenTree* stepDefData) { @@ -924,8 +877,6 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) return BasicBlockVisit::Continue; } - return BasicBlockVisit::Abort; - //for (BasicBlock* pred : exit->PredBlocks()) //{ // if (!loop->ContainsBlock(pred)) @@ -938,6 +889,7 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) //} //return BasicBlockVisit::Continue; + return BasicBlockVisit::Abort; }); #ifdef DEBUG @@ -969,9 +921,9 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) // optIsIVWideningProfitable: Check to see if IV widening is profitable. // // Parameters: -// lclNum - The primary induction variable -// needsInitialization - Whether or not the widened IV will need explicit initialization -// loop - The loop +// lclNum - The primary induction variable +// addRec - Value of the induction variable +// loop - The loop // // Returns: // True if IV widening is profitable. @@ -986,8 +938,33 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) // 2. We need to store the wide IV back into the narrow one in each of // the exits where the narrow IV is live-in. // -bool Compiler::optIsIVWideningProfitable(unsigned lclNum, bool reusedIV, BasicBlock* initBlock, bool initedToConstant, FlowGraphNaturalLoop* loop) +bool Compiler::optIsIVWideningProfitable(unsigned lclNum, ScevAddRec* addRec, FlowGraphNaturalLoop* loop) { + if (!addRec->Start->OperIs(ScevOper::Constant)) + { + //for (FlowGraphNaturalLoop* otherLoop : m_loops->InReversePostOrder()) + //{ + // if (otherLoop == loop) + // continue; + + // for (Statement* stmt : otherLoop->GetHeader()->Statements()) + // { + // if (!stmt->IsPhiDefnStmt()) + // break; + + // if (stmt->GetRootNode()->AsLclVarCommon()->GetLclNum() == lclNum) + // { + // JITDUMP(" " FMT_LP " also has V%02u as an IV; skipping widening\n", otherLoop->GetIndex(), lclNum); + // return false; + // } + // } + //} + return false; + } + + //if (loop->GetHeader()->HasFlag(BBF_CLONED_LOOP_HEADER)) + // return false; + struct CountZeroExtensionsVisitor : GenTreeVisitor { private: @@ -1063,21 +1040,16 @@ bool Compiler::optIsIVWideningProfitable(unsigned lclNum, bool reusedIV, BasicBl return BasicBlockVisit::Continue; }); - if (reusedIV) + if (!addRec->Start->OperIs(ScevOper::Constant)) { - // If we are reusing the IV then widening is going to remove a use of - // the original narrow local, and we assume this leads to more DCE. - savedSize += ExtensionSize; - savedCost += initBlock->getBBWeight(this) * ExtensionCost; + // Need to insert a move from the narrow local in the preheader. + savedSize -= ExtensionSize; + savedCost -= loop->EntryEdge(0)->getSourceBlock()->getBBWeight(this) * ExtensionCost; } - else if (!initedToConstant) + else { - // We will need to store the narrow IV into the wide one in the init - // block. We only cost this when init value is not a constant since - // otherwise we assume that constant initialization of the narrow local - // will be DCE'd. - savedSize -= ExtensionSize; - savedCost -= initBlock->getBBWeight(this) * ExtensionCost; + // If this is a constant then we make the assumption that we will be + // able to DCE the constant initialization of the narrow local. } // Now account for the cost of sinks. 
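Both versions of this costing code do the same bookkeeping: each zero-extension removed from a loop block is a saving weighted by that block, while the up-front initialization store and every exit sink are charges of the same unit. A standalone sketch of that arithmetic (the unit constants and the acceptance test are placeholders, not the JIT's actual ExtensionSize/ExtensionCost values or its real threshold):

    struct WideningCostModel
    {
        // Placeholder unit costs: code bytes and weighted cycles per
        // extension-sized instruction.
        static constexpr int    ExtensionSize = 3;
        static constexpr double ExtensionCost = 1.0;

        int    savedSize = 0;   // bytes saved (negative = code growth)
        double savedCost = 0.0; // weighted cycles saved

        // A zero-extension of the narrow IV that widening removes.
        void RemovedExtension(double blockWeight)
        {
            savedSize += ExtensionSize;
            savedCost += blockWeight * ExtensionCost;
        }

        // A store widening has to add: the init of the wide copy, or a sink
        // into an exit where the narrow IV stays live.
        void AddedStore(double blockWeight)
        {
            savedSize -= ExtensionSize;
            savedCost -= blockWeight * ExtensionCost;
        }

        // Hypothetical acceptance test: require a dynamic win and cap the
        // static size regression.
        bool Profitable() const
        {
            return (savedCost > 0.0) && (savedSize > -(4 * ExtensionSize));
        }
    };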
@@ -1161,19 +1133,14 @@ bool Compiler::optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNa // newLclNum - Wide version of primary induction variable // stmt - The statement to replace uses in. // -void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, Statement* stmt) +void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statement* stmt) { struct ReplaceVisitor : GenTreeVisitor { private: unsigned m_lclNum; - unsigned m_ssaNum; unsigned m_newLclNum; - bool IsLocal(GenTreeLclVarCommon* tree) - { - return (tree->GetLclNum() == m_lclNum) && ((m_ssaNum == SsaConfig::RESERVED_SSA_NUM) || (tree->GetSsaNum() == m_ssaNum)); - } public: bool MadeChanges = false; @@ -1182,8 +1149,8 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned ne DoPreOrder = true, }; - ReplaceVisitor(Compiler* comp, unsigned lclNum, unsigned ssaNum, unsigned newLclNum) - : GenTreeVisitor(comp), m_lclNum(lclNum), m_ssaNum(ssaNum), m_newLclNum(newLclNum) + ReplaceVisitor(Compiler* comp, unsigned lclNum, unsigned newLclNum) + : GenTreeVisitor(comp), m_lclNum(lclNum), m_newLclNum(newLclNum) { } @@ -1197,7 +1164,7 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned ne if ((cast->gtCastType == TYP_LONG) && cast->IsUnsigned()) { GenTree* op = cast->CastOp(); - if (op->OperIs(GT_LCL_VAR) && IsLocal(op->AsLclVarCommon())) + if (op->OperIs(GT_LCL_VAR) && (op->AsLclVarCommon()->GetLclNum() == m_lclNum)) { *use = m_compiler->gtNewLclvNode(m_newLclNum, TYP_LONG); MadeChanges = true; @@ -1206,7 +1173,7 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned ne } } else if (node->OperIs(GT_LCL_VAR, GT_STORE_LCL_VAR, GT_LCL_FLD, GT_STORE_LCL_FLD) && - IsLocal(node->AsLclVarCommon())) + (node->AsLclVarCommon()->GetLclNum() == m_lclNum)) { switch (node->OperGet()) { @@ -1237,7 +1204,7 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned ne } }; - ReplaceVisitor visitor(this, lclNum, ssaNum, newLclNum); + ReplaceVisitor visitor(this, lclNum, newLclNum); visitor.WalkTree(stmt->GetRootNodePointer(), nullptr); if (visitor.MadeChanges) { @@ -1253,41 +1220,6 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned ne } } -//------------------------------------------------------------------------ -// optBestEffortReplaceNarrowIVUsesWith: Try to find and replace uses of the specified -// SSA def with a new local. -// -// Parameters: -// lclNum - Previous local -// ssaNum - Previous local SSA num -// newLclNum - New local to replace with -// block - Block to replace in -// firstStmt - First statement in "block" to start replacing in -// -void Compiler::optBestEffortReplaceNarrowIVUsesWith(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, BasicBlock* block, Statement* firstStmt) -{ - JITDUMP(" Replacing V%02u -> V%02u in " FMT_BB " starting at " FMT_STMT "\n", lclNum, newLclNum, block->bbNum, firstStmt == nullptr ? 
0 : firstStmt->GetID()); - - for (Statement* stmt = firstStmt; stmt != nullptr; stmt = stmt->GetNextStmt()) - { - JITDUMP(" Replacing V%02u -> V%02u in [%06u]\n", lclNum, newLclNum, - dspTreeID(stmt->GetRootNode())); - DISPSTMT(stmt); - JITDUMP("\n"); - - optReplaceWidenedIV(lclNum, ssaNum, newLclNum, stmt); - } - - block->VisitRegularSuccs(this, [=](BasicBlock* succ) { - if (succ->GetUniquePred(this) == block) - { - optBestEffortReplaceNarrowIVUsesWith(lclNum, ssaNum, newLclNum, succ, succ->firstStmt()); - } - - return BasicBlockVisit::Continue; - }); -} - //------------------------------------------------------------------------ // optInductionVariables: Try and optimize induction variables in the method. // @@ -1323,24 +1255,8 @@ PhaseStatus Compiler::optInductionVariables() #if defined(TARGET_XARCH) && defined(TARGET_64BIT) m_dfsTree = fgComputeDfs(); m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); - //m_domTree = FlowGraphDominatorTree::Build(m_dfsTree); - - fgDumpFlowGraph(PHASE_OPTIMIZE_INDUCTION_VARIABLES, PhasePosition::PostPhase); ScalarEvolutionContext scevContext(this); - struct WidenedIV - { - unsigned LclNum; - unsigned InitSsaNum; - unsigned NewLclNum; - - WidenedIV(unsigned lclNum, unsigned initSsaNum, unsigned newLclNum) - : LclNum(lclNum), InitSsaNum(initSsaNum), NewLclNum(newLclNum) - { - } - }; - ArrayStack<WidenedIV> widenedIVs(getAllocator(CMK_LoopScalarEvolution)); - JITDUMP("Widening primary induction variables:\n"); for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder()) { @@ -1376,31 +1292,6 @@ PhaseStatus Compiler::optInductionVariables() continue; } - bool hasOtherUses = false; - for (FlowGraphNaturalLoop* otherLoop : m_loops->InReversePostOrder()) - { - if (otherLoop == loop) - continue; - - for (Statement* stmt : otherLoop->GetHeader()->Statements()) - { - if (!stmt->IsPhiDefnStmt()) - break; - - if (stmt->GetRootNode()->AsLclVarCommon()->GetLclNum() == lcl->GetLclNum()) - { - hasOtherUses = true; - JITDUMP(" V%02u has a phi [%06u] in " FMT_LP "'s header " FMT_BB "\n", lcl->GetLclNum(), dspTreeID(stmt->GetRootNode()), otherLoop->GetIndex(), otherLoop->GetHeader()->bbNum); - break; - } - } - } - - if (hasOtherUses) - { - continue; - } - Scev* scev = scevContext.Analyze(loop->GetHeader(), stmt->GetRootNode()); if (scev == nullptr) { @@ -1427,128 +1318,33 @@ PhaseStatus Compiler::optInductionVariables() continue; } - // Start value should always be an SSA use from outside the loop - // since we only widen primary IVs.
- assert(addRec->Start->OperIs(ScevOper::Local)); - ScevLocal* startLocal = (ScevLocal*)addRec->Start; - unsigned newLclNum = BAD_VAR_NUM; - for (int i = 0; i < widenedIVs.Height(); i++) - { - WidenedIV& wiv = widenedIVs.BottomRef(i); - if ((wiv.LclNum == startLocal->LclNum) && (wiv.InitSsaNum == startLocal->SsaNum)) - { - newLclNum = wiv.NewLclNum; - JITDUMP(" Reusing previously widened version with initial value V%02u.%u, new local V%02u\n", - wiv.LclNum, wiv.InitSsaNum, wiv.NewLclNum); - break; - } - } - - int64_t startConstant = 0; - bool initToConstant = startLocal->GetConstantValue(this, &startConstant); - LclSsaVarDsc* startSsaDsc = lclDsc->GetPerSsaData(startLocal->SsaNum); - - BasicBlock* preheader = loop->EntryEdge(0)->getSourceBlock(); - BasicBlock* initBlock = preheader; - if (newLclNum != BAD_VAR_NUM) - { - assert(startSsaDsc->GetBlock() != nullptr); - initBlock = startSsaDsc->GetBlock(); - } - else - { - if (startSsaDsc->GetBlock() != nullptr) - { - initBlock = startSsaDsc->GetBlock(); - } - } - - bool reusedIV = newLclNum != BAD_VAR_NUM; - if (!optIsIVWideningProfitable(lcl->GetLclNum(), reusedIV, initBlock, initToConstant, loop)) + if (!optIsIVWideningProfitable(lcl->GetLclNum(), addRec, loop)) { continue; } - changed = true; + changed = true; + unsigned newLclNum = lvaGrabTemp(false DEBUGARG(printfAlloc("Widened IV V%02u", lcl->GetLclNum()))); + JITDUMP(" Replacing V%02u with a widened version V%02u\n", lcl->GetLclNum(), newLclNum); - Statement* narrowInitStmt = nullptr; - if ((initBlock != preheader) && (startSsaDsc->GetDefNode() != nullptr)) + GenTree* initVal; + if (addRec->Start->OperIs(ScevOper::Constant)) { - GenTree* narrowInitRoot = startSsaDsc->GetDefNode(); - while (true) - { - GenTree* parent = narrowInitRoot->gtGetParent(nullptr); - if (parent == nullptr) - break; - - narrowInitRoot = parent; - } - - for (Statement* stmt : initBlock->Statements()) - { - if (stmt->GetRootNode() == narrowInitRoot) - { - narrowInitStmt = stmt; - break; - } - } - - assert(narrowInitStmt != nullptr); + ScevConstant* cns = (ScevConstant*)addRec->Start; + initVal = gtNewIconNode((int64_t)(uint32_t)cns->Value, TYP_LONG); } - - Statement* initStmt = nullptr; - if (newLclNum == BAD_VAR_NUM) + else { - newLclNum = lvaGrabTemp(false DEBUGARG(printfAlloc("Widened IV V%02u", lcl->GetLclNum()))); - INDEBUG(lclDsc = nullptr); - assert(startLocal->LclNum == lcl->GetLclNum()); - - if (initBlock != preheader) - { - JITDUMP( - "Adding initialization of new widened local to same block as reaching def outside loop, " FMT_BB - "\n", - initBlock->bbNum); - - // Any other use of this IV can reuse the same wide local/initialization. 
- widenedIVs.Emplace(startLocal->LclNum, startLocal->SsaNum, newLclNum); - } - else - { - JITDUMP("Adding initialization of new widened local to preheader " FMT_BB "\n", initBlock->bbNum); - } - - GenTree* initVal; - if (initToConstant) - { - initVal = gtNewIconNode((int64_t)(uint32_t)startConstant, TYP_LONG); - } - else - { - initVal = gtNewCastNode(TYP_LONG, gtNewLclvNode(lcl->GetLclNum(), TYP_INT), true, TYP_LONG); - } - - GenTree* widenStore = gtNewTempStore(newLclNum, initVal); - initStmt = fgNewStmtFromTree(widenStore); - if (narrowInitStmt != nullptr) - { - fgInsertStmtAfter(initBlock, narrowInitStmt, initStmt); - } - else - { - fgInsertStmtNearEnd(initBlock, initStmt); - } - - DISPSTMT(initStmt); - JITDUMP("\n"); + initVal = gtNewCastNode(TYP_LONG, gtNewLclvNode(lcl->GetLclNum(), TYP_INT), true, TYP_LONG); } - JITDUMP(" Replacing uses of V%02u with widened version V%02u\n", lcl->GetLclNum(), newLclNum); - - if (initStmt != nullptr) - { - optBestEffortReplaceNarrowIVUsesWith(lcl->GetLclNum(), startLocal->SsaNum, newLclNum, initBlock, initStmt->GetNextStmt()); - } + JITDUMP("Adding initialization of new widened local to preheader:\n"); + GenTree* widenStore = gtNewTempStore(newLclNum, initVal); + BasicBlock* preheader = loop->EntryEdge(0)->getSourceBlock(); + Statement* initStmt = fgNewStmtFromTree(widenStore); + fgInsertStmtAtEnd(preheader, initStmt); + DISPSTMT(initStmt); + JITDUMP("\n"); loop->VisitLoopBlocks([=](BasicBlock* block) { @@ -1558,7 +1354,7 @@ PhaseStatus Compiler::optInductionVariables() dspTreeID(stmt->GetRootNode())); DISPSTMT(stmt); JITDUMP("\n"); - optReplaceWidenedIV(lcl->GetLclNum(), SsaConfig::RESERVED_SSA_NUM, newLclNum, stmt); + optReplaceWidenedIV(lcl->GetLclNum(), newLclNum, stmt); } return BasicBlockVisit::Continue; From e0905f7f1c57142aa7d7915187c1c972ea11fd94 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 16 Feb 2024 15:44:23 +0100 Subject: [PATCH 42/64] Improve --- src/coreclr/jit/compiler.h | 5 +- src/coreclr/jit/inductionvariableopts.cpp | 382 +++++++++++++++++----- 2 files changed, 295 insertions(+), 92 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 023776268a4705..07cadbb8eec6ec 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7407,8 +7407,9 @@ class Compiler PhaseStatus optInductionVariables(); bool optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop); - bool optIsIVWideningProfitable(unsigned lclNum, struct ScevAddRec* addRec, FlowGraphNaturalLoop* loop); - void optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statement* stmt); + bool optIsIVWideningProfitable(unsigned lclNum, bool reusedIV, BasicBlock* initBlock, bool initedToConstant, FlowGraphNaturalLoop* loop); + void optBestEffortReplaceNarrowIVUsesWith(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, BasicBlock* block, Statement* firstStmt); + void optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, Statement* stmt); bool optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNaturalLoop* loop); // Redundant branch opts diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 51a0da0910bc8f..e0b91c524e9fa7 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -58,6 +58,8 @@ struct Scev { return Type == type; } + + bool GetConstantValue(Compiler* comp, int64_t* cns); }; struct ScevConstant : Scev @@ -78,6 +80,31 @@ struct ScevLocal : Scev const unsigned 
LclNum; const unsigned SsaNum; + + //------------------------------------------------------------------------ + // GetConstantValue: If this SSA use refers to a constant, then fetch that + // constant. + // + // Parameters: + // comp - Compiler instance + // cns - [out] Constant value; only valid if this function returns true. + // + // Returns: + // True if this SSA use refers to a constant; otherwise false, + // + bool GetConstantValue(Compiler* comp, int64_t* cns) + { + LclVarDsc* dsc = comp->lvaGetDesc(LclNum); + LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(SsaNum); + GenTreeLclVarCommon* defNode = ssaDsc->GetDefNode(); + if ((defNode != nullptr) && defNode->Data()->OperIs(GT_CNS_INT, GT_CNS_LNG)) + { + *cns = defNode->Data()->AsIntConCommon()->IntegralValue(); + return true; + } + + return false; + } }; struct ScevUnop : Scev @@ -111,6 +138,34 @@ struct ScevAddRec : Scev Scev* const Step; }; +//------------------------------------------------------------------------ +// Scev::GetConstantValue: If this SCEV is always a constant (i.e. either an +// inline constant or an SSA use referring to a constant) then obtain that +// constant. +// +// Parameters: +// comp - Compiler instance +// cns - [out] Constant value; only valid if this function returns true. +// +// Returns: +// True if a constant could be extracted. +// +bool Scev::GetConstantValue(Compiler* comp, int64_t* cns) +{ + if (OperIs(ScevOper::Constant)) + { + *cns = ((ScevConstant*)this)->Value; + return true; + } + + if (OperIs(ScevOper::Local)) + { + return ((ScevLocal*)this)->GetConstantValue(comp, cns); + } + + return false; +} + typedef JitHashTable, Scev*> ScalarEvolutionMap; // Scalar evolution is analyzed in the context of a single loop, and are @@ -125,7 +180,7 @@ class ScalarEvolutionContext Scev* Analyze(BasicBlock* block, GenTree* tree, int depth); Scev* AnalyzeNew(BasicBlock* block, GenTree* tree, int depth); Scev* CreateSimpleAddRec(GenTreeLclVarCommon* headerStore, - Scev* start, + ScevLocal* start, BasicBlock* stepDefBlock, GenTree* stepDefData); Scev* CreateSimpleInvariantScev(GenTree* tree); @@ -259,6 +314,12 @@ void ScalarEvolutionContext::DumpScev(Scev* scev) { ScevLocal* invariantLocal = (ScevLocal*)scev; printf("V%02u.%u", invariantLocal->LclNum, invariantLocal->SsaNum); + + int64_t cns; + if (invariantLocal->GetConstantValue(m_comp, &cns)) + { + printf(" (%lld)", (long long)cns); + } break; } case ScevOper::ZeroExtend: @@ -399,15 +460,6 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree, int d if ((ssaDsc->GetBlock() == nullptr) || !m_loop->ContainsBlock(ssaDsc->GetBlock())) { - // Invariant local - GenTreeLclVarCommon* def = ssaDsc->GetDefNode(); - if ((def != nullptr) && def->Data()->OperIs(GT_CNS_INT, GT_CNS_LNG)) - { - // For constant definitions from outside the loop we prefer - // to inline the constant. 
- return CreateScevForConstant(def->Data()->AsIntConCommon()); - } - return NewLocal(tree->AsLclVarCommon()->GetLclNum(), tree->AsLclVarCommon()->GetSsaNum()); } @@ -465,12 +517,7 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree, int d return nullptr; } - Scev* enterScev = Analyze(block, enterSsa); - - if (enterScev == nullptr) - { - return nullptr; - } + ScevLocal* enterScev = NewLocal(enterSsa->GetLclNum(), enterSsa->GetSsaNum()); LclVarDsc* dsc = m_comp->lvaGetDesc(store); LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(backedgeSsa->GetSsaNum()); @@ -592,7 +639,7 @@ Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree, int d // SCEV node if this is a simple addrec shape. Otherwise nullptr. // Scev* ScalarEvolutionContext::CreateSimpleAddRec(GenTreeLclVarCommon* headerStore, - Scev* enterScev, + ScevLocal* enterScev, BasicBlock* stepDefBlock, GenTree* stepDefData) { @@ -877,19 +924,18 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) return BasicBlockVisit::Continue; } - //for (BasicBlock* pred : exit->PredBlocks()) - //{ - // if (!loop->ContainsBlock(pred)) - // { - // JITDUMP(" Cannot safely sink widened version of V%02u into exit " FMT_BB " of " FMT_LP - // "; it has a non-loop pred " FMT_BB "\n", - // lclNum, exit->bbNum, loop->GetIndex(), pred->bbNum); - // return BasicBlockVisit::Abort; - // } - //} - - //return BasicBlockVisit::Continue; - return BasicBlockVisit::Abort; + for (BasicBlock* pred : exit->PredBlocks()) + { + if (!loop->ContainsBlock(pred)) + { + JITDUMP(" Cannot safely sink widened version of V%02u into exit " FMT_BB " of " FMT_LP + "; it has a non-loop pred " FMT_BB "\n", + lclNum, exit->bbNum, loop->GetIndex(), pred->bbNum); + return BasicBlockVisit::Abort; + } + } + + return BasicBlockVisit::Continue; }); #ifdef DEBUG @@ -921,9 +967,9 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) // optIsIVWideningProfitable: Check to see if IV widening is profitable. // // Parameters: -// lclNum - The primary induction variable -// addRec - Value of the induction variable -// loop - The loop +// lclNum - The primary induction variable +// needsInitialization - Whether or not the widened IV will need explicit initialization +// loop - The loop // // Returns: // True if IV widening is profitable. @@ -938,33 +984,8 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) // 2. We need to store the wide IV back into the narrow one in each of // the exits where the narrow IV is live-in. 
// -bool Compiler::optIsIVWideningProfitable(unsigned lclNum, ScevAddRec* addRec, FlowGraphNaturalLoop* loop) +bool Compiler::optIsIVWideningProfitable(unsigned lclNum, bool reusedIV, BasicBlock* initBlock, bool initedToConstant, FlowGraphNaturalLoop* loop) { - if (!addRec->Start->OperIs(ScevOper::Constant)) - { - //for (FlowGraphNaturalLoop* otherLoop : m_loops->InReversePostOrder()) - //{ - // if (otherLoop == loop) - // continue; - - // for (Statement* stmt : otherLoop->GetHeader()->Statements()) - // { - // if (!stmt->IsPhiDefnStmt()) - // break; - - // if (stmt->GetRootNode()->AsLclVarCommon()->GetLclNum() == lclNum) - // { - // JITDUMP(" " FMT_LP " also has V%02u as an IV; skipping widening\n", otherLoop->GetIndex(), lclNum); - // return false; - // } - // } - //} - return false; - } - - //if (loop->GetHeader()->HasFlag(BBF_CLONED_LOOP_HEADER)) - // return false; - struct CountZeroExtensionsVisitor : GenTreeVisitor { private: @@ -1040,16 +1061,21 @@ bool Compiler::optIsIVWideningProfitable(unsigned lclNum, ScevAddRec* addRec, Fl return BasicBlockVisit::Continue; }); - if (!addRec->Start->OperIs(ScevOper::Constant)) + if (reusedIV) { - // Need to insert a move from the narrow local in the preheader. - savedSize -= ExtensionSize; - savedCost -= loop->EntryEdge(0)->getSourceBlock()->getBBWeight(this) * ExtensionCost; + // If we are reusing the IV then widening is going to remove a use of + // the original narrow local, and we assume this leads to more DCE. + savedSize += ExtensionSize; + savedCost += initBlock->getBBWeight(this) * ExtensionCost; } - else + else if (!initedToConstant) { - // If this is a constant then we make the assumption that we will be - // able to DCE the constant initialization of the narrow local. + // We will need to store the narrow IV into the wide one in the init + // block. We only cost this when init value is not a constant since + // otherwise we assume that constant initialization of the narrow local + // will be DCE'd. + savedSize -= ExtensionSize; + savedCost -= initBlock->getBBWeight(this) * ExtensionCost; } // Now account for the cost of sinks. @@ -1133,14 +1159,19 @@ bool Compiler::optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNa // newLclNum - Wide version of primary induction variable // stmt - The statement to replace uses in. 
// -void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statement* stmt) +void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, Statement* stmt) { struct ReplaceVisitor : GenTreeVisitor { private: unsigned m_lclNum; + unsigned m_ssaNum; unsigned m_newLclNum; + bool IsLocal(GenTreeLclVarCommon* tree) + { + return (tree->GetLclNum() == m_lclNum) && ((m_ssaNum == SsaConfig::RESERVED_SSA_NUM) || (tree->GetSsaNum() == m_ssaNum)); + } public: bool MadeChanges = false; @@ -1149,8 +1180,8 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statemen DoPreOrder = true, }; - ReplaceVisitor(Compiler* comp, unsigned lclNum, unsigned newLclNum) - : GenTreeVisitor(comp), m_lclNum(lclNum), m_newLclNum(newLclNum) + ReplaceVisitor(Compiler* comp, unsigned lclNum, unsigned ssaNum, unsigned newLclNum) + : GenTreeVisitor(comp), m_lclNum(lclNum), m_ssaNum(ssaNum), m_newLclNum(newLclNum) { } @@ -1164,7 +1195,7 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statemen if ((cast->gtCastType == TYP_LONG) && cast->IsUnsigned()) { GenTree* op = cast->CastOp(); - if (op->OperIs(GT_LCL_VAR) && (op->AsLclVarCommon()->GetLclNum() == m_lclNum)) + if (op->OperIs(GT_LCL_VAR) && IsLocal(op->AsLclVarCommon())) { *use = m_compiler->gtNewLclvNode(m_newLclNum, TYP_LONG); MadeChanges = true; @@ -1173,7 +1204,7 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statemen } } else if (node->OperIs(GT_LCL_VAR, GT_STORE_LCL_VAR, GT_LCL_FLD, GT_STORE_LCL_FLD) && - (node->AsLclVarCommon()->GetLclNum() == m_lclNum)) + IsLocal(node->AsLclVarCommon())) { switch (node->OperGet()) { @@ -1204,7 +1235,7 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statemen } }; - ReplaceVisitor visitor(this, lclNum, newLclNum); + ReplaceVisitor visitor(this, lclNum, ssaNum, newLclNum); visitor.WalkTree(stmt->GetRootNodePointer(), nullptr); if (visitor.MadeChanges) { @@ -1220,6 +1251,41 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned newLclNum, Statemen } } +//------------------------------------------------------------------------ +// optBestEffortReplaceNarrowIVUsesWith: Try to find and replace uses of the specified +// SSA def with a new local. +// +// Parameters: +// lclNum - Previous local +// ssaNum - Previous local SSA num +// newLclNum - New local to replace with +// block - Block to replace in +// firstStmt - First statement in "block" to start replacing in +// +void Compiler::optBestEffortReplaceNarrowIVUsesWith(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, BasicBlock* block, Statement* firstStmt) +{ + JITDUMP(" Replacing V%02u -> V%02u in " FMT_BB " starting at " FMT_STMT "\n", lclNum, newLclNum, block->bbNum, firstStmt == nullptr ? 0 : firstStmt->GetID()); + + for (Statement* stmt = firstStmt; stmt != nullptr; stmt = stmt->GetNextStmt()) + { + JITDUMP(" Replacing V%02u -> V%02u in [%06u]\n", lclNum, newLclNum, + dspTreeID(stmt->GetRootNode())); + DISPSTMT(stmt); + JITDUMP("\n"); + + optReplaceWidenedIV(lclNum, ssaNum, newLclNum, stmt); + } + + block->VisitRegularSuccs(this, [=](BasicBlock* succ) { + if (succ->GetUniquePred(this) == block) + { + optBestEffortReplaceNarrowIVUsesWith(lclNum, ssaNum, newLclNum, succ, succ->firstStmt()); + } + + return BasicBlockVisit::Continue; + }); +} + //------------------------------------------------------------------------ // optInductionVariables: Try and optimize induction variables in the method. 
// @@ -1255,8 +1321,24 @@ PhaseStatus Compiler::optInductionVariables() #if defined(TARGET_XARCH) && defined(TARGET_64BIT) m_dfsTree = fgComputeDfs(); m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + //m_domTree = FlowGraphDominatorTree::Build(m_dfsTree); + + fgDumpFlowGraph(PHASE_OPTIMIZE_INDUCTION_VARIABLES, PhasePosition::PostPhase); ScalarEvolutionContext scevContext(this); + struct WidenedIV + { + unsigned LclNum; + unsigned InitSsaNum; + unsigned NewLclNum; + + WidenedIV(unsigned lclNum, unsigned initSsaNum, unsigned newLclNum) + : LclNum(lclNum), InitSsaNum(initSsaNum), NewLclNum(newLclNum) + { + } + }; + ArrayStack widenedIVs(getAllocator(CMK_LoopScalarEvolution)); + JITDUMP("Widening primary induction variables:\n"); for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder()) { @@ -1292,6 +1374,31 @@ PhaseStatus Compiler::optInductionVariables() continue; } + bool hasOtherUses = false; + for (FlowGraphNaturalLoop* otherLoop : m_loops->InReversePostOrder()) + { + if (otherLoop == loop) + continue; + + for (Statement* stmt : otherLoop->GetHeader()->Statements()) + { + if (!stmt->IsPhiDefnStmt()) + break; + + if (stmt->GetRootNode()->AsLclVarCommon()->GetLclNum() == lcl->GetLclNum()) + { + hasOtherUses = true; + JITDUMP(" V%02u has a phi [%06u] in " FMT_LP "'s header " FMT_BB "\n", lcl->GetLclNum(), dspTreeID(stmt->GetRootNode()), otherLoop->GetIndex(), otherLoop->GetHeader()->bbNum); + break; + } + } + } + + if (hasOtherUses) + { + continue; + } + Scev* scev = scevContext.Analyze(loop->GetHeader(), stmt->GetRootNode()); if (scev == nullptr) { @@ -1318,33 +1425,128 @@ PhaseStatus Compiler::optInductionVariables() continue; } - if (!optIsIVWideningProfitable(lcl->GetLclNum(), addRec, loop)) + // Start value should always be an SSA use from outside the loop + // since we only widen primary IVs. 
+ assert(addRec->Start->OperIs(ScevOper::Local)); + ScevLocal* startLocal = (ScevLocal*)addRec->Start; + unsigned newLclNum = BAD_VAR_NUM; + for (int i = 0; i < widenedIVs.Height(); i++) { - continue; + WidenedIV& wiv = widenedIVs.BottomRef(i); + if ((wiv.LclNum == startLocal->LclNum) && (wiv.InitSsaNum == startLocal->SsaNum)) + { + newLclNum = wiv.NewLclNum; + JITDUMP(" Reusing previously widened version with initial value V%02u.%u, new local V%02u\n", + wiv.LclNum, wiv.InitSsaNum, wiv.NewLclNum); + break; + } } - changed = true; - unsigned newLclNum = lvaGrabTemp(false DEBUGARG(printfAlloc("Widened IV V%02u", lcl->GetLclNum()))); - JITDUMP(" Replacing V%02u with a widened version V%02u\n", lcl->GetLclNum(), newLclNum); + int64_t startConstant = 0; + bool initToConstant = startLocal->GetConstantValue(this, &startConstant); + LclSsaVarDsc* startSsaDsc = lclDsc->GetPerSsaData(startLocal->SsaNum); - GenTree* initVal; - if (addRec->Start->OperIs(ScevOper::Constant)) + BasicBlock* preheader = loop->EntryEdge(0)->getSourceBlock(); + BasicBlock* initBlock = preheader; + if (newLclNum != BAD_VAR_NUM) { - ScevConstant* cns = (ScevConstant*)addRec->Start; - initVal = gtNewIconNode((int64_t)(uint32_t)cns->Value, TYP_LONG); + assert(startSsaDsc->GetBlock() != nullptr); + initBlock = startSsaDsc->GetBlock(); } else { - initVal = gtNewCastNode(TYP_LONG, gtNewLclvNode(lcl->GetLclNum(), TYP_INT), true, TYP_LONG); + if (startSsaDsc->GetBlock() != nullptr) + { + initBlock = startSsaDsc->GetBlock(); + } } - JITDUMP("Adding initialization of new widened local to preheader:\n"); - GenTree* widenStore = gtNewTempStore(newLclNum, initVal); - BasicBlock* preheader = loop->EntryEdge(0)->getSourceBlock(); - Statement* initStmt = fgNewStmtFromTree(widenStore); - fgInsertStmtAtEnd(preheader, initStmt); - DISPSTMT(initStmt); - JITDUMP("\n"); + bool reusedIV = newLclNum != BAD_VAR_NUM; + if (!optIsIVWideningProfitable(lcl->GetLclNum(), reusedIV, initBlock, initToConstant, loop)) + { + continue; + } + + changed = true; + + Statement* narrowInitStmt = nullptr; + if ((initBlock != preheader) && (startSsaDsc->GetDefNode() != nullptr)) + { + GenTree* narrowInitRoot = startSsaDsc->GetDefNode(); + while (true) + { + GenTree* parent = narrowInitRoot->gtGetParent(nullptr); + if (parent == nullptr) + break; + + narrowInitRoot = parent; + } + + for (Statement* stmt : initBlock->Statements()) + { + if (stmt->GetRootNode() == narrowInitRoot) + { + narrowInitStmt = stmt; + break; + } + } + + assert(narrowInitStmt != nullptr); + } + + Statement* initStmt = nullptr; + if (newLclNum == BAD_VAR_NUM) + { + newLclNum = lvaGrabTemp(false DEBUGARG(printfAlloc("Widened IV V%02u", lcl->GetLclNum()))); + INDEBUG(lclDsc = nullptr); + assert(startLocal->LclNum == lcl->GetLclNum()); + + if (initBlock != preheader) + { + JITDUMP( + "Adding initialization of new widened local to same block as reaching def outside loop, " FMT_BB + "\n", + initBlock->bbNum); + + // Any other use of this IV can reuse the same wide local/initialization. 
+ widenedIVs.Emplace(startLocal->LclNum, startLocal->SsaNum, newLclNum); + } + else + { + JITDUMP("Adding initialization of new widened local to preheader " FMT_BB "\n", initBlock->bbNum); + } + + GenTree* initVal; + if (initToConstant) + { + initVal = gtNewIconNode((int64_t)(uint32_t)startConstant, TYP_LONG); + } + else + { + initVal = gtNewCastNode(TYP_LONG, gtNewLclvNode(lcl->GetLclNum(), TYP_INT), true, TYP_LONG); + } + + GenTree* widenStore = gtNewTempStore(newLclNum, initVal); + initStmt = fgNewStmtFromTree(widenStore); + if (narrowInitStmt != nullptr) + { + fgInsertStmtAfter(initBlock, narrowInitStmt, initStmt); + } + else + { + fgInsertStmtNearEnd(initBlock, initStmt); + } + + DISPSTMT(initStmt); + JITDUMP("\n"); + } + + JITDUMP(" Replacing uses of V%02u with widened version V%02u\n", lcl->GetLclNum(), newLclNum); + + if (initStmt != nullptr) + { + optBestEffortReplaceNarrowIVUsesWith(lcl->GetLclNum(), startLocal->SsaNum, newLclNum, initBlock, initStmt->GetNextStmt()); + } loop->VisitLoopBlocks([=](BasicBlock* block) { @@ -1354,7 +1556,7 @@ PhaseStatus Compiler::optInductionVariables() dspTreeID(stmt->GetRootNode())); DISPSTMT(stmt); JITDUMP("\n"); - optReplaceWidenedIV(lcl->GetLclNum(), newLclNum, stmt); + optReplaceWidenedIV(lcl->GetLclNum(), SsaConfig::RESERVED_SSA_NUM, newLclNum, stmt); } return BasicBlockVisit::Continue; From 5a6f2f8c2db9faecafec236fdb8b0e216f86c4f5 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 16 Feb 2024 15:44:57 +0100 Subject: [PATCH 43/64] Run jit-format --- src/coreclr/jit/compiler.h | 6 ++-- src/coreclr/jit/inductionvariableopts.cpp | 40 +++++++++++++---------- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 07cadbb8eec6ec..8829da05af7f38 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7407,8 +7407,10 @@ class Compiler PhaseStatus optInductionVariables(); bool optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop); - bool optIsIVWideningProfitable(unsigned lclNum, bool reusedIV, BasicBlock* initBlock, bool initedToConstant, FlowGraphNaturalLoop* loop); - void optBestEffortReplaceNarrowIVUsesWith(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, BasicBlock* block, Statement* firstStmt); + bool optIsIVWideningProfitable( + unsigned lclNum, bool reusedIV, BasicBlock* initBlock, bool initedToConstant, FlowGraphNaturalLoop* loop); + void optBestEffortReplaceNarrowIVUsesWith( + unsigned lclNum, unsigned ssaNum, unsigned newLclNum, BasicBlock* block, Statement* firstStmt); void optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, Statement* stmt); bool optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNaturalLoop* loop); diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index e0b91c524e9fa7..b8efe611f16bce 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -984,7 +984,8 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) // 2. We need to store the wide IV back into the narrow one in each of // the exits where the narrow IV is live-in. 
// -bool Compiler::optIsIVWideningProfitable(unsigned lclNum, bool reusedIV, BasicBlock* initBlock, bool initedToConstant, FlowGraphNaturalLoop* loop) +bool Compiler::optIsIVWideningProfitable( + unsigned lclNum, bool reusedIV, BasicBlock* initBlock, bool initedToConstant, FlowGraphNaturalLoop* loop) { struct CountZeroExtensionsVisitor : GenTreeVisitor { @@ -1170,8 +1171,10 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned ne bool IsLocal(GenTreeLclVarCommon* tree) { - return (tree->GetLclNum() == m_lclNum) && ((m_ssaNum == SsaConfig::RESERVED_SSA_NUM) || (tree->GetSsaNum() == m_ssaNum)); + return (tree->GetLclNum() == m_lclNum) && + ((m_ssaNum == SsaConfig::RESERVED_SSA_NUM) || (tree->GetSsaNum() == m_ssaNum)); } + public: bool MadeChanges = false; @@ -1262,14 +1265,15 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned ne // block - Block to replace in // firstStmt - First statement in "block" to start replacing in // -void Compiler::optBestEffortReplaceNarrowIVUsesWith(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, BasicBlock* block, Statement* firstStmt) +void Compiler::optBestEffortReplaceNarrowIVUsesWith( + unsigned lclNum, unsigned ssaNum, unsigned newLclNum, BasicBlock* block, Statement* firstStmt) { - JITDUMP(" Replacing V%02u -> V%02u in " FMT_BB " starting at " FMT_STMT "\n", lclNum, newLclNum, block->bbNum, firstStmt == nullptr ? 0 : firstStmt->GetID()); + JITDUMP(" Replacing V%02u -> V%02u in " FMT_BB " starting at " FMT_STMT "\n", lclNum, newLclNum, block->bbNum, + firstStmt == nullptr ? 0 : firstStmt->GetID()); for (Statement* stmt = firstStmt; stmt != nullptr; stmt = stmt->GetNextStmt()) { - JITDUMP(" Replacing V%02u -> V%02u in [%06u]\n", lclNum, newLclNum, - dspTreeID(stmt->GetRootNode())); + JITDUMP(" Replacing V%02u -> V%02u in [%06u]\n", lclNum, newLclNum, dspTreeID(stmt->GetRootNode())); DISPSTMT(stmt); JITDUMP("\n"); @@ -1283,7 +1287,7 @@ void Compiler::optBestEffortReplaceNarrowIVUsesWith(unsigned lclNum, unsigned ss } return BasicBlockVisit::Continue; - }); + }); } //------------------------------------------------------------------------ @@ -1321,7 +1325,7 @@ PhaseStatus Compiler::optInductionVariables() #if defined(TARGET_XARCH) && defined(TARGET_64BIT) m_dfsTree = fgComputeDfs(); m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); - //m_domTree = FlowGraphDominatorTree::Build(m_dfsTree); + // m_domTree = FlowGraphDominatorTree::Build(m_dfsTree); fgDumpFlowGraph(PHASE_OPTIMIZE_INDUCTION_VARIABLES, PhasePosition::PostPhase); @@ -1388,7 +1392,8 @@ PhaseStatus Compiler::optInductionVariables() if (stmt->GetRootNode()->AsLclVarCommon()->GetLclNum() == lcl->GetLclNum()) { hasOtherUses = true; - JITDUMP(" V%02u has a phi [%06u] in " FMT_LP "'s header " FMT_BB "\n", lcl->GetLclNum(), dspTreeID(stmt->GetRootNode()), otherLoop->GetIndex(), otherLoop->GetHeader()->bbNum); + JITDUMP(" V%02u has a phi [%06u] in " FMT_LP "'s header " FMT_BB "\n", lcl->GetLclNum(), + dspTreeID(stmt->GetRootNode()), otherLoop->GetIndex(), otherLoop->GetHeader()->bbNum); break; } } @@ -1437,14 +1442,14 @@ PhaseStatus Compiler::optInductionVariables() { newLclNum = wiv.NewLclNum; JITDUMP(" Reusing previously widened version with initial value V%02u.%u, new local V%02u\n", - wiv.LclNum, wiv.InitSsaNum, wiv.NewLclNum); + wiv.LclNum, wiv.InitSsaNum, wiv.NewLclNum); break; } } - int64_t startConstant = 0; - bool initToConstant = startLocal->GetConstantValue(this, &startConstant); - LclSsaVarDsc* startSsaDsc = 
lclDsc->GetPerSsaData(startLocal->SsaNum); + int64_t startConstant = 0; + bool initToConstant = startLocal->GetConstantValue(this, &startConstant); + LclSsaVarDsc* startSsaDsc = lclDsc->GetPerSsaData(startLocal->SsaNum); BasicBlock* preheader = loop->EntryEdge(0)->getSourceBlock(); BasicBlock* initBlock = preheader; @@ -1461,7 +1466,7 @@ PhaseStatus Compiler::optInductionVariables() } } - bool reusedIV = newLclNum != BAD_VAR_NUM; + bool reusedIV = newLclNum != BAD_VAR_NUM; if (!optIsIVWideningProfitable(lcl->GetLclNum(), reusedIV, initBlock, initToConstant, loop)) { continue; @@ -1526,8 +1531,8 @@ PhaseStatus Compiler::optInductionVariables() initVal = gtNewCastNode(TYP_LONG, gtNewLclvNode(lcl->GetLclNum(), TYP_INT), true, TYP_LONG); } - GenTree* widenStore = gtNewTempStore(newLclNum, initVal); - initStmt = fgNewStmtFromTree(widenStore); + GenTree* widenStore = gtNewTempStore(newLclNum, initVal); + initStmt = fgNewStmtFromTree(widenStore); if (narrowInitStmt != nullptr) { fgInsertStmtAfter(initBlock, narrowInitStmt, initStmt); @@ -1545,7 +1550,8 @@ PhaseStatus Compiler::optInductionVariables() if (initStmt != nullptr) { - optBestEffortReplaceNarrowIVUsesWith(lcl->GetLclNum(), startLocal->SsaNum, newLclNum, initBlock, initStmt->GetNextStmt()); + optBestEffortReplaceNarrowIVUsesWith(lcl->GetLclNum(), startLocal->SsaNum, newLclNum, initBlock, + initStmt->GetNextStmt()); } loop->VisitLoopBlocks([=](BasicBlock* block) { From 2836ef11fe47fe16daa0e736d35440872d3bd68b Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 16 Feb 2024 15:49:27 +0100 Subject: [PATCH 44/64] Clean up --- src/coreclr/jit/inductionvariableopts.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index b8efe611f16bce..9c0a1d198400d6 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -1325,9 +1325,6 @@ PhaseStatus Compiler::optInductionVariables() #if defined(TARGET_XARCH) && defined(TARGET_64BIT) m_dfsTree = fgComputeDfs(); m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); - // m_domTree = FlowGraphDominatorTree::Build(m_dfsTree); - - fgDumpFlowGraph(PHASE_OPTIMIZE_INDUCTION_VARIABLES, PhasePosition::PostPhase); ScalarEvolutionContext scevContext(this); struct WidenedIV From 41c250324aeb50b44ae3e368bd8da4fd31a492b0 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 16 Feb 2024 16:28:24 +0100 Subject: [PATCH 45/64] Clean up --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/inductionvariableopts.cpp | 179 ++++++++-------------- 2 files changed, 64 insertions(+), 117 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 8829da05af7f38..58d9aa0b6de66f 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7408,7 +7408,7 @@ class Compiler PhaseStatus optInductionVariables(); bool optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop); bool optIsIVWideningProfitable( - unsigned lclNum, bool reusedIV, BasicBlock* initBlock, bool initedToConstant, FlowGraphNaturalLoop* loop); + unsigned lclNum, BasicBlock* initBlock, bool initedToConstant, FlowGraphNaturalLoop* loop); void optBestEffortReplaceNarrowIVUsesWith( unsigned lclNum, unsigned ssaNum, unsigned newLclNum, BasicBlock* block, Statement* firstStmt); void optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, Statement* stmt); diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp 
index 9c0a1d198400d6..1ed1f9d89c9e49 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -985,8 +985,30 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) // the exits where the narrow IV is live-in. // bool Compiler::optIsIVWideningProfitable( - unsigned lclNum, bool reusedIV, BasicBlock* initBlock, bool initedToConstant, FlowGraphNaturalLoop* loop) + unsigned lclNum, BasicBlock* initBlock, bool initedToConstant, FlowGraphNaturalLoop* loop) { + for (FlowGraphNaturalLoop* otherLoop : m_loops->InReversePostOrder()) + { + if (otherLoop == loop) + continue; + + for (Statement* stmt : otherLoop->GetHeader()->Statements()) + { + if (!stmt->IsPhiDefnStmt()) + break; + + if (stmt->GetRootNode()->AsLclVarCommon()->GetLclNum() == lclNum) + { + JITDUMP(" V%02u has a phi [%06u] in " FMT_LP "'s header " FMT_BB "\n", lclNum, + dspTreeID(stmt->GetRootNode()), otherLoop->GetIndex(), otherLoop->GetHeader()->bbNum); + // TODO-CQ: We can legally widen these cases, but LSRA is + // unhappy about some of the lifetimes we create when we do + // this. This particularly affects cloned loops. + return false; + } + } + } + struct CountZeroExtensionsVisitor : GenTreeVisitor { private: @@ -1014,7 +1036,7 @@ bool Compiler::optIsIVWideningProfitable( } GenTreeCast* cast = node->AsCast(); - if ((cast->gtCastType != TYP_LONG) || !cast->IsUnsigned()) + if ((cast->gtCastType != TYP_LONG) || !cast->IsUnsigned() || cast->gtOverflow()) { return WALK_CONTINUE; } @@ -1062,14 +1084,7 @@ bool Compiler::optIsIVWideningProfitable( return BasicBlockVisit::Continue; }); - if (reusedIV) - { - // If we are reusing the IV then widening is going to remove a use of - // the original narrow local, and we assume this leads to more DCE. - savedSize += ExtensionSize; - savedCost += initBlock->getBBWeight(this) * ExtensionCost; - } - else if (!initedToConstant) + if (!initedToConstant) { // We will need to store the narrow IV into the wide one in the init // block. 
We only cost this when init value is not a constant since @@ -1194,8 +1209,7 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned ne if (node->OperIs(GT_CAST)) { GenTreeCast* cast = node->AsCast(); - // TODO: Overflows - if ((cast->gtCastType == TYP_LONG) && cast->IsUnsigned()) + if ((cast->gtCastType == TYP_LONG) && cast->IsUnsigned() && !cast->gtOverflow()) { GenTree* op = cast->CastOp(); if (op->OperIs(GT_LCL_VAR) && IsLocal(op->AsLclVarCommon())) @@ -1327,19 +1341,6 @@ PhaseStatus Compiler::optInductionVariables() m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); ScalarEvolutionContext scevContext(this); - struct WidenedIV - { - unsigned LclNum; - unsigned InitSsaNum; - unsigned NewLclNum; - - WidenedIV(unsigned lclNum, unsigned initSsaNum, unsigned newLclNum) - : LclNum(lclNum), InitSsaNum(initSsaNum), NewLclNum(newLclNum) - { - } - }; - ArrayStack widenedIVs(getAllocator(CMK_LoopScalarEvolution)); - JITDUMP("Widening primary induction variables:\n"); for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder()) { @@ -1375,32 +1376,6 @@ PhaseStatus Compiler::optInductionVariables() continue; } - bool hasOtherUses = false; - for (FlowGraphNaturalLoop* otherLoop : m_loops->InReversePostOrder()) - { - if (otherLoop == loop) - continue; - - for (Statement* stmt : otherLoop->GetHeader()->Statements()) - { - if (!stmt->IsPhiDefnStmt()) - break; - - if (stmt->GetRootNode()->AsLclVarCommon()->GetLclNum() == lcl->GetLclNum()) - { - hasOtherUses = true; - JITDUMP(" V%02u has a phi [%06u] in " FMT_LP "'s header " FMT_BB "\n", lcl->GetLclNum(), - dspTreeID(stmt->GetRootNode()), otherLoop->GetIndex(), otherLoop->GetHeader()->bbNum); - break; - } - } - } - - if (hasOtherUses) - { - continue; - } - Scev* scev = scevContext.Analyze(loop->GetHeader(), stmt->GetRootNode()); if (scev == nullptr) { @@ -1431,40 +1406,18 @@ PhaseStatus Compiler::optInductionVariables() // since we only widen primary IVs. 
assert(addRec->Start->OperIs(ScevOper::Local)); ScevLocal* startLocal = (ScevLocal*)addRec->Start; - unsigned newLclNum = BAD_VAR_NUM; - for (int i = 0; i < widenedIVs.Height(); i++) - { - WidenedIV& wiv = widenedIVs.BottomRef(i); - if ((wiv.LclNum == startLocal->LclNum) && (wiv.InitSsaNum == startLocal->SsaNum)) - { - newLclNum = wiv.NewLclNum; - JITDUMP(" Reusing previously widened version with initial value V%02u.%u, new local V%02u\n", - wiv.LclNum, wiv.InitSsaNum, wiv.NewLclNum); - break; - } - } - int64_t startConstant = 0; bool initToConstant = startLocal->GetConstantValue(this, &startConstant); LclSsaVarDsc* startSsaDsc = lclDsc->GetPerSsaData(startLocal->SsaNum); BasicBlock* preheader = loop->EntryEdge(0)->getSourceBlock(); BasicBlock* initBlock = preheader; - if (newLclNum != BAD_VAR_NUM) + if (startSsaDsc->GetBlock() != nullptr) { - assert(startSsaDsc->GetBlock() != nullptr); initBlock = startSsaDsc->GetBlock(); } - else - { - if (startSsaDsc->GetBlock() != nullptr) - { - initBlock = startSsaDsc->GetBlock(); - } - } - bool reusedIV = newLclNum != BAD_VAR_NUM; - if (!optIsIVWideningProfitable(lcl->GetLclNum(), reusedIV, initBlock, initToConstant, loop)) + if (!optIsIVWideningProfitable(lcl->GetLclNum(), initBlock, initToConstant, loop)) { continue; } @@ -1497,51 +1450,45 @@ PhaseStatus Compiler::optInductionVariables() } Statement* initStmt = nullptr; - if (newLclNum == BAD_VAR_NUM) - { - newLclNum = lvaGrabTemp(false DEBUGARG(printfAlloc("Widened IV V%02u", lcl->GetLclNum()))); - INDEBUG(lclDsc = nullptr); - assert(startLocal->LclNum == lcl->GetLclNum()); - - if (initBlock != preheader) - { - JITDUMP( - "Adding initialization of new widened local to same block as reaching def outside loop, " FMT_BB - "\n", - initBlock->bbNum); - - // Any other use of this IV can reuse the same wide local/initialization. 
- widenedIVs.Emplace(startLocal->LclNum, startLocal->SsaNum, newLclNum); - } - else - { - JITDUMP("Adding initialization of new widened local to preheader " FMT_BB "\n", initBlock->bbNum); - } + unsigned newLclNum = lvaGrabTemp(false DEBUGARG(printfAlloc("Widened IV V%02u", lcl->GetLclNum()))); + INDEBUG(lclDsc = nullptr); + assert(startLocal->LclNum == lcl->GetLclNum()); - GenTree* initVal; - if (initToConstant) - { - initVal = gtNewIconNode((int64_t)(uint32_t)startConstant, TYP_LONG); - } - else - { - initVal = gtNewCastNode(TYP_LONG, gtNewLclvNode(lcl->GetLclNum(), TYP_INT), true, TYP_LONG); - } + if (initBlock != preheader) + { + JITDUMP( + "Adding initialization of new widened local to same block as reaching def outside loop, " FMT_BB + "\n", + initBlock->bbNum); + } + else + { + JITDUMP("Adding initialization of new widened local to preheader " FMT_BB "\n", initBlock->bbNum); + } - GenTree* widenStore = gtNewTempStore(newLclNum, initVal); - initStmt = fgNewStmtFromTree(widenStore); - if (narrowInitStmt != nullptr) - { - fgInsertStmtAfter(initBlock, narrowInitStmt, initStmt); - } - else - { - fgInsertStmtNearEnd(initBlock, initStmt); - } + GenTree* initVal; + if (initToConstant) + { + initVal = gtNewIconNode((int64_t)(uint32_t)startConstant, TYP_LONG); + } + else + { + initVal = gtNewCastNode(TYP_LONG, gtNewLclvNode(lcl->GetLclNum(), TYP_INT), true, TYP_LONG); + } - DISPSTMT(initStmt); - JITDUMP("\n"); + GenTree* widenStore = gtNewTempStore(newLclNum, initVal); + initStmt = fgNewStmtFromTree(widenStore); + if (narrowInitStmt != nullptr) + { + fgInsertStmtAfter(initBlock, narrowInitStmt, initStmt); } + else + { + fgInsertStmtNearEnd(initBlock, initStmt); + } + + DISPSTMT(initStmt); + JITDUMP("\n"); JITDUMP(" Replacing uses of V%02u with widened version V%02u\n", lcl->GetLclNum(), newLclNum); From a3f4a2e58446ee38a717fbf93a47b5edd56d065c Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 16 Feb 2024 22:03:39 +0100 Subject: [PATCH 46/64] Run jit-format --- src/coreclr/jit/compiler.h | 6 ++++-- src/coreclr/jit/inductionvariableopts.cpp | 19 ++++++++++--------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 58d9aa0b6de66f..f10ed9b4ee4f87 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7407,8 +7407,10 @@ class Compiler PhaseStatus optInductionVariables(); bool optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop); - bool optIsIVWideningProfitable( - unsigned lclNum, BasicBlock* initBlock, bool initedToConstant, FlowGraphNaturalLoop* loop); + bool optIsIVWideningProfitable(unsigned lclNum, + BasicBlock* initBlock, + bool initedToConstant, + FlowGraphNaturalLoop* loop); void optBestEffortReplaceNarrowIVUsesWith( unsigned lclNum, unsigned ssaNum, unsigned newLclNum, BasicBlock* block, Statement* firstStmt); void optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, Statement* stmt); diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 1ed1f9d89c9e49..e5e6df4d84dd20 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -984,8 +984,10 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) // 2. We need to store the wide IV back into the narrow one in each of // the exits where the narrow IV is live-in. 
// -bool Compiler::optIsIVWideningProfitable( - unsigned lclNum, BasicBlock* initBlock, bool initedToConstant, FlowGraphNaturalLoop* loop) +bool Compiler::optIsIVWideningProfitable(unsigned lclNum, + BasicBlock* initBlock, + bool initedToConstant, + FlowGraphNaturalLoop* loop) { for (FlowGraphNaturalLoop* otherLoop : m_loops->InReversePostOrder()) { @@ -1405,7 +1407,7 @@ PhaseStatus Compiler::optInductionVariables() // Start value should always be an SSA use from outside the loop // since we only widen primary IVs. assert(addRec->Start->OperIs(ScevOper::Local)); - ScevLocal* startLocal = (ScevLocal*)addRec->Start; + ScevLocal* startLocal = (ScevLocal*)addRec->Start; int64_t startConstant = 0; bool initToConstant = startLocal->GetConstantValue(this, &startConstant); LclSsaVarDsc* startSsaDsc = lclDsc->GetPerSsaData(startLocal->SsaNum); @@ -1449,17 +1451,16 @@ PhaseStatus Compiler::optInductionVariables() assert(narrowInitStmt != nullptr); } - Statement* initStmt = nullptr; - unsigned newLclNum = lvaGrabTemp(false DEBUGARG(printfAlloc("Widened IV V%02u", lcl->GetLclNum()))); + Statement* initStmt = nullptr; + unsigned newLclNum = lvaGrabTemp(false DEBUGARG(printfAlloc("Widened IV V%02u", lcl->GetLclNum()))); INDEBUG(lclDsc = nullptr); assert(startLocal->LclNum == lcl->GetLclNum()); if (initBlock != preheader) { - JITDUMP( - "Adding initialization of new widened local to same block as reaching def outside loop, " FMT_BB - "\n", - initBlock->bbNum); + JITDUMP("Adding initialization of new widened local to same block as reaching def outside loop, " FMT_BB + "\n", + initBlock->bbNum); } else { From 6812397e2b6dc0d1f2eeb3e1ca5457cc1a1aea2d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 20 Feb 2024 14:32:31 +0100 Subject: [PATCH 47/64] Insert after phis correctly --- src/coreclr/jit/inductionvariableopts.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index e5e6df4d84dd20..d0e126112dfa77 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -1426,7 +1426,7 @@ PhaseStatus Compiler::optInductionVariables() changed = true; - Statement* narrowInitStmt = nullptr; + Statement* insertInitAfter = nullptr; if ((initBlock != preheader) && (startSsaDsc->GetDefNode() != nullptr)) { GenTree* narrowInitRoot = startSsaDsc->GetDefNode(); @@ -1443,12 +1443,20 @@ PhaseStatus Compiler::optInductionVariables() { if (stmt->GetRootNode() == narrowInitRoot) { - narrowInitStmt = stmt; + insertInitAfter = stmt; break; } } - assert(narrowInitStmt != nullptr); + assert(insertInitAfter != nullptr); + + if (insertInitAfter->IsPhiDefnStmt()) + { + while ((insertInitAfter->GetNextStmt() != nullptr) && insertInitAfter->GetNextStmt()->IsPhiDefnStmt()) + { + insertInitAfter = insertInitAfter->GetNextStmt(); + } + } } Statement* initStmt = nullptr; @@ -1479,9 +1487,9 @@ PhaseStatus Compiler::optInductionVariables() GenTree* widenStore = gtNewTempStore(newLclNum, initVal); initStmt = fgNewStmtFromTree(widenStore); - if (narrowInitStmt != nullptr) + if (insertInitAfter != nullptr) { - fgInsertStmtAfter(initBlock, narrowInitStmt, initStmt); + fgInsertStmtAfter(initBlock, insertInitAfter, initStmt); } else { From 439acdad9bf0cdcc44debe8a08c2b9902af88386 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 20 Feb 2024 14:33:21 +0100 Subject: [PATCH 48/64] Squashed --- src/coreclr/inc/corinfo.h | 6 + 
src/coreclr/inc/icorjitinfoimpl_generated.h | 4 + src/coreclr/inc/jiteeversionguid.h | 10 +- src/coreclr/jit/CMakeLists.txt | 3 + src/coreclr/jit/ICorJitInfo_names_generated.h | 1 + .../jit/ICorJitInfo_wrapper_generated.hpp | 9 ++ src/coreclr/jit/codegencommon.cpp | 8 +- src/coreclr/jit/compiler.cpp | 44 +++-- src/coreclr/jit/compiler.h | 12 +- src/coreclr/jit/emit.cpp | 2 +- src/coreclr/jit/emitxarch.cpp | 4 +- src/coreclr/jit/fgbasic.cpp | 3 +- src/coreclr/jit/jitmetadata.cpp | 124 ++++++++++++++ src/coreclr/jit/jitmetadata.h | 30 ++++ src/coreclr/jit/jitmetadatalist.h | 37 +++++ src/coreclr/jit/loopcloning.cpp | 8 +- src/coreclr/jit/lsra.cpp | 2 +- src/coreclr/jit/optcse.cpp | 1 + src/coreclr/jit/optimizer.cpp | 14 +- src/coreclr/jit/promotion.cpp | 2 + src/coreclr/jit/redundantbranchopts.cpp | 2 + src/coreclr/jit/ssabuilder.cpp | 3 + src/coreclr/pal/src/safecrt/vsprintf.cpp | 2 +- src/coreclr/scripts/superpmi.py | 52 ++++-- .../tools/Common/JitInterface/CorInfoImpl.cs | 6 + .../JitInterface/CorInfoImpl_generated.cs | 153 ++++++++++-------- .../ThunkGenerator/ThunkInput.txt | 1 + .../aot/jitinterface/jitinterface_generated.h | 10 ++ .../superpmi-shared/compileresult.cpp | 6 + .../superpmi/superpmi-shared/compileresult.h | 11 ++ .../superpmi-shared/jitmetadatalist.h | 1 + .../superpmi-shim-collector/icorjitinfo.cpp | 6 + .../icorjitinfo_generated.cpp | 8 + .../icorjitinfo_generated.cpp | 7 + .../tools/superpmi/superpmi/fileio.cpp | 74 ++++++++- src/coreclr/tools/superpmi/superpmi/fileio.h | 6 + .../tools/superpmi/superpmi/icorjitinfo.cpp | 35 ++++ .../tools/superpmi/superpmi/jitinstance.cpp | 1 + .../tools/superpmi/superpmi/jitinstance.h | 1 + .../tools/superpmi/superpmi/superpmi.cpp | 59 +++++-- src/coreclr/vm/jitinterface.cpp | 21 +++ src/coreclr/vm/jitinterface.h | 2 + 42 files changed, 641 insertions(+), 150 deletions(-) create mode 100644 src/coreclr/jit/jitmetadata.cpp create mode 100644 src/coreclr/jit/jitmetadata.h create mode 100644 src/coreclr/jit/jitmetadatalist.h create mode 100644 src/coreclr/tools/superpmi/superpmi-shared/jitmetadatalist.h diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 86680d6e20c91e..ac83351a8da30e 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -2916,6 +2916,12 @@ class ICorStaticInfo uint32_t numMappings // [IN] Number of rich mappings ) = 0; + // Report back some metadata about the compilation to the EE -- for + // example, metrics about the compilation. + virtual void reportMetadata( + const char* key, + const void* value) = 0; + /*-------------------------- Misc ---------------------------------------*/ // Used to allocate memory that needs to handed to the EE. 
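The reportMetadata callback added above keeps the wire format minimal: a string key and an opaque value pointer whose layout is implied by the key. As a rough sketch of the consuming side, a logging implementation could look like the following; the key names and value types used here are assumptions for illustration, since the actual key set is defined by the JIT's metadata tables rather than by this interface.

#include <cstdio>
#include <cstring>

// Minimal sketch; a real implementation lives on the EE side of the JIT-EE
// interface and would override ICorJitInfo::reportMetadata.
class MetadataLogger
{
public:
    void reportMetadata(const char* key, const void* value)
    {
        if (value == nullptr)
        {
            return;
        }

        // Hypothetical keys: the value layout is dictated by the key.
        if (std::strcmp(key, "MethodFullName") == 0)
        {
            std::printf("compiled method: %s\n", static_cast<const char*>(value));
        }
        else if (std::strcmp(key, "PerfScore") == 0)
        {
            std::printf("perf score: %.2f\n", *static_cast<const double*>(value));
        }
    }
};

An EE that does not recognize a given key can simply ignore it, which is what makes the opaque key/value shape tolerant of new metrics being added on the JIT side.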
diff --git a/src/coreclr/inc/icorjitinfoimpl_generated.h b/src/coreclr/inc/icorjitinfoimpl_generated.h index 8dd993f5b47829..8a14cdb0f99f1e 100644 --- a/src/coreclr/inc/icorjitinfoimpl_generated.h +++ b/src/coreclr/inc/icorjitinfoimpl_generated.h @@ -438,6 +438,10 @@ void reportRichMappings( ICorDebugInfo::RichOffsetMapping* mappings, uint32_t numMappings) override; +void reportMetadata( + const char* key, + const void* value) override; + void* allocateArray( size_t cBytes) override; diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 6355fc20dd0fd5..646f1b169330a2 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* 0fb71692-0ee6-4914-88a8-6446e45f23e8 */ - 0x0fb71692, - 0x0ee6, - 0x4914, - {0x88, 0xa8, 0x64, 0x46, 0xe4, 0x5f, 0x23, 0xe8} +constexpr GUID JITEEVersionIdentifier = { /* 1f30d12b-38f1-4f1e-a08a-831def882aa4 */ + 0x1f30d12b, + 0x38f1, + 0x4f1e, + {0xa0, 0x8a, 0x83, 0x1d, 0xef, 0x88, 0x2a, 0xa4} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index aa660321075890..ae08a27e4c00aa 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -137,6 +137,7 @@ set( JIT_SOURCES jitconfig.cpp jiteh.cpp jithashtable.cpp + jitmetadata.cpp lclmorph.cpp lclvars.cpp likelyclass.cpp @@ -334,6 +335,8 @@ set( JIT_HEADERS jitexpandarray.h jitgcinfo.h jithashtable.h + jitmetadata.h + jitmetadatalist.h jitpch.h jitstd.h lir.h diff --git a/src/coreclr/jit/ICorJitInfo_names_generated.h b/src/coreclr/jit/ICorJitInfo_names_generated.h index 5fe1f716d474b8..3908d4bcdc4be2 100644 --- a/src/coreclr/jit/ICorJitInfo_names_generated.h +++ b/src/coreclr/jit/ICorJitInfo_names_generated.h @@ -108,6 +108,7 @@ DEF_CLR_API(setBoundaries) DEF_CLR_API(getVars) DEF_CLR_API(setVars) DEF_CLR_API(reportRichMappings) +DEF_CLR_API(reportMetadata) DEF_CLR_API(allocateArray) DEF_CLR_API(freeArray) DEF_CLR_API(getArgNext) diff --git a/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp b/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp index cae9b5d7b39e59..cb4442df3810cd 100644 --- a/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp +++ b/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp @@ -1028,6 +1028,15 @@ void WrapICorJitInfo::reportRichMappings( API_LEAVE(reportRichMappings); } +void WrapICorJitInfo::reportMetadata( + const char* key, + const void* value) +{ + API_ENTER(reportMetadata); + wrapHnd->reportMetadata(key, value); + API_LEAVE(reportMetadata); +} + void* WrapICorJitInfo::allocateArray( size_t cBytes) { diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 825837fe45ef50..29b911f3160e5b 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -315,10 +315,8 @@ void CodeGen::genPrepForCompiler() } } VarSetOps::AssignNoCopy(compiler, genLastLiveSet, VarSetOps::MakeEmpty(compiler)); - genLastLiveMask = RBM_NONE; -#ifdef DEBUG - compiler->fgBBcountAtCodegen = compiler->fgBBcount; -#endif + genLastLiveMask = RBM_NONE; + compiler->Metrics.BasicBlocksAtCodegen = compiler->fgBBcount; } //------------------------------------------------------------------------ @@ -2042,7 +2040,7 @@ void CodeGen::genEmitMachineCode() printf("; Total bytes of code %d, prolog size %d, PerfScore 
%.2f, instruction count %d, allocated bytes for " "code %d", - codeSize, prologSize, compiler->info.compPerfScore, instrCount, + codeSize, prologSize, compiler->Metrics.PerfScore, instrCount, GetEmitter()->emitTotalHotCodeSize + GetEmitter()->emitTotalColdCodeSize); if (dspMetrics) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index b0518aca33f796..a3a3c380bf8d82 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -1793,9 +1793,13 @@ void Compiler::compInit(ArenaAllocator* pAlloc, info.compMethodName = eeGetMethodName(methodHnd); info.compClassName = eeGetClassName(info.compClassHnd); info.compFullName = eeGetMethodFullName(methodHnd); - info.compPerfScore = 0.0; info.compMethodSuperPMIIndex = g_jitHost->getIntConfigValue(W("SuperPMIMethodContextNumber"), -1); + + if (!compIsForInlining()) + { + JitMetadata::report(this, JitMetadataName::MethodFullName, info.compFullName); + } #endif // defined(DEBUG) || defined(LATE_DISASM) || DUMP_FLOWGRAPHS #if defined(DEBUG) @@ -1863,9 +1867,7 @@ void Compiler::compInit(ArenaAllocator* pAlloc, // // Initialize all the per-method statistics gathering data structures. // - - optLoopsCloned = 0; - + CLANG_FORMAT_COMMENT_ANCHOR; #if LOOP_HOIST_STATS m_loopsConsidered = 0; m_curLoopHasHoistedExpression = false; @@ -1965,6 +1967,8 @@ void Compiler::compInit(ArenaAllocator* pAlloc, compUsesThrowHelper = false; m_preferredInitCctor = CORINFO_HELP_UNDEF; + + new (&Metrics, jitstd::placement_t()) JitMetrics(); } /***************************************************************************** @@ -5212,7 +5216,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl #ifdef DEBUG if (JitConfig.JitMetrics() > 0) { - sprintf_s(metricPart, 128, ", perfScore=%.2f, numCse=%u", info.compPerfScore, optCSEcount); + sprintf_s(metricPart, 128, ", perfScore=%.2f, numCse=%u", Metrics.PerfScore, optCSEcount); } #endif @@ -5416,7 +5420,7 @@ PhaseStatus Compiler::placeLoopAlignInstructions() { block->SetFlags(BBF_LOOP_ALIGN); BitVecOps::AddElemD(&loopTraits, alignedLoops, loop->GetIndex()); - INDEBUG(loopAlignCandidates++); + Metrics.LoopAlignmentCandidates++; BasicBlock* prev = block->Prev(); // shouldAlignLoop should have guaranteed these properties. @@ -5465,7 +5469,7 @@ PhaseStatus Compiler::placeLoopAlignInstructions() } } - JITDUMP("Found %u candidates for loop alignment\n", loopAlignCandidates); + JITDUMP("Found %d candidates for loop alignment\n", Metrics.LoopAlignmentCandidates); return madeChanges ? 
PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } @@ -6438,6 +6442,8 @@ void Compiler::compCompileFinish() compArenaAllocator->finishMemStats(); memAllocHist.record((unsigned)((compArenaAllocator->getTotalBytesAllocated() + 1023) / 1024)); memUsedHist.record((unsigned)((compArenaAllocator->getTotalBytesUsed() + 1023) / 1024)); + + Metrics.BytesAllocated = (int64_t)compArenaAllocator->getTotalBytesUsed(); } #ifdef DEBUG @@ -6621,7 +6627,7 @@ void Compiler::compCompileFinish() printf(" %3d |", optCallCount); printf(" %3d |", optIndirectCallCount); - printf(" %3d |", fgBBcountAtCodegen); + printf(" %3d |", Metrics.BasicBlocksAtCodegen); printf(" %3d |", lvaCount); if (opts.MinOpts()) @@ -6634,13 +6640,13 @@ void Compiler::compCompileFinish() printf(" %3d |", optCSEcount); } - if (info.compPerfScore < 9999.995) + if (Metrics.PerfScore < 9999.995) { - printf(" %7.2f |", info.compPerfScore); + printf(" %7.2f |", Metrics.PerfScore); } else { - printf(" %7.0f |", info.compPerfScore); + printf(" %7.0f |", Metrics.PerfScore); } printf(" %4d |", info.compMethodInfo->ILCodeSize); @@ -6651,9 +6657,13 @@ void Compiler::compCompileFinish() printf(""); // in our logic this causes a flush } + JITDUMP("Final metrics:\n"); + Metrics.report(this); + DBEXEC(verbose, Metrics.dump()); + if (verbose) { - printf("****** DONE compiling %s\n", info.compFullName); + printf("\n****** DONE compiling %s\n", info.compFullName); printf(""); // in our logic this causes a flush } @@ -7146,6 +7156,8 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr, opts.disAsm = false; } + INDEBUG(JitMetadata::report(this, JitMetadataName::TieringName, compGetTieringName(true))); + #if COUNT_BASIC_BLOCKS bbCntTable.record(fgBBcount); @@ -9074,12 +9086,12 @@ void JitTimer::PrintCsvMethodStats(Compiler* comp) fprintf(s_csvFile, "%u,", comp->info.compILCodeSize); fprintf(s_csvFile, "%u,", comp->fgBBcount); fprintf(s_csvFile, "%u,", comp->opts.MinOpts()); - fprintf(s_csvFile, "%u,", comp->optNumNaturalLoopsFound); - fprintf(s_csvFile, "%u,", comp->optLoopsCloned); + fprintf(s_csvFile, "%d,", comp->Metrics.LoopsFoundDuringOpts); + fprintf(s_csvFile, "%d,", comp->Metrics.LoopsCloned); #if FEATURE_LOOP_ALIGN #ifdef DEBUG - fprintf(s_csvFile, "%u,", comp->loopAlignCandidates); - fprintf(s_csvFile, "%u,", comp->loopsAligned); + fprintf(s_csvFile, "%d,", comp->Metrics.LoopAlignmentCandidates); + fprintf(s_csvFile, "%d,", comp->Metrics.LoopsAligned); #endif // DEBUG #endif // FEATURE_LOOP_ALIGN unsigned __int64 totCycles = 0; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 10a47eb729fa0b..14108f0b252a68 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -65,6 +65,8 @@ inline var_types genActualType(T value); #include "simd.h" #include "simdashwintrinsic.h" +#include "jitmetadata.h" + /***************************************************************************** * Forward declarations */ @@ -4960,7 +4962,6 @@ class Compiler unsigned fgEdgeCount; // # of control flow edges between the BBs unsigned fgBBcount; // # of BBs in the method (in the linked list that starts with fgFirstBB) #ifdef DEBUG - unsigned fgBBcountAtCodegen; // # of BBs in the method at the start of codegen jitstd::vector* fgBBOrder; // ordered vector of BBs #endif // Used as a quick check for whether loop alignment should look for natural loops. @@ -4989,7 +4990,6 @@ class Compiler // 2. All loop exits where bbIsHandlerBeg(exit) is false have only loop predecessors. 
// bool optLoopsCanonical; - unsigned optNumNaturalLoopsFound; // Number of natural loops found in the loop finding phase bool fgBBVarSetsInited; @@ -6833,16 +6833,11 @@ class Compiler public: bool fgHasLoops; -#ifdef DEBUG - unsigned loopAlignCandidates; // number of candidates identified by placeLoopAlignInstructions - unsigned loopsAligned; // number of loops actually aligned -#endif // DEBUG protected: unsigned optCallCount; // number of calls made in the method unsigned optIndirectCallCount; // number of virtual, interface and indirect calls made in the method unsigned optNativeCallCount; // number of Pinvoke/Native calls made in the method - unsigned optLoopsCloned; // number of loops cloned in the current method. #ifdef DEBUG void optCheckPreds(); @@ -10147,7 +10142,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX const char* compMethodName; const char* compClassName; const char* compFullName; - double compPerfScore; int compMethodSuperPMIIndex; // useful when debugging under SuperPMI #endif // defined(DEBUG) || defined(LATE_DISASM) || DUMP_FLOWGRAPHS @@ -10608,6 +10602,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX static EnregisterStats s_enregisterStats; #endif // TRACK_ENREG_STATS + JitMetrics Metrics; + bool compIsForInlining() const; bool compDonotInline(); diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index d3ac84e7919a1d..ecbfe659be1034 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -4393,7 +4393,7 @@ size_t emitter::emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp) float insExeCost = insEvaluateExecutionCost(id); // All compPerfScore calculations must be performed using doubles double insPerfScore = (double)(ig->igWeight / (double)BB_UNITY_WEIGHT) * insExeCost; - emitComp->info.compPerfScore += insPerfScore; + emitComp->Metrics.PerfScore += insPerfScore; ig->igPerfScore += insPerfScore; #endif // defined(DEBUG) || defined(LATE_DISASM) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 9e80da3bfaf9ab..d429f0d261f7a2 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -12475,10 +12475,10 @@ BYTE* emitter::emitOutputAlign(insGroup* ig, instrDesc* id, BYTE* dst) assert(paddingToAdd == paddingNeeded); } } - - emitComp->loopsAligned++; #endif + emitComp->Metrics.LoopsAligned++; + #ifdef DEBUG // Under STRESS_EMITTER, if this is the 'align' before the 'jmp' instruction, // then add "int3" instruction. Since int3 takes 1 byte, we would only add diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 3800e2ffe89d94..d30e58ce0f03c7 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -49,8 +49,7 @@ void Compiler::fgInit() fgBBcount = 0; #ifdef DEBUG - fgBBcountAtCodegen = 0; - fgBBOrder = nullptr; + fgBBOrder = nullptr; #endif // DEBUG fgMightHaveNaturalLoops = false; diff --git a/src/coreclr/jit/jitmetadata.cpp b/src/coreclr/jit/jitmetadata.cpp new file mode 100644 index 00000000000000..754bb532a84401 --- /dev/null +++ b/src/coreclr/jit/jitmetadata.cpp @@ -0,0 +1,124 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#include "jitpch.h" +#include "jitmetadata.h" + +#ifdef DEBUG + +//------------------------------------------------------------------------ +// JitMetadata::getName: Get the name corresponding to a JitMetadataName value, +// which can be used to report metadata back to the EE. +// +// Parameters: +// name - The JitMetadataName enum entry +// +// Returns: +// String representation of the name +// +const char* JitMetadata::getName(JitMetadataName name) +{ + switch (name) + { +#define JITMETADATA(name, type, flags) \ + case JitMetadataName::name: \ + return #name; +#include "jitmetadatalist.h" + + default: + unreached(); + } +} + +//------------------------------------------------------------------------ +// JitMetadata::report: Report metadata back to the EE. +// +// Parameters: +// comp - Compiler instance +// name - Enum entry specifying the metadata name +// data - Pointer to the value to report back +// +void JitMetadata::report(Compiler* comp, JitMetadataName name, const void* data) +{ + comp->info.compCompHnd->reportMetadata(getName(name), data); +} + +//------------------------------------------------------------------------ +// reportValue: Report a specific value back to the EE. +// +// Parameters: +// comp - Compiler instance +// key - The key +// value - Value to report back +// +template +static void reportValue(Compiler* comp, const char* key, T value) +{ + comp->info.compCompHnd->reportMetadata(key, &value); +} + +//------------------------------------------------------------------------ +// JitMetrics::report: Report all metrics and their values back to the EE. +// +// Parameters: +// comp - Compiler instance +// +void JitMetrics::report(Compiler* comp) +{ +#define JITMETADATAINFO(name, type, flags) +#define JITMETADATAMETRIC(name, type, flags) reportValue(comp, #name, name); +#include "jitmetadatalist.h" +} + +//------------------------------------------------------------------------ +// printMetric: Print a double metric value to jitstdout. +// +// Parameters: +// value - The value +// +static void printMetric(double value) +{ + printf("%f", value); +} + +//------------------------------------------------------------------------ +// printMetric: Print an int metric value to jitstdout. +// +// Parameters: +// value - The value +// +static void printMetric(int value) +{ + printf("%d", value); +} + +//------------------------------------------------------------------------ +// printMetric: Print an int64_t metric value to jitstdout. +// +// Parameters: +// value - The value +// +static void printMetric(int64_t value) +{ + printf("%lld", value); +} + +//------------------------------------------------------------------------ +// JitMetrics::dump: Print the values of all metrics to jitstdout. +// +void JitMetrics::dump() +{ + int nameMaxWidth = 0; +#define JITMETADATAINFO(name, type, flags) +#define JITMETADATAMETRIC(name, type, flags) nameMaxWidth = max(nameMaxWidth, (int)strlen(#name)); +#include "jitmetadatalist.h" + +#define JITMETADATAINFO(name, type, flags) +#define JITMETADATAMETRIC(name, type, flags) \ + printf("%-*s: ", nameMaxWidth + 5, #name); \ + printMetric(name); \ + printf("\n"); +#include "jitmetadatalist.h" +} + +#endif diff --git a/src/coreclr/jit/jitmetadata.h b/src/coreclr/jit/jitmetadata.h new file mode 100644 index 00000000000000..42ce87d0966541 --- /dev/null +++ b/src/coreclr/jit/jitmetadata.h @@ -0,0 +1,30 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#pragma once + +class Compiler; + +enum class JitMetadataName +{ +#define JITMETADATA(name, type, flags) name, +#include "jitmetadatalist.h" +}; + +class JitMetadata +{ +public: + static const char* getName(JitMetadataName name); + static void report(Compiler* comp, JitMetadataName name, const void* data); +}; + +class JitMetrics +{ +public: +#define JITMETADATAINFO(name, type, flags) +#define JITMETADATAMETRIC(name, type, flags) type name = 0; +#include "jitmetadatalist.h" + + void report(Compiler* comp); + void dump(); +}; diff --git a/src/coreclr/jit/jitmetadatalist.h b/src/coreclr/jit/jitmetadatalist.h new file mode 100644 index 00000000000000..8422d0ac1a81cf --- /dev/null +++ b/src/coreclr/jit/jitmetadatalist.h @@ -0,0 +1,37 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// clang-format off + +#ifdef JITMETADATA +#define JITMETADATAINFO(name, type, flags) JITMETADATA(name, type, flags) +#define JITMETADATAMETRIC(name, type, flags) JITMETADATA(name, type, flags) +#endif + +#if !defined(JITMETADATAINFO) || !defined(JITMETADATAMETRIC) +#error Define JITMETADATAINFO and JITMETADATAMETRIC before including this file. +#endif + +// Name, type flags +JITMETADATAINFO(MethodFullName, const char*, 0) +JITMETADATAINFO(TieringName, const char*, 0) +JITMETADATAMETRIC(PhysicallyPromotedFields, int, 0) +JITMETADATAMETRIC(LoopsFoundDuringOpts, int, 0) +JITMETADATAMETRIC(LoopsCloned, int, 0) +JITMETADATAMETRIC(LoopsUnrolled, int, 0) +JITMETADATAMETRIC(LoopAlignmentCandidates, int, 0) +JITMETADATAMETRIC(LoopsAligned, int, 0) +JITMETADATAMETRIC(VarsInSsa, int, 0) +JITMETADATAMETRIC(HoistedExpressions, int, 0) +JITMETADATAMETRIC(RedundantBranchesEliminated, int, JIT_METADATA_HIGHER_IS_BETTER) +JITMETADATAMETRIC(JumpThreadingsPerformed, int, JIT_METADATA_HIGHER_IS_BETTER) +JITMETADATAMETRIC(Cses, int, 0) +JITMETADATAMETRIC(BasicBlocksAtCodegen, int, 0) +JITMETADATAMETRIC(PerfScore, double, JIT_METADATA_LOWER_IS_BETTER) +JITMETADATAMETRIC(BytesAllocated, int64_t, JIT_METADATA_LOWER_IS_BETTER) + +#undef JITMETADATA +#undef JITMETADATAINFO +#undef JITMETADATAMETRIC + +// clang-format on diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index ca4c2572fa41d9..96cd8bff015939 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -2955,19 +2955,19 @@ PhaseStatus Compiler::optCloneLoops() #endif #endif - assert(optLoopsCloned == 0); // It should be initialized, but not yet changed. + assert(Metrics.LoopsCloned == 0); // It should be initialized, but not yet changed. 
for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder()) { if (context.GetLoopOptInfo(loop->GetIndex()) != nullptr) { - optLoopsCloned++; + Metrics.LoopsCloned++; context.OptimizeConditions(loop->GetIndex() DEBUGARG(verbose)); context.OptimizeBlockConditions(loop->GetIndex() DEBUGARG(verbose)); optCloneLoop(loop, &context); } } - if (optLoopsCloned > 0) + if (Metrics.LoopsCloned > 0) { fgInvalidateDfsTree(); m_dfsTree = fgComputeDfs(); @@ -2986,7 +2986,7 @@ PhaseStatus Compiler::optCloneLoops() #ifdef DEBUG if (verbose) { - printf("Loops cloned: %d\n", optLoopsCloned); + printf("Loops cloned: %d\n", Metrics.LoopsCloned); printf("Loops statically optimized: %d\n", optStaticallyOptimizedLoops); printf("After loop cloning:\n"); fgDispBasicBlocks(/*dumpTrees*/ true); diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 04ca6149c9fc36..9ca1d59ed7a5e2 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -10096,7 +10096,7 @@ void LinearScan::dumpLsraStatsCsv(FILE* file) { fprintf(file, ",%u", sumStats[statIndex]); } - fprintf(file, ",%.2f\n", compiler->info.compPerfScore); + fprintf(file, ",%.2f\n", compiler->Metrics.PerfScore); } // ----------------------------------------------------------- diff --git a/src/coreclr/jit/optcse.cpp b/src/coreclr/jit/optcse.cpp index e4b3c9faeff4fb..ffc9557e736a7a 100644 --- a/src/coreclr/jit/optcse.cpp +++ b/src/coreclr/jit/optcse.cpp @@ -4370,6 +4370,7 @@ void CSE_HeuristicCommon::PerformCSE(CSE_Candidate* successfulCandidate) // Record that we created a new LclVar for use as a CSE temp m_addCSEcount++; m_pCompiler->optCSEcount++; + m_pCompiler->Metrics.Cses++; // Walk all references to this CSE, adding an assignment // to the CSE temp to all defs and changing all refs to diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index e2f1d335e21277..287aab0160727c 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -21,13 +21,7 @@ void Compiler::optInit() { fgHasLoops = false; - optLoopsCanonical = false; - optNumNaturalLoopsFound = 0; - -#ifdef DEBUG - loopAlignCandidates = 0; - loopsAligned = 0; -#endif + optLoopsCanonical = false; /* Keep track of the number of calls and indirect calls made by this method */ optCallCount = 0; @@ -1300,6 +1294,8 @@ PhaseStatus Compiler::optUnrollLoops() { assert(anyIRchange); + Metrics.LoopsUnrolled += unrollCount; + #ifdef DEBUG if (verbose) { @@ -2670,7 +2666,7 @@ PhaseStatus Compiler::optFindLoopsPhase() optFindAndScaleGeneralLoopBlocks(); } - optNumNaturalLoopsFound = (unsigned)m_loops->NumLoops(); + Metrics.LoopsFoundDuringOpts = (int)m_loops->NumLoops(); return PhaseStatus::MODIFIED_EVERYTHING; } @@ -5147,6 +5143,8 @@ void Compiler::optHoistCandidate(GenTree* tree, // Record the hoisted expression in hoistCtxt hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true); + + Metrics.HoistedExpressions++; } bool Compiler::optVNIsLoopInvariant(ValueNum vn, FlowGraphNaturalLoop* loop, VNSet* loopVnInvariantCache) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index f66748633a69e3..2f7b1e0b31372c 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -1205,6 +1205,8 @@ class LocalsUseVisitor : public GenTreeVisitor } } + m_compiler->Metrics.PhysicallyPromotedFields += totalNumPromotions; + if (totalNumPromotions <= 0) { return false; diff --git a/src/coreclr/jit/redundantbranchopts.cpp b/src/coreclr/jit/redundantbranchopts.cpp index 3ea6142de4cd3f..9dd9a2959e76a3 
100644 --- a/src/coreclr/jit/redundantbranchopts.cpp +++ b/src/coreclr/jit/redundantbranchopts.cpp @@ -927,6 +927,7 @@ bool Compiler::optRedundantBranch(BasicBlock* const block) JITDUMP("\nRedundant branch opt in " FMT_BB ":\n", block->bbNum); fgMorphBlockStmt(block, stmt DEBUGARG(__FUNCTION__)); + Metrics.RedundantBranchesEliminated++; return true; } @@ -1683,6 +1684,7 @@ bool Compiler::optJumpThreadCore(JumpThreadInfo& jti) // We optimized. // + Metrics.JumpThreadingsPerformed++; fgModified = true; return true; } diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index 2072591bcd5a9a..fb8b33aa313294 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -1181,6 +1181,7 @@ void SsaBuilder::RenameVariables() { JITDUMP("*************** In SsaBuilder::RenameVariables()\n"); + m_pCompiler->Metrics.VarsInSsa = 0; // The first thing we do is treat parameters and must-init variables as if they have a // virtual definition before entry -- they start out at SSA name 1. for (unsigned lclNum = 0; lclNum < m_pCompiler->lvaCount; lclNum++) @@ -1190,6 +1191,8 @@ void SsaBuilder::RenameVariables() continue; } + m_pCompiler->Metrics.VarsInSsa++; + LclVarDsc* varDsc = m_pCompiler->lvaGetDesc(lclNum); assert(varDsc->lvTracked); diff --git a/src/coreclr/pal/src/safecrt/vsprintf.cpp b/src/coreclr/pal/src/safecrt/vsprintf.cpp index b8ff745f563ceb..360222d5dc6798 100644 --- a/src/coreclr/pal/src/safecrt/vsprintf.cpp +++ b/src/coreclr/pal/src/safecrt/vsprintf.cpp @@ -95,7 +95,7 @@ DLLEXPORT int __cdecl _vsnprintf_s ( retvalue = vsnprintf(string, sizeInBytes, format, ap); string[sizeInBytes - 1] = '\0'; /* we allow truncation if count == _TRUNCATE */ - if (retvalue > (int)sizeInBytes && count == _TRUNCATE) + if (retvalue >= (int)sizeInBytes && count == _TRUNCATE) { if (errno == ERANGE) { diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py index baca765929ce4c..1c1813a0131101 100644 --- a/src/coreclr/scripts/superpmi.py +++ b/src/coreclr/scripts/superpmi.py @@ -1865,8 +1865,8 @@ def aggregate_diff_metrics(details): """ base_minopts = {"Successful compiles": 0, "Missing compiles": 0, "Failing compiles": 0, - "Contexts with diffs": 0, "Diffed code bytes": 0, "Diff executed instructions": 0, - "Diffed contexts": 0} + "Contexts with diffs": 0, "Diffed code bytes": 0, "Diffed PerfScore" : 0.0, + "Diff executed instructions": 0, "Diffed contexts": 0} base_fullopts = base_minopts.copy() diff_minopts = base_minopts.copy() @@ -1910,6 +1910,11 @@ def aggregate_diff_metrics(details): base_dict["Diff executed instructions"] += base_insts diff_dict["Diff executed instructions"] += diff_insts + base_perfscore = float(row["Base PerfScore"]) + diff_perfscore = float(row["Diff PerfScore"]) + base_dict["Diffed PerfScore"] += base_perfscore + diff_dict["Diffed PerfScore"] += diff_perfscore + base_dict["Diffed contexts"] += 1 diff_dict["Diffed contexts"] += 1 @@ -2267,6 +2272,14 @@ def create_exception(): delta_bytes = diff_bytes - base_bytes logging.info("Total bytes of delta: {} ({:.2%} of base)".format(delta_bytes, delta_bytes / base_bytes)) + if "PerfScore" in self.coreclr_args.metrics: + base_perfscore = base_metrics["Overall"]["Diffed PerfScore"] + diff_perfscore = diff_metrics["Overall"]["Diffed PerfScore"] + logging.info("Total PerfScore of base: {}".format(base_perfscore)) + logging.info("Total PerfScore of diff: {}".format(diff_perfscore)) + delta_perfscore = diff_perfscore - base_perfscore + logging.info("Total PerfScore of delta: {} ({:.2%} 
of base)".format(delta_perfscore, delta_perfscore / base_perfscore)) + try: current_text_diff = text_differences.get_nowait() except: @@ -2290,6 +2303,10 @@ def create_exception(): if self.coreclr_args.metrics: for metric in self.coreclr_args.metrics: command += [ "--metrics", metric ] + + if self.coreclr_args.metrics == ["PerfScore"]: + command += [ "--override-total-base-metric", str(base_perfscore), "--override-total-diff-metric", str(diff_perfscore) ] + elif base_bytes is not None and diff_bytes is not None: command += [ "--override-total-base-metric", str(base_bytes), "--override-total-diff-metric", str(diff_bytes) ] @@ -2668,7 +2685,7 @@ def pick_contexts_to_disassemble(self, diffs): # If there are non-default metrics then we need to disassemble # everything so that jit-analyze can handle those. - if self.coreclr_args.metrics is not None: + if self.coreclr_args.metrics is not None and self.coreclr_args.metrics != ["PerfScore"]: contexts = diffs examples = [] else: @@ -2686,22 +2703,29 @@ def display_subset(message, subset): smallest_contexts = sorted(diffs, key=lambda r: int(r["Context size"]))[:20] display_subset("Smallest {} contexts with binary differences:", smallest_contexts) - # Order by byte-wise improvement, largest improvements first - by_diff_size = sorted(diffs, key=lambda r: int(r["Diff size"]) - int(r["Base size"])) - # 20 top improvements, byte-wise - top_improvements_bytes = by_diff_size[:20] - # 20 top regressions, byte-wise - top_regressions_bytes = by_diff_size[-20:] + if self.coreclr_args.metrics is None: + base_metric_name = "Base size" + diff_metric_name = "Diff size" + else: + base_metric_name = "Base PerfScore" + diff_metric_name = "Diff PerfScore" + + # Order by improvement, largest improvements first + by_diff = sorted(diffs, key=lambda r: float(r[diff_metric_name]) - float(r[base_metric_name])) + # 20 top improvements + top_improvements = by_diff[:20] + # 20 top regressions + top_regressions = by_diff[-20:] - display_subset("Top {} improvements, byte-wise:", top_improvements_bytes) - display_subset("Top {} regressions, byte-wise:", top_regressions_bytes) + display_subset("Top {} improvements:", top_improvements) + display_subset("Top {} regressions:", top_regressions) # Order by percentage-wise size improvement, largest improvements first def diff_pct(r): - base = int(r["Base size"]) + base = float(r[base_metric_name]) if base == 0: return 0 - diff = int(r["Diff size"]) + diff = float(r[diff_metric_name]) return (diff - base) / base by_diff_size_pct = sorted(diffs, key=diff_pct) @@ -2723,7 +2747,7 @@ def diff_pct(r): example_improvements = by_diff_size_pct_examples[:3] example_regressions = by_diff_size_pct_examples[3:][-3:] - contexts = smallest_contexts + top_improvements_bytes + top_regressions_bytes + top_improvements_pct + top_regressions_pct + smallest_zero_size_contexts + example_improvements + example_regressions + contexts = smallest_contexts + top_improvements + top_regressions + top_improvements_pct + top_regressions_pct + smallest_zero_size_contexts + example_improvements + example_regressions examples = example_improvements + example_regressions final_contexts_indices = list(set(int(r["Context"]) for r in contexts)) diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs index 2e72a18def7713..d1bd90f1688f4c 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs @@ -3156,6 +3156,12 @@ private void 
reportRichMappings(InlineTreeNode* inlineTree, uint numInlineTree, Marshal.FreeHGlobal((IntPtr)mappings); } +#pragma warning disable CA1822 // Mark members as static + private void reportMetadata(byte* key, void* value) +#pragma warning restore CA1822 // Mark members as static + { + } + #pragma warning disable CA1822 // Mark members as static private void* allocateArray(UIntPtr cBytes) #pragma warning restore CA1822 // Mark members as static diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs index 13844628bb8639..2e4e617b40b9b7 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs @@ -1547,6 +1547,20 @@ private static void _reportRichMappings(IntPtr thisHandle, IntPtr* ppException, } } + [UnmanagedCallersOnly] + private static void _reportMetadata(IntPtr thisHandle, IntPtr* ppException, byte* key, void* value) + { + var _this = GetThis(thisHandle); + try + { + _this.reportMetadata(key, value); + } + catch (Exception ex) + { + *ppException = _this.AllocException(ex); + } + } + [UnmanagedCallersOnly] private static void* _allocateArray(IntPtr thisHandle, IntPtr* ppException, UIntPtr cBytes) { @@ -2551,7 +2565,7 @@ private static uint _getJitFlags(IntPtr thisHandle, IntPtr* ppException, CORJIT_ private static IntPtr GetUnmanagedCallbacks() { - void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 172); + void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 173); callbacks[0] = (delegate* unmanaged)&_isIntrinsic; callbacks[1] = (delegate* unmanaged)&_notifyMethodInfoUsage; @@ -2657,74 +2671,75 @@ private static IntPtr GetUnmanagedCallbacks() callbacks[101] = (delegate* unmanaged)&_getVars; callbacks[102] = (delegate* unmanaged)&_setVars; callbacks[103] = (delegate* unmanaged)&_reportRichMappings; - callbacks[104] = (delegate* unmanaged)&_allocateArray; - callbacks[105] = (delegate* unmanaged)&_freeArray; - callbacks[106] = (delegate* unmanaged)&_getArgNext; - callbacks[107] = (delegate* unmanaged)&_getArgType; - callbacks[108] = (delegate* unmanaged)&_getExactClasses; - callbacks[109] = (delegate* unmanaged)&_getArgClass; - callbacks[110] = (delegate* unmanaged)&_getHFAType; - callbacks[111] = (delegate* unmanaged)&_runWithErrorTrap; - callbacks[112] = (delegate* unmanaged)&_runWithSPMIErrorTrap; - callbacks[113] = (delegate* unmanaged)&_getEEInfo; - callbacks[114] = (delegate* unmanaged)&_getJitTimeLogFilename; - callbacks[115] = (delegate* unmanaged)&_getMethodDefFromMethod; - callbacks[116] = (delegate* unmanaged)&_printMethodName; - callbacks[117] = (delegate* unmanaged)&_getMethodNameFromMetadata; - callbacks[118] = (delegate* unmanaged)&_getMethodHash; - callbacks[119] = (delegate* unmanaged)&_getSystemVAmd64PassStructInRegisterDescriptor; - callbacks[120] = (delegate* unmanaged)&_getLoongArch64PassStructInRegisterFlags; - callbacks[121] = (delegate* unmanaged)&_getRISCV64PassStructInRegisterFlags; - callbacks[122] = (delegate* unmanaged)&_getThreadTLSIndex; - callbacks[123] = (delegate* unmanaged)&_getAddrOfCaptureThreadGlobal; - callbacks[124] = (delegate* unmanaged)&_getHelperFtn; - callbacks[125] = (delegate* unmanaged)&_getFunctionEntryPoint; - callbacks[126] = (delegate* unmanaged)&_getFunctionFixedEntryPoint; - callbacks[127] = (delegate* unmanaged)&_getMethodSync; - callbacks[128] = (delegate* unmanaged)&_getLazyStringLiteralHelper; - callbacks[129] = (delegate* 
unmanaged)&_embedModuleHandle; - callbacks[130] = (delegate* unmanaged)&_embedClassHandle; - callbacks[131] = (delegate* unmanaged)&_embedMethodHandle; - callbacks[132] = (delegate* unmanaged)&_embedFieldHandle; - callbacks[133] = (delegate* unmanaged)&_embedGenericHandle; - callbacks[134] = (delegate* unmanaged)&_getLocationOfThisType; - callbacks[135] = (delegate* unmanaged)&_getAddressOfPInvokeTarget; - callbacks[136] = (delegate* unmanaged)&_GetCookieForPInvokeCalliSig; - callbacks[137] = (delegate* unmanaged)&_canGetCookieForPInvokeCalliSig; - callbacks[138] = (delegate* unmanaged)&_getJustMyCodeHandle; - callbacks[139] = (delegate* unmanaged)&_GetProfilingHandle; - callbacks[140] = (delegate* unmanaged)&_getCallInfo; - callbacks[141] = (delegate* unmanaged)&_getClassDomainID; - callbacks[142] = (delegate* unmanaged)&_getStaticFieldContent; - callbacks[143] = (delegate* unmanaged)&_getObjectContent; - callbacks[144] = (delegate* unmanaged)&_getStaticFieldCurrentClass; - callbacks[145] = (delegate* unmanaged)&_getVarArgsHandle; - callbacks[146] = (delegate* unmanaged)&_canGetVarArgsHandle; - callbacks[147] = (delegate* unmanaged)&_constructStringLiteral; - callbacks[148] = (delegate* unmanaged)&_emptyStringLiteral; - callbacks[149] = (delegate* unmanaged)&_getFieldThreadLocalStoreID; - callbacks[150] = (delegate* unmanaged)&_GetDelegateCtor; - callbacks[151] = (delegate* unmanaged)&_MethodCompileComplete; - callbacks[152] = (delegate* unmanaged)&_getTailCallHelpers; - callbacks[153] = (delegate* unmanaged)&_convertPInvokeCalliToCall; - callbacks[154] = (delegate* unmanaged)&_notifyInstructionSetUsage; - callbacks[155] = (delegate* unmanaged)&_updateEntryPointForTailCall; - callbacks[156] = (delegate* unmanaged)&_allocMem; - callbacks[157] = (delegate* unmanaged)&_reserveUnwindInfo; - callbacks[158] = (delegate* unmanaged)&_allocUnwindInfo; - callbacks[159] = (delegate* unmanaged)&_allocGCInfo; - callbacks[160] = (delegate* unmanaged)&_setEHcount; - callbacks[161] = (delegate* unmanaged)&_setEHinfo; - callbacks[162] = (delegate* unmanaged)&_logMsg; - callbacks[163] = (delegate* unmanaged)&_doAssert; - callbacks[164] = (delegate* unmanaged)&_reportFatalError; - callbacks[165] = (delegate* unmanaged)&_getPgoInstrumentationResults; - callbacks[166] = (delegate* unmanaged)&_allocPgoInstrumentationBySchema; - callbacks[167] = (delegate* unmanaged)&_recordCallSite; - callbacks[168] = (delegate* unmanaged)&_recordRelocation; - callbacks[169] = (delegate* unmanaged)&_getRelocTypeHint; - callbacks[170] = (delegate* unmanaged)&_getExpectedTargetArchitecture; - callbacks[171] = (delegate* unmanaged)&_getJitFlags; + callbacks[104] = (delegate* unmanaged)&_reportMetadata; + callbacks[105] = (delegate* unmanaged)&_allocateArray; + callbacks[106] = (delegate* unmanaged)&_freeArray; + callbacks[107] = (delegate* unmanaged)&_getArgNext; + callbacks[108] = (delegate* unmanaged)&_getArgType; + callbacks[109] = (delegate* unmanaged)&_getExactClasses; + callbacks[110] = (delegate* unmanaged)&_getArgClass; + callbacks[111] = (delegate* unmanaged)&_getHFAType; + callbacks[112] = (delegate* unmanaged)&_runWithErrorTrap; + callbacks[113] = (delegate* unmanaged)&_runWithSPMIErrorTrap; + callbacks[114] = (delegate* unmanaged)&_getEEInfo; + callbacks[115] = (delegate* unmanaged)&_getJitTimeLogFilename; + callbacks[116] = (delegate* unmanaged)&_getMethodDefFromMethod; + callbacks[117] = (delegate* unmanaged)&_printMethodName; + callbacks[118] = (delegate* unmanaged)&_getMethodNameFromMetadata; + callbacks[119] = 
(delegate* unmanaged)&_getMethodHash; + callbacks[120] = (delegate* unmanaged)&_getSystemVAmd64PassStructInRegisterDescriptor; + callbacks[121] = (delegate* unmanaged)&_getLoongArch64PassStructInRegisterFlags; + callbacks[122] = (delegate* unmanaged)&_getRISCV64PassStructInRegisterFlags; + callbacks[123] = (delegate* unmanaged)&_getThreadTLSIndex; + callbacks[124] = (delegate* unmanaged)&_getAddrOfCaptureThreadGlobal; + callbacks[125] = (delegate* unmanaged)&_getHelperFtn; + callbacks[126] = (delegate* unmanaged)&_getFunctionEntryPoint; + callbacks[127] = (delegate* unmanaged)&_getFunctionFixedEntryPoint; + callbacks[128] = (delegate* unmanaged)&_getMethodSync; + callbacks[129] = (delegate* unmanaged)&_getLazyStringLiteralHelper; + callbacks[130] = (delegate* unmanaged)&_embedModuleHandle; + callbacks[131] = (delegate* unmanaged)&_embedClassHandle; + callbacks[132] = (delegate* unmanaged)&_embedMethodHandle; + callbacks[133] = (delegate* unmanaged)&_embedFieldHandle; + callbacks[134] = (delegate* unmanaged)&_embedGenericHandle; + callbacks[135] = (delegate* unmanaged)&_getLocationOfThisType; + callbacks[136] = (delegate* unmanaged)&_getAddressOfPInvokeTarget; + callbacks[137] = (delegate* unmanaged)&_GetCookieForPInvokeCalliSig; + callbacks[138] = (delegate* unmanaged)&_canGetCookieForPInvokeCalliSig; + callbacks[139] = (delegate* unmanaged)&_getJustMyCodeHandle; + callbacks[140] = (delegate* unmanaged)&_GetProfilingHandle; + callbacks[141] = (delegate* unmanaged)&_getCallInfo; + callbacks[142] = (delegate* unmanaged)&_getClassDomainID; + callbacks[143] = (delegate* unmanaged)&_getStaticFieldContent; + callbacks[144] = (delegate* unmanaged)&_getObjectContent; + callbacks[145] = (delegate* unmanaged)&_getStaticFieldCurrentClass; + callbacks[146] = (delegate* unmanaged)&_getVarArgsHandle; + callbacks[147] = (delegate* unmanaged)&_canGetVarArgsHandle; + callbacks[148] = (delegate* unmanaged)&_constructStringLiteral; + callbacks[149] = (delegate* unmanaged)&_emptyStringLiteral; + callbacks[150] = (delegate* unmanaged)&_getFieldThreadLocalStoreID; + callbacks[151] = (delegate* unmanaged)&_GetDelegateCtor; + callbacks[152] = (delegate* unmanaged)&_MethodCompileComplete; + callbacks[153] = (delegate* unmanaged)&_getTailCallHelpers; + callbacks[154] = (delegate* unmanaged)&_convertPInvokeCalliToCall; + callbacks[155] = (delegate* unmanaged)&_notifyInstructionSetUsage; + callbacks[156] = (delegate* unmanaged)&_updateEntryPointForTailCall; + callbacks[157] = (delegate* unmanaged)&_allocMem; + callbacks[158] = (delegate* unmanaged)&_reserveUnwindInfo; + callbacks[159] = (delegate* unmanaged)&_allocUnwindInfo; + callbacks[160] = (delegate* unmanaged)&_allocGCInfo; + callbacks[161] = (delegate* unmanaged)&_setEHcount; + callbacks[162] = (delegate* unmanaged)&_setEHinfo; + callbacks[163] = (delegate* unmanaged)&_logMsg; + callbacks[164] = (delegate* unmanaged)&_doAssert; + callbacks[165] = (delegate* unmanaged)&_reportFatalError; + callbacks[166] = (delegate* unmanaged)&_getPgoInstrumentationResults; + callbacks[167] = (delegate* unmanaged)&_allocPgoInstrumentationBySchema; + callbacks[168] = (delegate* unmanaged)&_recordCallSite; + callbacks[169] = (delegate* unmanaged)&_recordRelocation; + callbacks[170] = (delegate* unmanaged)&_getRelocTypeHint; + callbacks[171] = (delegate* unmanaged)&_getExpectedTargetArchitecture; + callbacks[172] = (delegate* unmanaged)&_getJitFlags; return (IntPtr)callbacks; } diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt 
b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt index 3407e4e3a87c9a..4b7a3589abdf72 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt @@ -266,6 +266,7 @@ FUNCTIONS void getVars(CORINFO_METHOD_HANDLE ftn, uint32_t* cVars, ICorDebugInfo::ILVarInfo** vars, bool* extendOthers) void setVars(CORINFO_METHOD_HANDLE ftn, uint32_t cVars, ICorDebugInfo::NativeVarInfo* vars) void reportRichMappings(ICorDebugInfo::InlineTreeNode* inlineTreeNodes, uint32_t numInlineTreeNodes, ICorDebugInfo::RichOffsetMapping* mappings, uint32_t numMappings) + void reportMetadata(const char* key, const void* value) void*allocateArray(size_t cBytes); void freeArray(void*array); CORINFO_ARG_LIST_HANDLE getArgNext(CORINFO_ARG_LIST_HANDLE args); diff --git a/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h b/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h index 5d659488b29257..6b17b4a552ffde 100644 --- a/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h +++ b/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h @@ -115,6 +115,7 @@ struct JitInterfaceCallbacks void (* getVars)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_METHOD_HANDLE ftn, uint32_t* cVars, ICorDebugInfo::ILVarInfo** vars, bool* extendOthers); void (* setVars)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_METHOD_HANDLE ftn, uint32_t cVars, ICorDebugInfo::NativeVarInfo* vars); void (* reportRichMappings)(void * thisHandle, CorInfoExceptionClass** ppException, ICorDebugInfo::InlineTreeNode* inlineTreeNodes, uint32_t numInlineTreeNodes, ICorDebugInfo::RichOffsetMapping* mappings, uint32_t numMappings); + void (* reportMetadata)(void * thisHandle, CorInfoExceptionClass** ppException, const char* key, const void* value); void* (* allocateArray)(void * thisHandle, CorInfoExceptionClass** ppException, size_t cBytes); void (* freeArray)(void * thisHandle, CorInfoExceptionClass** ppException, void* array); CORINFO_ARG_LIST_HANDLE (* getArgNext)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_ARG_LIST_HANDLE args); @@ -1214,6 +1215,15 @@ class JitInterfaceWrapper : public ICorJitInfo if (pException != nullptr) throw pException; } + virtual void reportMetadata( + const char* key, + const void* value) +{ + CorInfoExceptionClass* pException = nullptr; + _callbacks->reportMetadata(_thisHandle, &pException, key, value); + if (pException != nullptr) throw pException; +} + virtual void* allocateArray( size_t cBytes) { diff --git a/src/coreclr/tools/superpmi/superpmi-shared/compileresult.cpp b/src/coreclr/tools/superpmi/superpmi-shared/compileresult.cpp index 3c6653c41a1c3a..ec6e88684060b2 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/compileresult.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/compileresult.cpp @@ -32,7 +32,13 @@ CompileResult::CompileResult() allocGCInfoDets.retval = nullptr; allocGCInfoDets.size = 0; + MethodFullName = nullptr; + TieringName = nullptr; memoryTracker = nullptr; + +#define JITMETADATAINFO(name, type, flags) +#define JITMETADATAMETRIC(name, type, flags) name = 0; +#include "jitmetadatalist.h" } CompileResult::~CompileResult() diff --git a/src/coreclr/tools/superpmi/superpmi-shared/compileresult.h b/src/coreclr/tools/superpmi/superpmi-shared/compileresult.h index b7be4dcd89279e..72415f1d38f595 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/compileresult.h +++ 
b/src/coreclr/tools/superpmi/superpmi-shared/compileresult.h @@ -121,6 +121,8 @@ class CompileResult void dmpSetVars(DWORD key, const Agnostic_SetVars& value); bool repSetVars(CORINFO_METHOD_HANDLE* ftn, ULONG32* cVars, ICorDebugInfo::NativeVarInfo** vars); + void recMetadata(const char* key, const void* value); + void recSetPatchpointInfo(PatchpointInfo* patchpointInfo); void dmpSetPatchpointInfo(DWORD key, const Agnostic_SetPatchpointInfo& value); bool repSetPatchpointInfo(PatchpointInfo** patchpointInfo); @@ -215,6 +217,15 @@ class CompileResult #define DENSELWM(map, value) DenseLightWeightMap* map; #include "crlwmlist.h" +#define JITMETADATAINFO(name, type, flags) +#define JITMETADATAMETRIC(name, type, flags) type name; +#include "jitmetadatalist.h" + + // Reported method full name from JIT (not available with release JIT) + const char* MethodFullName; + // Reported compilation tier from JIT + const char* TieringName; + // not persisted to disk. public: LightWeightMap* CallTargetTypes; diff --git a/src/coreclr/tools/superpmi/superpmi-shared/jitmetadatalist.h b/src/coreclr/tools/superpmi/superpmi-shared/jitmetadatalist.h new file mode 100644 index 00000000000000..f43f4300d73a1a --- /dev/null +++ b/src/coreclr/tools/superpmi/superpmi-shared/jitmetadatalist.h @@ -0,0 +1 @@ +#include "../../../jit/jitmetadatalist.h" diff --git a/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp b/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp index 47c50535e450f1..5d95ff65b6950a 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp @@ -1193,6 +1193,12 @@ void interceptor_ICJI::reportRichMappings(ICorDebugInfo::InlineTreeNode* inli original_ICorJitInfo->reportRichMappings(inlineTreeNodes, numInlineTreeNodes, mappings, numMappings); } +void interceptor_ICJI::reportMetadata(const char* key, const void* value) +{ + mc->cr->AddCall("reportMetadata"); + original_ICorJitInfo->reportMetadata(key, value); +} + /*-------------------------- Misc ---------------------------------------*/ // Used to allocate memory that needs to handed to the EE. 
// For eg, use this to allocated memory for reporting debug info, diff --git a/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp b/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp index 0f69dfae4a08c6..ed2dd8c79c4194 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp @@ -845,6 +845,14 @@ void interceptor_ICJI::reportRichMappings( original_ICorJitInfo->reportRichMappings(inlineTreeNodes, numInlineTreeNodes, mappings, numMappings); } +void interceptor_ICJI::reportMetadata( + const char* key, + const void* value) +{ + mcs->AddCall("reportMetadata"); + original_ICorJitInfo->reportMetadata(key, value); +} + void* interceptor_ICJI::allocateArray( size_t cBytes) { diff --git a/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp b/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp index 02bef7b549aca0..6242fb8b055ed7 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp @@ -741,6 +741,13 @@ void interceptor_ICJI::reportRichMappings( original_ICorJitInfo->reportRichMappings(inlineTreeNodes, numInlineTreeNodes, mappings, numMappings); } +void interceptor_ICJI::reportMetadata( + const char* key, + const void* value) +{ + original_ICorJitInfo->reportMetadata(key, value); +} + void* interceptor_ICJI::allocateArray( size_t cBytes) { diff --git a/src/coreclr/tools/superpmi/superpmi/fileio.cpp b/src/coreclr/tools/superpmi/superpmi/fileio.cpp index ed16485a038e8a..e26723de6e3e1e 100644 --- a/src/coreclr/tools/superpmi/superpmi/fileio.cpp +++ b/src/coreclr/tools/superpmi/superpmi/fileio.cpp @@ -27,10 +27,7 @@ bool FileWriter::Printf(const char* fmt, ...) } else { - DWORD numWritten; - bool result = - WriteFile(m_file.Get(), pBuffer, static_cast(printed), &numWritten, nullptr) && - (numWritten == static_cast(printed)); + bool result = Print(pBuffer, static_cast(printed)); if (pBuffer != stackBuffer) delete[] pBuffer; @@ -41,6 +38,75 @@ bool FileWriter::Printf(const char* fmt, ...) 
} } +bool FileWriter::Print(const char* value, size_t numChars) +{ + DWORD numWritten; + bool result = + WriteFile(m_file.Get(), value, static_cast(numChars), &numWritten, nullptr) && + (numWritten == static_cast(numChars)); + return result; +} + +bool FileWriter::Print(const char* value) +{ + return Print(value, strlen(value)); +} + +bool FileWriter::Print(int value) +{ + return Printf("%d", value); +} + +bool FileWriter::Print(int64_t value) +{ + return Printf("%lld", value); +} + +bool FileWriter::Print(double value) +{ + return Printf("%f", value); +} + +bool FileWriter::PrintQuotedCsvField(const char* value) +{ + size_t numQuotes = 0; + for (const char* p = value; *p != '\0'; p++) + { + if (*p == '"') + { + numQuotes++; + } + } + + if (numQuotes == 0) + { + return Printf("\"%s\"", value); + } + else + { + size_t len = 2 + strlen(value) + numQuotes; + char* buffer = new char[len]; + + size_t index = 0; + buffer[index++] = '"'; + for (const char* p = value; *p != '\0'; p++) + { + if (*p == '"') + { + buffer[index++] = '"'; + } + buffer[index++] = *p; + } + + buffer[index++] = '"'; + assert(index == len); + + bool result = Print(buffer, len); + delete[] buffer; + return result; + } +} + bool FileWriter::CreateNew(const char* path, FileWriter* fw) { FileHandle handle(CreateFile(path, GENERIC_WRITE, 0, nullptr, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr)); diff --git a/src/coreclr/tools/superpmi/superpmi/fileio.h b/src/coreclr/tools/superpmi/superpmi/fileio.h index a88e74d6ee00c5..4a1434f972598a 100644 --- a/src/coreclr/tools/superpmi/superpmi/fileio.h +++ b/src/coreclr/tools/superpmi/superpmi/fileio.h @@ -93,6 +93,12 @@ class FileWriter { } + bool Print(const char* value, size_t numChars); + bool Print(const char* value); + bool Print(int value); + bool Print(int64_t value); + bool Print(double value); + bool PrintQuotedCsvField(const char* value); bool Printf(const char* fmt, ...); static bool CreateNew(const char* path, FileWriter* fw); diff --git a/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp b/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp index 15c17173abdda4..c8ed4b67930d33 100644 --- a/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp +++ b/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp @@ -1027,6 +1027,41 @@ void MyICJI::reportRichMappings( freeArray(mappings); } +void MyICJI::reportMetadata(const char* key, const void* value) +{ + jitInstance->mc->cr->AddCall("reportMetadata"); + + if (strcmp(key, "MethodFullName") == 0) + { + const char* str = static_cast(value); + size_t len = strlen(str); + char* buf = static_cast(jitInstance->mc->cr->allocateMemory(len + 1)); + memcpy(buf, str, len + 1); + jitInstance->mc->cr->MethodFullName = buf; + return; + } + + if (strcmp(key, "TieringName") == 0) + { + const char* str = static_cast(value); + size_t len = strlen(str); + char* buf = static_cast(jitInstance->mc->cr->allocateMemory(len + 1)); + memcpy(buf, str, len + 1); + jitInstance->mc->cr->TieringName = buf; + return; + } + +#define JITMETADATAINFO(name, type, flags) +#define JITMETADATAMETRIC(name, type, flags) \ + if (strcmp(key, #name) == 0) \ + { \ + memcpy(&jitInstance->mc->cr->name, value, sizeof(type)); \ + return; \ + } + +#include "jitmetadatalist.h" +} + /*-------------------------- Misc ---------------------------------------*/ // Used to allocate memory that needs to handed to the EE. 
diff --git a/src/coreclr/tools/superpmi/superpmi/jitinstance.cpp b/src/coreclr/tools/superpmi/superpmi/jitinstance.cpp index 7c55db51af20cf..8eb0e69e8b2e5b 100644 --- a/src/coreclr/tools/superpmi/superpmi/jitinstance.cpp +++ b/src/coreclr/tools/superpmi/superpmi/jitinstance.cpp @@ -459,6 +459,7 @@ ReplayResults JitInstance::CompileMethod(MethodContext* MethodToCompile, int mcI } mc->cr->secondsToCompile = stj.GetSeconds(); + param.results.CompileResults = mc->cr; UINT64 insCountAfter = 0; Instrumentor_GetInsCount(&insCountAfter); diff --git a/src/coreclr/tools/superpmi/superpmi/jitinstance.h b/src/coreclr/tools/superpmi/superpmi/jitinstance.h index 03e283c1d5b62d..b13fe46d641f40 100644 --- a/src/coreclr/tools/superpmi/superpmi/jitinstance.h +++ b/src/coreclr/tools/superpmi/superpmi/jitinstance.h @@ -22,6 +22,7 @@ struct ReplayResults bool IsMinOpts = false; uint32_t NumCodeBytes = 0; uint64_t NumExecutedInstructions = 0; + CompileResult* CompileResults = nullptr; }; class JitInstance diff --git a/src/coreclr/tools/superpmi/superpmi/superpmi.cpp b/src/coreclr/tools/superpmi/superpmi/superpmi.cpp index 4ac03d18ce9a6d..f0b4b76fcc1c19 100644 --- a/src/coreclr/tools/superpmi/superpmi/superpmi.cpp +++ b/src/coreclr/tools/superpmi/superpmi/superpmi.cpp @@ -137,42 +137,81 @@ static const char* ResultToString(ReplayResult result) } } -static bool PrintDiffsCsvHeader(FileWriter& fw) +static void PrintDiffsCsvHeader(FileWriter& fw) { - return fw.Printf("Context,Context size,Base result,Diff result,MinOpts,Has diff,Base size,Diff size,Base instructions,Diff instructions\n"); + fw.Print("Context,Context size,Method full name,Tier name,Base result,Diff result,MinOpts,Has diff,Base size,Diff size,Base instructions,Diff instructions"); + +#define JITMETADATAINFO(name, type, flags) +#define JITMETADATAMETRIC(name, type, flags) fw.Print(",Base " #name ",Diff " #name); + +#include "jitmetadatalist.h" + + fw.Print("\n"); } -static bool PrintDiffsCsvRow( +static void PrintDiffsCsvRow( FileWriter& fw, int context, uint32_t contextSize, const ReplayResults& baseRes, const ReplayResults& diffRes, bool hasDiff) { - return fw.Printf("%d,%u,%s,%s,%s,%s,%u,%u,%lld,%lld\n", - context, contextSize, + fw.Printf("%d,%u,", context, contextSize); + fw.PrintQuotedCsvField(baseRes.CompileResults->MethodFullName == nullptr ? "" : baseRes.CompileResults->MethodFullName); + fw.Printf( + ",%s,%s,%s,%s,%s,%u,%u,%lld,%lld", + baseRes.CompileResults->TieringName == nullptr ? "" : baseRes.CompileResults->TieringName, ResultToString(baseRes.Result), ResultToString(diffRes.Result), baseRes.IsMinOpts ? "True" : "False", hasDiff ? 
"True" : "False", baseRes.NumCodeBytes, diffRes.NumCodeBytes, baseRes.NumExecutedInstructions, diffRes.NumExecutedInstructions); + +#define JITMETADATAINFO(name, type, flags) +#define JITMETADATAMETRIC(name, type, flags) \ + fw.Print(","); \ + fw.Print(baseRes.CompileResults->name); \ + fw.Print(","); \ + fw.Print(diffRes.CompileResults->name); + +#include "jitmetadatalist.h" + + fw.Print("\n"); } -static bool PrintReplayCsvHeader(FileWriter& fw) +static void PrintReplayCsvHeader(FileWriter& fw) { - return fw.Printf("Context,Context size,Result,MinOpts,Size,Instructions\n"); + fw.Printf("Context,Context size,Method full name,Tier name,Result,MinOpts,Size,Instructions"); + +#define JITMETADATAINFO(name, type, flags) +#define JITMETADATAMETRIC(name, type, flags) fw.Print("," #name); + +#include "jitmetadatalist.h" + + fw.Print("\n"); } -static bool PrintReplayCsvRow( +static void PrintReplayCsvRow( FileWriter& fw, int context, uint32_t contextSize, const ReplayResults& res) { - return fw.Printf("%d,%u,%s,%s,%u,%lld\n", - context, contextSize, + fw.Printf("%d,%u,", context, contextSize); + fw.PrintQuotedCsvField(res.CompileResults->MethodFullName == nullptr ? "" : res.CompileResults->MethodFullName); + fw.Printf(",%s,%s,%s,%u,%lld", + res.CompileResults->TieringName == nullptr ? "" : res.CompileResults->TieringName, ResultToString(res.Result), res.IsMinOpts ? "True" : "False", res.NumCodeBytes, res.NumExecutedInstructions); + +#define JITMETADATAINFO(name, type, flags) +#define JITMETADATAMETRIC(name, type, flags) \ + fw.Print(","); \ + fw.Print(res.CompileResults->name); + +#include "jitmetadatalist.h" + + fw.Print("\n"); } // Run superpmi. The return value is as follows: diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 62d69c0fe4ef98..c6031ee224577f 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -10956,6 +10956,21 @@ void CEEJitInfo::reportRichMappings( EE_TO_JIT_TRANSITION(); } +void CEEJitInfo::reportMetadata( + const char* key, + const void* value) +{ + CONTRACTL { + NOTHROW; + GC_NOTRIGGER; + MODE_PREEMPTIVE; + } CONTRACTL_END; + + JIT_TO_EE_TRANSITION_LEAF(); + + EE_TO_JIT_TRANSITION_LEAF(); +} + void CEEJitInfo::setPatchpointInfo(PatchpointInfo* patchpointInfo) { CONTRACTL { @@ -14421,6 +14436,12 @@ void CEEInfo::reportRichMappings( UNREACHABLE(); // only called on derived class. } +void CEEInfo::reportMetadata(const char* key, const void* value) +{ + LIMITED_METHOD_CONTRACT; + UNREACHABLE(); // only called on derived class. 
+} + void CEEInfo::setPatchpointInfo(PatchpointInfo* patchpointInfo) { LIMITED_METHOD_CONTRACT; diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h index 1708a05df5e314..277f39e8a80eac 100644 --- a/src/coreclr/vm/jitinterface.h +++ b/src/coreclr/vm/jitinterface.h @@ -897,6 +897,8 @@ class CEEJitInfo : public CEEInfo ICorDebugInfo::RichOffsetMapping* mappings, uint32_t numMappings) override final; + void reportMetadata(const char* key, const void* value) override final; + void* getHelperFtn(CorInfoHelpFunc ftnNum, /* IN */ void ** ppIndirection) override final; /* OUT */ static PCODE getHelperFtnStatic(CorInfoHelpFunc ftnNum); From b600ffc035f7f07e33a1a31e1cacb0f288937bac Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 20 Feb 2024 16:06:00 +0100 Subject: [PATCH 49/64] Fix --- src/coreclr/scripts/superpmi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py index 1c1813a0131101..07a1f4427c7441 100644 --- a/src/coreclr/scripts/superpmi.py +++ b/src/coreclr/scripts/superpmi.py @@ -2272,7 +2272,7 @@ def create_exception(): delta_bytes = diff_bytes - base_bytes logging.info("Total bytes of delta: {} ({:.2%} of base)".format(delta_bytes, delta_bytes / base_bytes)) - if "PerfScore" in self.coreclr_args.metrics: + if self.coreclr_args.metrics is not None and "PerfScore" in self.coreclr_args.metrics: base_perfscore = base_metrics["Overall"]["Diffed PerfScore"] diff_perfscore = diff_metrics["Overall"]["Diffed PerfScore"] logging.info("Total PerfScore of base: {}".format(base_perfscore)) From d99c2f7c9d01b37e29cc66145d9aaa80a414064f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 20 Feb 2024 17:20:11 +0100 Subject: [PATCH 50/64] Print PerfScore geomeans --- src/coreclr/scripts/superpmi.py | 42 ++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py index 07a1f4427c7441..fed8109ac0e790 100644 --- a/src/coreclr/scripts/superpmi.py +++ b/src/coreclr/scripts/superpmi.py @@ -22,6 +22,7 @@ import datetime import locale import logging +import math import os import multiprocessing import platform @@ -1865,7 +1866,8 @@ def aggregate_diff_metrics(details): """ base_minopts = {"Successful compiles": 0, "Missing compiles": 0, "Failing compiles": 0, - "Contexts with diffs": 0, "Diffed code bytes": 0, "Diffed PerfScore" : 0.0, + "Contexts with diffs": 0, "Diffed code bytes": 0, + "Diffed PerfScore" : 0.0, "Relative PerfScore Geomean": 0.0, "Diff executed instructions": 0, "Diffed contexts": 0} base_fullopts = base_minopts.copy() @@ -1915,6 +1917,11 @@ def aggregate_diff_metrics(details): base_dict["Diffed PerfScore"] += base_perfscore diff_dict["Diffed PerfScore"] += diff_perfscore + if base_perfscore > 0: + log_relative_perfscore = math.log(diff_perfscore / base_perfscore) + base_dict["Relative PerfScore Geomean"] += log_relative_perfscore + diff_dict["Relative PerfScore Geomean"] += log_relative_perfscore + base_dict["Diffed contexts"] += 1 diff_dict["Diffed contexts"] += 1 @@ -1930,6 +1937,18 @@ def aggregate_diff_metrics(details): for k in diff_overall.keys(): diff_overall[k] += diff_fullopts[k] + for d in [base_overall, base_minopts, base_fullopts, diff_overall, diff_minopts, diff_fullopts]: + sum_of_logs = d["Relative PerfScore Geomean"] + if d["Diffed contexts"] > 0: + d["Relative PerfScore Geomean"] = math.exp(sum_of_logs / d["Diffed contexts"]) + else: + d["Relative 
PerfScore Geomean"] = 1 + + if d["Contexts with diffs"] > 0: + d["Relative PerfScore Geomean (Diffs)"] = math.exp(sum_of_logs / d["Contexts with diffs"]) + else: + d["Relative PerfScore Geomean (Diffs)"] = 1 + return ({"Overall": base_overall, "MinOpts": base_minopts, "FullOpts": base_fullopts}, {"Overall": diff_overall, "MinOpts": diff_minopts, "FullOpts": diff_fullopts}) @@ -2271,14 +2290,21 @@ def create_exception(): logging.info("Total bytes of diff: {}".format(diff_bytes)) delta_bytes = diff_bytes - base_bytes logging.info("Total bytes of delta: {} ({:.2%} of base)".format(delta_bytes, delta_bytes / base_bytes)) + logging.info("") + + base_perfscore = base_metrics["Overall"]["Diffed PerfScore"] + diff_perfscore = diff_metrics["Overall"]["Diffed PerfScore"] + logging.info("Total PerfScore of base: {}".format(base_perfscore)) + logging.info("Total PerfScore of diff: {}".format(diff_perfscore)) + delta_perfscore = diff_perfscore - base_perfscore + logging.info("Total PerfScore of delta: {} ({:.2%} of base)".format(delta_perfscore, delta_perfscore / base_perfscore)) + logging.info("") - if self.coreclr_args.metrics is not None and "PerfScore" in self.coreclr_args.metrics: - base_perfscore = base_metrics["Overall"]["Diffed PerfScore"] - diff_perfscore = diff_metrics["Overall"]["Diffed PerfScore"] - logging.info("Total PerfScore of base: {}".format(base_perfscore)) - logging.info("Total PerfScore of diff: {}".format(diff_perfscore)) - delta_perfscore = diff_perfscore - base_perfscore - logging.info("Total PerfScore of delta: {} ({:.2%} of base)".format(delta_perfscore, delta_perfscore / base_perfscore)) + relative_perfscore_geomean = diff_metrics["Overall"]["Relative PerfScore Geomean"] + logging.info("Relative PerfScore Geomean: {:.4%}".format(relative_perfscore_geomean - 1)) + relative_perfscore_geomean_diffs = diff_metrics["Overall"]["Relative PerfScore Geomean (Diffs)"] + logging.info("Relative PerfScore Geomean (Diffs): {:.4%}".format(relative_perfscore_geomean_diffs - 1)) + logging.info("") try: current_text_diff = text_differences.get_nowait() From f4e853b4a62afa87f709f29b58f76f7600a8780d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 20 Feb 2024 17:42:19 +0100 Subject: [PATCH 51/64] Add to report --- src/coreclr/scripts/superpmi.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py index fed8109ac0e790..89d5ada34ac201 100644 --- a/src/coreclr/scripts/superpmi.py +++ b/src/coreclr/scripts/superpmi.py @@ -1808,10 +1808,10 @@ def compute_pct(base, diff): else: return 0.0 -def format_pct(pct): +def format_pct(pct, num_decimals = 2): plus_if_positive = "+" if pct > 0 else "" - text = "{}{:.2f}%".format(plus_if_positive, pct) + text = "{}{:.{prec}f}%".format(plus_if_positive, pct, prec=num_decimals) if pct != 0: color = "red" if pct > 0 else "green" return html_color(color, text) @@ -2536,19 +2536,21 @@ def write_pivot_section(row): sum_diff = sum(diff_metrics[row]["Diffed code bytes"] for (_, _, diff_metrics, _, _, _) in asm_diffs) with DetailsSection(write_fh, "{} ({} bytes)".format(row, format_delta(sum_base, sum_diff))): - write_fh.write("|Collection|Base size (bytes)|Diff size (bytes)|\n") - write_fh.write("|---|--:|--:|\n") + write_fh.write("|Collection|Base size (bytes)|Diff size (bytes)|Rel PerfScore Geomean|Rel PerfScore Geomean over Diffs\n") + write_fh.write("|---|--:|--:|--:|--:|\n") for (mch_file, base_metrics, diff_metrics, _, _, _) in asm_diffs: # Exclude this 
particular row? if not has_diffs(diff_metrics[row]): continue - write_fh.write("|{}|{:,d}|{}|\n".format( + write_fh.write("|{}|{:,d}|{}|{}|{}|\n".format( mch_file, base_metrics[row]["Diffed code bytes"], format_delta( base_metrics[row]["Diffed code bytes"], - diff_metrics[row]["Diffed code bytes"]))) + diff_metrics[row]["Diffed code bytes"]), + format_pct(diff_metrics[row]["Relative PerfScore Geomean"] * 100 - 100, 4), + format_pct(diff_metrics[row]["Relative PerfScore Geomean (Diffs)"] * 100 - 100, 4))) write_top_context_section() write_pivot_section("Overall") From bc38b0e405c45f140a9e9b90a76424c23050003a Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 20 Feb 2024 18:00:21 +0100 Subject: [PATCH 52/64] Clean up --- src/coreclr/scripts/superpmi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py index 89d5ada34ac201..7b10d87fe6ce3c 100644 --- a/src/coreclr/scripts/superpmi.py +++ b/src/coreclr/scripts/superpmi.py @@ -2536,7 +2536,7 @@ def write_pivot_section(row): sum_diff = sum(diff_metrics[row]["Diffed code bytes"] for (_, _, diff_metrics, _, _, _) in asm_diffs) with DetailsSection(write_fh, "{} ({} bytes)".format(row, format_delta(sum_base, sum_diff))): - write_fh.write("|Collection|Base size (bytes)|Diff size (bytes)|Rel PerfScore Geomean|Rel PerfScore Geomean over Diffs\n") + write_fh.write("|Collection|Base size (bytes)|Diff size (bytes)|PerfScore|PerfScore in Diffs\n") write_fh.write("|---|--:|--:|--:|--:|\n") for (mch_file, base_metrics, diff_metrics, _, _, _) in asm_diffs: # Exclude this particular row? @@ -2550,7 +2550,7 @@ def write_pivot_section(row): base_metrics[row]["Diffed code bytes"], diff_metrics[row]["Diffed code bytes"]), format_pct(diff_metrics[row]["Relative PerfScore Geomean"] * 100 - 100, 4), - format_pct(diff_metrics[row]["Relative PerfScore Geomean (Diffs)"] * 100 - 100, 4))) + format_pct(diff_metrics[row]["Relative PerfScore Geomean (Diffs)"] * 100 - 100))) write_top_context_section() write_pivot_section("Overall") From 8b511bb2d0c7531beb0b2d64ba84c0069ee7a6f2 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 20 Feb 2024 18:18:24 +0100 Subject: [PATCH 53/64] Move overall perfscore change to details --- src/coreclr/scripts/superpmi.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py index 7b10d87fe6ce3c..29e3801147500a 100644 --- a/src/coreclr/scripts/superpmi.py +++ b/src/coreclr/scripts/superpmi.py @@ -2536,20 +2536,19 @@ def write_pivot_section(row): sum_diff = sum(diff_metrics[row]["Diffed code bytes"] for (_, _, diff_metrics, _, _, _) in asm_diffs) with DetailsSection(write_fh, "{} ({} bytes)".format(row, format_delta(sum_base, sum_diff))): - write_fh.write("|Collection|Base size (bytes)|Diff size (bytes)|PerfScore|PerfScore in Diffs\n") - write_fh.write("|---|--:|--:|--:|--:|\n") + write_fh.write("|Collection|Base size (bytes)|Diff size (bytes)|PerfScore in Diffs\n") + write_fh.write("|---|--:|--:|--:|\n") for (mch_file, base_metrics, diff_metrics, _, _, _) in asm_diffs: # Exclude this particular row? 
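                # Aside: the PerfScore columns written below are geometric means of
                # per-context ratios. aggregate_diff_metrics sums log(diff / base)
                # for each context and reports math.exp(sum_of_logs / count); for two
                # hypothetical contexts with ratios 1.02 and 0.97 this gives
                # math.exp((math.log(1.02) + math.log(0.97)) / 2) ~= 0.9947, which the
                # "* 100 - 100" conversion below renders as roughly -0.53%.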
if not has_diffs(diff_metrics[row]): continue - write_fh.write("|{}|{:,d}|{}|{}|{}|\n".format( + write_fh.write("|{}|{:,d}|{}|{}|\n".format( mch_file, base_metrics[row]["Diffed code bytes"], format_delta( base_metrics[row]["Diffed code bytes"], diff_metrics[row]["Diffed code bytes"]), - format_pct(diff_metrics[row]["Relative PerfScore Geomean"] * 100 - 100, 4), format_pct(diff_metrics[row]["Relative PerfScore Geomean (Diffs)"] * 100 - 100))) write_top_context_section() @@ -2569,26 +2568,27 @@ def write_pivot_section(row): with DetailsSection(write_fh, "Details"): if any_diffs: write_fh.write("#### Improvements/regressions per collection\n\n") - write_fh.write("|Collection|Contexts with diffs|Improvements|Regressions|Same size|Improvements (bytes)|Regressions (bytes)|\n") - write_fh.write("|---|--:|--:|--:|--:|--:|--:|\n") + write_fh.write("|Collection|Contexts with diffs|Improvements|Regressions|Same size|Improvements (bytes)|Regressions (bytes)|PerfScore Overall (FullOpts)|\n") + write_fh.write("|---|--:|--:|--:|--:|--:|--:|--:|\n") - def write_row(name, diffs): + def write_row(name, diffs, perfscore_geomean): base_diff_sizes = [(int(r["Base size"]), int(r["Diff size"])) for r in diffs] (num_improvements, num_regressions, num_same, byte_improvements, byte_regressions) = calculate_improvements_regressions(base_diff_sizes) - write_fh.write("|{}|{:,d}|{}|{}|{}|{}|{}|\n".format( + write_fh.write("|{}|{:,d}|{}|{}|{}|{}|{}|{}|\n".format( name, len(diffs), html_color("green", "{:,d}".format(num_improvements)), html_color("red", "{:,d}".format(num_regressions)), html_color("blue", "{:,d}".format(num_same)), html_color("green", "-{:,d}".format(byte_improvements)), - html_color("red", "+{:,d}".format(byte_regressions)))) + html_color("red", "+{:,d}".format(byte_regressions)), + "" if perfscore_geomean is None else format_pct(perfscore_geomean, 4))) - for (mch_file, _, _, diffs, _, _) in asm_diffs: - write_row(mch_file, diffs) + for (mch_file, _, diff_metrics, diffs, _, _) in asm_diffs: + write_row(mch_file, diffs, diff_metrics["FullOpts"]["Relative PerfScore Geomean"] * 100 - 100) if len(asm_diffs) > 1: - write_row("", [r for (_, _, _, diffs, _, _) in asm_diffs for r in diffs]) + write_row("", [r for (_, _, _, diffs, _, _) in asm_diffs for r in diffs], None) write_fh.write("\n---\n\n") From 212ef843ee79194cec8e2f746b5e5bf7d9156ff4 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 21 Feb 2024 11:48:24 +0100 Subject: [PATCH 54/64] Fixes after merge --- .../JitInterface/CorInfoImpl_generated.cs | 141 ------------------ .../aot/jitinterface/jitinterface_generated.h | 13 -- 2 files changed, 154 deletions(-) diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs index b19971709ef519..7789862f5074a1 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs @@ -2671,146 +2671,6 @@ private static IntPtr GetUnmanagedCallbacks() callbacks[101] = (delegate* unmanaged)&_getVars; callbacks[102] = (delegate* unmanaged)&_setVars; callbacks[103] = (delegate* unmanaged)&_reportRichMappings; -<<<<<<< HEAD - callbacks[104] = (delegate* unmanaged)&_reportMetadata; - callbacks[105] = (delegate* unmanaged)&_allocateArray; - callbacks[106] = (delegate* unmanaged)&_freeArray; - callbacks[107] = (delegate* unmanaged)&_getArgNext; - callbacks[108] = (delegate* unmanaged)&_getArgType; - callbacks[109] = (delegate* unmanaged)&_getExactClasses; 
- callbacks[110] = (delegate* unmanaged)&_getArgClass; - callbacks[111] = (delegate* unmanaged)&_getHFAType; - callbacks[112] = (delegate* unmanaged)&_runWithErrorTrap; - callbacks[113] = (delegate* unmanaged)&_runWithSPMIErrorTrap; - callbacks[114] = (delegate* unmanaged)&_getEEInfo; - callbacks[115] = (delegate* unmanaged)&_getJitTimeLogFilename; - callbacks[116] = (delegate* unmanaged)&_getMethodDefFromMethod; - callbacks[117] = (delegate* unmanaged)&_printMethodName; - callbacks[118] = (delegate* unmanaged)&_getMethodNameFromMetadata; - callbacks[119] = (delegate* unmanaged)&_getMethodHash; - callbacks[120] = (delegate* unmanaged)&_getSystemVAmd64PassStructInRegisterDescriptor; - callbacks[121] = (delegate* unmanaged)&_getLoongArch64PassStructInRegisterFlags; - callbacks[122] = (delegate* unmanaged)&_getRISCV64PassStructInRegisterFlags; - callbacks[123] = (delegate* unmanaged)&_getThreadTLSIndex; - callbacks[124] = (delegate* unmanaged)&_getAddrOfCaptureThreadGlobal; - callbacks[125] = (delegate* unmanaged)&_getHelperFtn; - callbacks[126] = (delegate* unmanaged)&_getFunctionEntryPoint; - callbacks[127] = (delegate* unmanaged)&_getFunctionFixedEntryPoint; - callbacks[128] = (delegate* unmanaged)&_getMethodSync; - callbacks[129] = (delegate* unmanaged)&_getLazyStringLiteralHelper; - callbacks[130] = (delegate* unmanaged)&_embedModuleHandle; - callbacks[131] = (delegate* unmanaged)&_embedClassHandle; - callbacks[132] = (delegate* unmanaged)&_embedMethodHandle; - callbacks[133] = (delegate* unmanaged)&_embedFieldHandle; - callbacks[134] = (delegate* unmanaged)&_embedGenericHandle; - callbacks[135] = (delegate* unmanaged)&_getLocationOfThisType; - callbacks[136] = (delegate* unmanaged)&_getAddressOfPInvokeTarget; - callbacks[137] = (delegate* unmanaged)&_GetCookieForPInvokeCalliSig; - callbacks[138] = (delegate* unmanaged)&_canGetCookieForPInvokeCalliSig; - callbacks[139] = (delegate* unmanaged)&_getJustMyCodeHandle; - callbacks[140] = (delegate* unmanaged)&_GetProfilingHandle; - callbacks[141] = (delegate* unmanaged)&_getCallInfo; - callbacks[142] = (delegate* unmanaged)&_getClassDomainID; - callbacks[143] = (delegate* unmanaged)&_getStaticFieldContent; - callbacks[144] = (delegate* unmanaged)&_getObjectContent; - callbacks[145] = (delegate* unmanaged)&_getStaticFieldCurrentClass; - callbacks[146] = (delegate* unmanaged)&_getVarArgsHandle; - callbacks[147] = (delegate* unmanaged)&_canGetVarArgsHandle; - callbacks[148] = (delegate* unmanaged)&_constructStringLiteral; - callbacks[149] = (delegate* unmanaged)&_emptyStringLiteral; - callbacks[150] = (delegate* unmanaged)&_getFieldThreadLocalStoreID; - callbacks[151] = (delegate* unmanaged)&_GetDelegateCtor; - callbacks[152] = (delegate* unmanaged)&_MethodCompileComplete; - callbacks[153] = (delegate* unmanaged)&_getTailCallHelpers; - callbacks[154] = (delegate* unmanaged)&_convertPInvokeCalliToCall; - callbacks[155] = (delegate* unmanaged)&_notifyInstructionSetUsage; - callbacks[156] = (delegate* unmanaged)&_updateEntryPointForTailCall; - callbacks[157] = (delegate* unmanaged)&_allocMem; - callbacks[158] = (delegate* unmanaged)&_reserveUnwindInfo; - callbacks[159] = (delegate* unmanaged)&_allocUnwindInfo; - callbacks[160] = (delegate* unmanaged)&_allocGCInfo; - callbacks[161] = (delegate* unmanaged)&_setEHcount; - callbacks[162] = (delegate* unmanaged)&_setEHinfo; - callbacks[163] = (delegate* unmanaged)&_logMsg; - callbacks[164] = (delegate* unmanaged)&_doAssert; - callbacks[165] = (delegate* unmanaged)&_reportFatalError; - callbacks[166] = 
(delegate* unmanaged)&_getPgoInstrumentationResults; - callbacks[167] = (delegate* unmanaged)&_allocPgoInstrumentationBySchema; - callbacks[168] = (delegate* unmanaged)&_recordCallSite; - callbacks[169] = (delegate* unmanaged)&_recordRelocation; - callbacks[170] = (delegate* unmanaged)&_getRelocTypeHint; - callbacks[171] = (delegate* unmanaged)&_getExpectedTargetArchitecture; - callbacks[172] = (delegate* unmanaged)&_getJitFlags; -||||||| 9dc6ea62a4d - callbacks[104] = (delegate* unmanaged)&_allocateArray; - callbacks[105] = (delegate* unmanaged)&_freeArray; - callbacks[106] = (delegate* unmanaged)&_getArgNext; - callbacks[107] = (delegate* unmanaged)&_getArgType; - callbacks[108] = (delegate* unmanaged)&_getExactClasses; - callbacks[109] = (delegate* unmanaged)&_getArgClass; - callbacks[110] = (delegate* unmanaged)&_getHFAType; - callbacks[111] = (delegate* unmanaged)&_runWithErrorTrap; - callbacks[112] = (delegate* unmanaged)&_runWithSPMIErrorTrap; - callbacks[113] = (delegate* unmanaged)&_getEEInfo; - callbacks[114] = (delegate* unmanaged)&_getJitTimeLogFilename; - callbacks[115] = (delegate* unmanaged)&_getMethodDefFromMethod; - callbacks[116] = (delegate* unmanaged)&_printMethodName; - callbacks[117] = (delegate* unmanaged)&_getMethodNameFromMetadata; - callbacks[118] = (delegate* unmanaged)&_getMethodHash; - callbacks[119] = (delegate* unmanaged)&_getSystemVAmd64PassStructInRegisterDescriptor; - callbacks[120] = (delegate* unmanaged)&_getLoongArch64PassStructInRegisterFlags; - callbacks[121] = (delegate* unmanaged)&_getRISCV64PassStructInRegisterFlags; - callbacks[122] = (delegate* unmanaged)&_getThreadTLSIndex; - callbacks[123] = (delegate* unmanaged)&_getAddrOfCaptureThreadGlobal; - callbacks[124] = (delegate* unmanaged)&_getHelperFtn; - callbacks[125] = (delegate* unmanaged)&_getFunctionEntryPoint; - callbacks[126] = (delegate* unmanaged)&_getFunctionFixedEntryPoint; - callbacks[127] = (delegate* unmanaged)&_getMethodSync; - callbacks[128] = (delegate* unmanaged)&_getLazyStringLiteralHelper; - callbacks[129] = (delegate* unmanaged)&_embedModuleHandle; - callbacks[130] = (delegate* unmanaged)&_embedClassHandle; - callbacks[131] = (delegate* unmanaged)&_embedMethodHandle; - callbacks[132] = (delegate* unmanaged)&_embedFieldHandle; - callbacks[133] = (delegate* unmanaged)&_embedGenericHandle; - callbacks[134] = (delegate* unmanaged)&_getLocationOfThisType; - callbacks[135] = (delegate* unmanaged)&_getAddressOfPInvokeTarget; - callbacks[136] = (delegate* unmanaged)&_GetCookieForPInvokeCalliSig; - callbacks[137] = (delegate* unmanaged)&_canGetCookieForPInvokeCalliSig; - callbacks[138] = (delegate* unmanaged)&_getJustMyCodeHandle; - callbacks[139] = (delegate* unmanaged)&_GetProfilingHandle; - callbacks[140] = (delegate* unmanaged)&_getCallInfo; - callbacks[141] = (delegate* unmanaged)&_getClassDomainID; - callbacks[142] = (delegate* unmanaged)&_getStaticFieldContent; - callbacks[143] = (delegate* unmanaged)&_getObjectContent; - callbacks[144] = (delegate* unmanaged)&_getStaticFieldCurrentClass; - callbacks[145] = (delegate* unmanaged)&_getVarArgsHandle; - callbacks[146] = (delegate* unmanaged)&_canGetVarArgsHandle; - callbacks[147] = (delegate* unmanaged)&_constructStringLiteral; - callbacks[148] = (delegate* unmanaged)&_emptyStringLiteral; - callbacks[149] = (delegate* unmanaged)&_getFieldThreadLocalStoreID; - callbacks[150] = (delegate* unmanaged)&_GetDelegateCtor; - callbacks[151] = (delegate* unmanaged)&_MethodCompileComplete; - callbacks[152] = (delegate* 
unmanaged)&_getTailCallHelpers; - callbacks[153] = (delegate* unmanaged)&_convertPInvokeCalliToCall; - callbacks[154] = (delegate* unmanaged)&_notifyInstructionSetUsage; - callbacks[155] = (delegate* unmanaged)&_updateEntryPointForTailCall; - callbacks[156] = (delegate* unmanaged)&_allocMem; - callbacks[157] = (delegate* unmanaged)&_reserveUnwindInfo; - callbacks[158] = (delegate* unmanaged)&_allocUnwindInfo; - callbacks[159] = (delegate* unmanaged)&_allocGCInfo; - callbacks[160] = (delegate* unmanaged)&_setEHcount; - callbacks[161] = (delegate* unmanaged)&_setEHinfo; - callbacks[162] = (delegate* unmanaged)&_logMsg; - callbacks[163] = (delegate* unmanaged)&_doAssert; - callbacks[164] = (delegate* unmanaged)&_reportFatalError; - callbacks[165] = (delegate* unmanaged)&_getPgoInstrumentationResults; - callbacks[166] = (delegate* unmanaged)&_allocPgoInstrumentationBySchema; - callbacks[167] = (delegate* unmanaged)&_recordCallSite; - callbacks[168] = (delegate* unmanaged)&_recordRelocation; - callbacks[169] = (delegate* unmanaged)&_getRelocTypeHint; - callbacks[170] = (delegate* unmanaged)&_getExpectedTargetArchitecture; - callbacks[171] = (delegate* unmanaged)&_getJitFlags; -======= callbacks[104] = (delegate* unmanaged)&_reportMetadata; callbacks[105] = (delegate* unmanaged)&_allocateArray; callbacks[106] = (delegate* unmanaged)&_freeArray; @@ -2880,7 +2740,6 @@ private static IntPtr GetUnmanagedCallbacks() callbacks[170] = (delegate* unmanaged)&_getRelocTypeHint; callbacks[171] = (delegate* unmanaged)&_getExpectedTargetArchitecture; callbacks[172] = (delegate* unmanaged)&_getJitFlags; ->>>>>>> 80084aa5632026b7d14d559d22e31403a9e89960 return (IntPtr)callbacks; } diff --git a/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h b/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h index 0e97f58dd4be3b..3723b57aa4d8e2 100644 --- a/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h +++ b/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h @@ -1215,18 +1215,6 @@ class JitInterfaceWrapper : public ICorJitInfo if (pException != nullptr) throw pException; } -<<<<<<< HEAD - virtual void reportMetadata( - const char* key, - const void* value) -{ - CorInfoExceptionClass* pException = nullptr; - _callbacks->reportMetadata(_thisHandle, &pException, key, value); - if (pException != nullptr) throw pException; -} - -||||||| 9dc6ea62a4d -======= virtual void reportMetadata( const char* key, const void* value, @@ -1237,7 +1225,6 @@ class JitInterfaceWrapper : public ICorJitInfo if (pException != nullptr) throw pException; } ->>>>>>> 80084aa5632026b7d14d559d22e31403a9e89960 virtual void* allocateArray( size_t cBytes) { From 1c4b20df60f6f4edec7136dc31a3533aa70eb92d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 21 Feb 2024 13:11:12 +0100 Subject: [PATCH 55/64] Run jit-format --- src/coreclr/jit/inductionvariableopts.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index d0e126112dfa77..e1a51f3f2a0891 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -1452,7 +1452,8 @@ PhaseStatus Compiler::optInductionVariables() if (insertInitAfter->IsPhiDefnStmt()) { - while ((insertInitAfter->GetNextStmt() != nullptr) && insertInitAfter->GetNextStmt()->IsPhiDefnStmt()) + while ((insertInitAfter->GetNextStmt() != nullptr) && + insertInitAfter->GetNextStmt()->IsPhiDefnStmt()) { insertInitAfter = 
insertInitAfter->GetNextStmt();
            }

From 3ba71fc38aa0ba1542ef6c8eb4970606bfe3bbb5 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Thu, 22 Feb 2024 10:42:12 +0100
Subject: [PATCH 56/64] Do not pick start BB for parameters

---
 src/coreclr/jit/inductionvariableopts.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp
index e1a51f3f2a0891..1b74869e2ca157 100644
--- a/src/coreclr/jit/inductionvariableopts.cpp
+++ b/src/coreclr/jit/inductionvariableopts.cpp
@@ -1284,12 +1284,12 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned ne
 void Compiler::optBestEffortReplaceNarrowIVUsesWith(
     unsigned lclNum, unsigned ssaNum, unsigned newLclNum, BasicBlock* block, Statement* firstStmt)
 {
-    JITDUMP("  Replacing V%02u -> V%02u in " FMT_BB " starting at " FMT_STMT "\n", lclNum, newLclNum, block->bbNum,
+    JITDUMP("Replacing V%02u -> V%02u in " FMT_BB " starting at " FMT_STMT "\n", lclNum, newLclNum, block->bbNum,
             firstStmt == nullptr ? 0 : firstStmt->GetID());

     for (Statement* stmt = firstStmt; stmt != nullptr; stmt = stmt->GetNextStmt())
     {
-        JITDUMP("  Replacing V%02u -> V%02u in [%06u]\n", lclNum, newLclNum, dspTreeID(stmt->GetRootNode()));
+        JITDUMP("Replacing V%02u -> V%02u in [%06u]\n", lclNum, newLclNum, dspTreeID(stmt->GetRootNode()));
         DISPSTMT(stmt);
         JITDUMP("\n");

@@ -1414,7 +1414,7 @@ PhaseStatus Compiler::optInductionVariables()
             BasicBlock* preheader = loop->EntryEdge(0)->getSourceBlock();
             BasicBlock* initBlock = preheader;
-            if (startSsaDsc->GetBlock() != nullptr)
+            if ((startSsaDsc->GetBlock() != nullptr) && (startSsaDsc->GetDefNode() != nullptr))
             {
                 initBlock = startSsaDsc->GetBlock();
             }
@@ -1427,7 +1427,7 @@ PhaseStatus Compiler::optInductionVariables()
             changed = true;

             Statement* insertInitAfter = nullptr;
-            if ((initBlock != preheader) && (startSsaDsc->GetDefNode() != nullptr))
+            if (initBlock != preheader)
             {
                 GenTree* narrowInitRoot = startSsaDsc->GetDefNode();
                 while (true)

From 6a70c2998f8936d835eb16869bbe0da07047cfb6 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Fri, 23 Feb 2024 10:57:30 +0100
Subject: [PATCH 57/64] Optimize throughput

---
 src/coreclr/jit/compiler.h                |  11 +-
 src/coreclr/jit/compmemkind.h             |   2 +-
 src/coreclr/jit/inductionvariableopts.cpp | 161 ++++++++++------------
 3 files changed, 81 insertions(+), 93 deletions(-)

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 4df95f17c2bfba..b639b04fbd5fe7 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -7413,14 +7413,15 @@ class Compiler
     PhaseStatus optInductionVariables();
     bool optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop);
-    bool optIsIVWideningProfitable(unsigned lclNum,
-                                   BasicBlock* initBlock,
-                                   bool initedToConstant,
-                                   FlowGraphNaturalLoop* loop);
+    bool optIsIVWideningProfitable(unsigned lclNum,
+                                   BasicBlock* initBlock,
+                                   bool initedToConstant,
+                                   FlowGraphNaturalLoop* loop,
+                                   ArrayStack<Statement*>& ivUses);
     void optBestEffortReplaceNarrowIVUsesWith(
         unsigned lclNum, unsigned ssaNum, unsigned newLclNum, BasicBlock* block, Statement* firstStmt);
     void optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, Statement* stmt);
-    bool optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNaturalLoop* loop);
+    void optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNaturalLoop* loop);

     // Redundant branch opts
     //
diff --git a/src/coreclr/jit/compmemkind.h b/src/coreclr/jit/compmemkind.h
index 03379196df9278..e986682894c3b6 100644 --- a/src/coreclr/jit/compmemkind.h +++ b/src/coreclr/jit/compmemkind.h @@ -50,7 +50,7 @@ CompMemKindMacro(LoopOpt) CompMemKindMacro(LoopClone) CompMemKindMacro(LoopUnroll) CompMemKindMacro(LoopHoist) -CompMemKindMacro(LoopScalarEvolution) +CompMemKindMacro(LoopIVOpts) CompMemKindMacro(Unknown) CompMemKindMacro(RangeCheck) CompMemKindMacro(CopyProp) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 1b74869e2ca157..b92e6e30d679ad 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -187,7 +187,7 @@ class ScalarEvolutionContext Scev* CreateScevForConstant(GenTreeIntConCommon* tree); public: - ScalarEvolutionContext(Compiler* comp) : m_comp(comp), m_cache(comp->getAllocator(CMK_LoopScalarEvolution)) + ScalarEvolutionContext(Compiler* comp) : m_comp(comp), m_cache(comp->getAllocator(CMK_LoopIVOpts)) { } @@ -214,7 +214,7 @@ class ScalarEvolutionContext // ScevConstant* NewConstant(var_types type, int64_t value) { - ScevConstant* constant = new (m_comp, CMK_LoopScalarEvolution) ScevConstant(type, value); + ScevConstant* constant = new (m_comp, CMK_LoopIVOpts) ScevConstant(type, value); return constant; } @@ -232,7 +232,7 @@ class ScalarEvolutionContext ScevLocal* NewLocal(unsigned lclNum, unsigned ssaNum) { var_types type = genActualType(m_comp->lvaGetDesc(lclNum)); - ScevLocal* invariantLocal = new (m_comp, CMK_LoopScalarEvolution) ScevLocal(type, lclNum, ssaNum); + ScevLocal* invariantLocal = new (m_comp, CMK_LoopIVOpts) ScevLocal(type, lclNum, ssaNum); return invariantLocal; } @@ -250,7 +250,7 @@ class ScalarEvolutionContext ScevUnop* NewExtension(ScevOper oper, var_types targetType, Scev* op) { assert(op != nullptr); - ScevUnop* ext = new (m_comp, CMK_LoopScalarEvolution) ScevUnop(oper, targetType, op); + ScevUnop* ext = new (m_comp, CMK_LoopIVOpts) ScevUnop(oper, targetType, op); return ext; } @@ -268,7 +268,7 @@ class ScalarEvolutionContext ScevBinop* NewBinop(ScevOper oper, Scev* op1, Scev* op2) { assert((op1 != nullptr) && (op2 != nullptr)); - ScevBinop* binop = new (m_comp, CMK_LoopScalarEvolution) ScevBinop(oper, op1->Type, op1, op2); + ScevBinop* binop = new (m_comp, CMK_LoopIVOpts) ScevBinop(oper, op1->Type, op1, op2); return binop; } @@ -285,7 +285,7 @@ class ScalarEvolutionContext ScevAddRec* NewAddRec(Scev* start, Scev* step) { assert((start != nullptr) && (step != nullptr)); - ScevAddRec* addRec = new (m_comp, CMK_LoopScalarEvolution) ScevAddRec(start->Type, start, step); + ScevAddRec* addRec = new (m_comp, CMK_LoopIVOpts) ScevAddRec(start->Type, start, step); return addRec; } @@ -963,6 +963,12 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) return result != BasicBlockVisit::Abort; } +struct IVUse +{ + BasicBlock* Block; + Statement* Statement; +}; + //------------------------------------------------------------------------ // optIsIVWideningProfitable: Check to see if IV widening is profitable. // @@ -984,10 +990,11 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) // 2. We need to store the wide IV back into the narrow one in each of // the exits where the narrow IV is live-in. 
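// As a worked example (the block weight here is hypothetical, purely for
// illustration): with the constants used below, ExtensionCost = 2 and
// ExtensionSize = 3, a loop block of weight 4 that zero-extends the IV twice
// saves 2 * 3 = 6 bytes of code size and 2 * 2 * 4 = 16 units of weighted
// cost, which is then weighed against the initialization and exit-store
// costs described above.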
//
-bool Compiler::optIsIVWideningProfitable(unsigned lclNum,
-                                         BasicBlock* initBlock,
-                                         bool initedToConstant,
-                                         FlowGraphNaturalLoop* loop)
+bool Compiler::optIsIVWideningProfitable(unsigned lclNum,
+                                         BasicBlock* initBlock,
+                                         bool initedToConstant,
+                                         FlowGraphNaturalLoop* loop,
+                                         ArrayStack<Statement*>& ivUses)
 {
     for (FlowGraphNaturalLoop* otherLoop : m_loops->InReversePostOrder())
     {
@@ -1011,75 +1018,59 @@ bool Compiler::optIsIVWideningProfitable(unsigned lclNum,
         }
     }

-    struct CountZeroExtensionsVisitor : GenTreeVisitor<CountZeroExtensionsVisitor>
-    {
-    private:
-        unsigned m_lclNum;
-
-    public:
-        enum
-        {
-            DoPreOrder = true,
-        };
+    const weight_t ExtensionCost = 2;
+    const int      ExtensionSize = 3;

-        unsigned NumExtensions = 0;
+    weight_t savedCost = 0;
+    int      savedSize = 0;

-        CountZeroExtensionsVisitor(Compiler* comp, unsigned lclNum) : GenTreeVisitor(comp), m_lclNum(lclNum)
+    loop->VisitLoopBlocks([&](BasicBlock* block) {
+        for (Statement* stmt : block->NonPhiStatements())
         {
-        }
+            bool hasUse        = false;
+            int  numExtensions = 0;
+            for (GenTree* node : stmt->TreeList())
+            {
+                if (!node->OperIs(GT_CAST))
+                {
+                    hasUse |= node->OperIsLocal() && (node->AsLclVarCommon()->GetLclNum() == lclNum);
+                    continue;
+                }

-        fgWalkResult PreOrderVisit(GenTree** use, GenTree* parent)
-        {
-            GenTree* node = *use;
+                GenTreeCast* cast = node->AsCast();
+                if ((cast->gtCastType != TYP_LONG) || !cast->IsUnsigned() || cast->gtOverflow())
+                {
+                    continue;
+                }

-            if (!node->OperIs(GT_CAST))
-            {
-                return WALK_CONTINUE;
-            }
+                GenTree* op = cast->CastOp();
+                if (!op->OperIs(GT_LCL_VAR) || (op->AsLclVarCommon()->GetLclNum() != lclNum))
+                {
+                    continue;
+                }

-            GenTreeCast* cast = node->AsCast();
-            if ((cast->gtCastType != TYP_LONG) || !cast->IsUnsigned() || cast->gtOverflow())
-            {
-                return WALK_CONTINUE;
-            }
+                // If this is already the source of a store then it is going to be
+                // free in our backends regardless.
+                GenTree* parent = node->gtGetParent(nullptr);
+                if ((parent != nullptr) && parent->OperIs(GT_STORE_LCL_VAR))
+                {
+                    continue;
+                }

-            GenTree* op = cast->CastOp();
-            if (!op->OperIs(GT_LCL_VAR) || (op->AsLclVarCommon()->GetLclNum() != m_lclNum))
-            {
-                return WALK_CONTINUE;
+                numExtensions++;
             }

-            // If this is already the source of a store then it is going to be
-            // free in our backends regardless.
-            if ((parent != nullptr) && parent->OperIs(GT_STORE_LCL_VAR))
+            if (hasUse)
             {
-                return WALK_CONTINUE;
+                ivUses.Push(stmt);
             }

-            NumExtensions++;
-            return WALK_SKIP_SUBTREES;
-        }
-    };
-
-    const weight_t ExtensionCost = 2;
-    const int      ExtensionSize = 3;
-
-    CountZeroExtensionsVisitor visitor(this, lclNum);
-    weight_t savedCost = 0;
-    int      savedSize = 0;
-
-    loop->VisitLoopBlocks([&](BasicBlock* block) {
-        for (Statement* stmt : block->NonPhiStatements())
-        {
-            visitor.WalkTree(stmt->GetRootNodePointer(), nullptr);
-
-            if (visitor.NumExtensions > 0)
+            if (numExtensions > 0)
             {
-                JITDUMP("  Found %u zero extensions in " FMT_STMT "\n", visitor.NumExtensions, stmt->GetID());
+                JITDUMP("  Found %d zero extensions in " FMT_STMT "\n", numExtensions, stmt->GetID());

-                savedSize += (int)visitor.NumExtensions * ExtensionSize;
-                savedCost += visitor.NumExtensions * block->getBBWeight(this) * ExtensionCost;
-                visitor.NumExtensions = 0;
+                savedSize += numExtensions * ExtensionSize;
+                savedCost += numExtensions * block->getBBWeight(this) * ExtensionCost;
             }
         }

@@ -1144,11 +1135,10 @@ bool Compiler::optIsIVWideningProfitable(unsigned lclNum,
 // Returns:
-//   True if any store was created in any exit block.
+//   None; narrowing stores are created directly in the affected exit blocks.
//
-bool Compiler::optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNaturalLoop* loop)
+void Compiler::optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNaturalLoop* loop)
 {
-    bool       anySunk = false;
-    LclVarDsc* dsc     = lvaGetDesc(lclNum);
-    loop->VisitRegularExitBlocks([=, &anySunk](BasicBlock* exit) {
+    LclVarDsc* dsc = lvaGetDesc(lclNum);
+    loop->VisitRegularExitBlocks([=](BasicBlock* exit) {
         if (!VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex))
         {
             return BasicBlockVisit::Continue;
@@ -1160,12 +1150,9 @@ bool Compiler::optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNa
         JITDUMP("Narrow IV local V%02u live into exit block " FMT_BB "; sinking a narrowing\n", lclNum, exit->bbNum);
         DISPSTMT(newStmt);
         fgInsertStmtAtBeg(exit, newStmt);
-        anySunk = true;

         return BasicBlockVisit::Continue;
     });
-
-    return anySunk;
 }

 //------------------------------------------------------------------------
@@ -1344,6 +1331,7 @@ PhaseStatus Compiler::optInductionVariables()
     ScalarEvolutionContext scevContext(this);
     JITDUMP("Widening primary induction variables:\n");
+    ArrayStack<Statement*> ivUses(getAllocator(CMK_LoopIVOpts));
     for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder())
     {
         JITDUMP("Processing ");
@@ -1419,7 +1407,8 @@ PhaseStatus Compiler::optInductionVariables()
                 initBlock = startSsaDsc->GetBlock();
             }

-            if (!optIsIVWideningProfitable(lcl->GetLclNum(), initBlock, initToConstant, loop))
+            ivUses.Reset();
+            if (!optIsIVWideningProfitable(lcl->GetLclNum(), initBlock, initToConstant, loop, ivUses))
             {
                 continue;
             }
@@ -1504,25 +1493,23 @@ PhaseStatus Compiler::optInductionVariables()
             if (initStmt != nullptr)
             {
+                JITDUMP("  Replacing on the way to the loop\n");
                 optBestEffortReplaceNarrowIVUsesWith(lcl->GetLclNum(), startLocal->SsaNum, newLclNum, initBlock,
                                                      initStmt->GetNextStmt());
             }

-            loop->VisitLoopBlocks([=](BasicBlock* block) {
-
-                for (Statement* stmt : block->NonPhiStatements())
-                {
-                    JITDUMP("Replacing V%02u -> V%02u in [%06u]\n", lcl->GetLclNum(), newLclNum,
-                            dspTreeID(stmt->GetRootNode()));
-                    DISPSTMT(stmt);
-                    JITDUMP("\n");
-                    optReplaceWidenedIV(lcl->GetLclNum(), SsaConfig::RESERVED_SSA_NUM, newLclNum, stmt);
-                }
-
-                return BasicBlockVisit::Continue;
-            });
+            JITDUMP("  Replacing in the loop; %d statements with appearances\n", ivUses.Height());
+            for (int i = 0; i < ivUses.Height(); i++)
+            {
+                Statement* stmt = ivUses.Bottom(i);
+                JITDUMP("Replacing V%02u -> V%02u in [%06u]\n", lcl->GetLclNum(), newLclNum,
+                        dspTreeID(stmt->GetRootNode()));
+                DISPSTMT(stmt);
+                JITDUMP("\n");
+                optReplaceWidenedIV(lcl->GetLclNum(), SsaConfig::RESERVED_SSA_NUM, newLclNum, stmt);
+            }

-            changed |= optSinkWidenedIV(lcl->GetLclNum(), newLclNum, loop);
+            optSinkWidenedIV(lcl->GetLclNum(), newLclNum, loop);
         }
     }

From 410c5c2122678faf61193dd650c2aa95ea1d8813 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Fri, 23 Feb 2024 11:16:12 +0100
Subject: [PATCH 58/64] Clean up, address some feedback

---
 src/coreclr/jit/inductionvariableopts.cpp | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp
index b92e6e30d679ad..293483e4554470 100644
--- a/src/coreclr/jit/inductionvariableopts.cpp
+++ b/src/coreclr/jit/inductionvariableopts.cpp
@@ -432,6 +432,7 @@ Scev* ScalarEvolutionContext::CreateScevForConstant(GenTreeIntConCommon* tree)
 // Parameters:
 //   block - Block containing the tree
 //   tree  - Tree node
+//   depth - Current analysis depth
 //
 // Returns:
 //   SCEV node if the tree was analyzable; otherwise nullptr if the value is
@@ -708,7 +709,7 @@ const int SCALAR_EVOLUTION_ANALYSIS_MAX_DEPTH = 64;
 // Parameters:
 //   block - Block containing the tree
 //   tree  - Tree node
-//   depth - Current analysis depth.
+//   depth - Current analysis depth
 //
 // Returns:
 //   SCEV node if the tree was analyzable; otherwise nullptr if the value is
@@ -963,19 +964,16 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop)
     return result != BasicBlockVisit::Abort;
 }

-struct IVUse
-{
-    BasicBlock* Block;
-    Statement*  Statement;
-};
-
 //------------------------------------------------------------------------
 // optIsIVWideningProfitable: Check to see if IV widening is profitable.
 //
 // Parameters:
-//   lclNum              - The primary induction variable
-//   needsInitialization - Whether or not the widened IV will need explicit initialization
-//   loop                - The loop
+//   lclNum           - The primary induction variable
+//   initBlock        - The block where the new IV would be initialized
+//   initedToConstant - Whether or not the new IV will be initialized to a constant
+//   loop             - The loop
+//   ivUses           - Statements in which "lclNum" appears will be added to this list
+//
 //
 // Returns:
 //   True if IV widening is profitable.

From f6efa2140e235f7264a30730e0a55ccd258ed4f8 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Fri, 23 Feb 2024 11:20:01 +0100
Subject: [PATCH 59/64] More feedback

---
 src/coreclr/jit/inductionvariableopts.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp
index 293483e4554470..32518446ddc836 100644
--- a/src/coreclr/jit/inductionvariableopts.cpp
+++ b/src/coreclr/jit/inductionvariableopts.cpp
@@ -664,6 +664,7 @@ Scev* ScalarEvolutionContext::CreateSimpleAddRec(GenTreeLclVarCommon* headerStor
     }
     else
     {
+        // Not a simple IV shape (i.e. more complex than "i = i + k")
         return nullptr;
     }

From 054ad94ef9dae6b944f6d87c1d8dc80a923374a6 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Tue, 27 Feb 2024 11:51:02 +0100
Subject: [PATCH 60/64] Split out into multiple files

---
 src/coreclr/jit/CMakeLists.txt            |   2 +
 src/coreclr/jit/inductionvariableopts.cpp | 878 +---------------------
 src/coreclr/jit/scev.cpp                  | 669 +++++++++++++++++
 src/coreclr/jit/scev.h                    | 225 ++++++
 4 files changed, 897 insertions(+), 877 deletions(-)
 create mode 100644 src/coreclr/jit/scev.cpp
 create mode 100644 src/coreclr/jit/scev.h

diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt
index 9e90c23ac810ab..6e114f0f04a119 100644
--- a/src/coreclr/jit/CMakeLists.txt
+++ b/src/coreclr/jit/CMakeLists.txt
@@ -165,6 +165,7 @@ set( JIT_SOURCES
   regalloc.cpp
   registerargconvention.cpp
   regset.cpp
+  scev.cpp
   scopeinfo.cpp
   sideeffects.cpp
   sm.cpp
@@ -360,6 +361,7 @@ set( JIT_HEADERS
   registerargconvention.h
   register.h
   regset.h
+  scev.h
   sideeffects.h
   simd.h
   simdashwintrinsic.h
diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp
index 32518446ddc836..db3a508296d25c 100644
--- a/src/coreclr/jit/inductionvariableopts.cpp
+++ b/src/coreclr/jit/inductionvariableopts.cpp
@@ -8,883 +8,7 @@
 // induction variables." and also by LLVM's scalar evolution.

 #include "jitpch.h"
-
-// Evolving values are described using a small IR based around the following
-// possible operations. At the core is ScevOper::AddRec, which represents a
-// value that evolves by an add recurrence.
In dumps it is described by where "loop" is the loop the value is evolving in, "start" is -// the initial value and "step" is the step by which the value evolves in every -// iteration. -// -enum class ScevOper -{ - Constant, - Local, - ZeroExtend, - SignExtend, - Add, - Mul, - Lsh, - AddRec, -}; - -static bool ScevOperIs(ScevOper oper, ScevOper otherOper) -{ - return oper == otherOper; -} - -template -static bool ScevOperIs(ScevOper oper, ScevOper operFirst, Args... operTail) -{ - return oper == operFirst || ScevOperIs(oper, operTail...); -} - -struct Scev -{ - const ScevOper Oper; - const var_types Type; - - Scev(ScevOper oper, var_types type) : Oper(oper), Type(type) - { - } - - template - bool OperIs(Args... opers) - { - return ScevOperIs(Oper, opers...); - } - - bool TypeIs(var_types type) - { - return Type == type; - } - - bool GetConstantValue(Compiler* comp, int64_t* cns); -}; - -struct ScevConstant : Scev -{ - ScevConstant(var_types type, int64_t value) : Scev(ScevOper::Constant, type), Value(value) - { - } - - int64_t Value; -}; - -struct ScevLocal : Scev -{ - ScevLocal(var_types type, unsigned lclNum, unsigned ssaNum) - : Scev(ScevOper::Local, type), LclNum(lclNum), SsaNum(ssaNum) - { - } - - const unsigned LclNum; - const unsigned SsaNum; - - //------------------------------------------------------------------------ - // GetConstantValue: If this SSA use refers to a constant, then fetch that - // constant. - // - // Parameters: - // comp - Compiler instance - // cns - [out] Constant value; only valid if this function returns true. - // - // Returns: - // True if this SSA use refers to a constant; otherwise false, - // - bool GetConstantValue(Compiler* comp, int64_t* cns) - { - LclVarDsc* dsc = comp->lvaGetDesc(LclNum); - LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(SsaNum); - GenTreeLclVarCommon* defNode = ssaDsc->GetDefNode(); - if ((defNode != nullptr) && defNode->Data()->OperIs(GT_CNS_INT, GT_CNS_LNG)) - { - *cns = defNode->Data()->AsIntConCommon()->IntegralValue(); - return true; - } - - return false; - } -}; - -struct ScevUnop : Scev -{ - ScevUnop(ScevOper oper, var_types type, Scev* op1) : Scev(oper, type), Op1(op1) - { - } - - Scev* const Op1; -}; - -struct ScevBinop : ScevUnop -{ - ScevBinop(ScevOper oper, var_types type, Scev* op1, Scev* op2) : ScevUnop(oper, type, op1), Op2(op2) - { - } - - Scev* const Op2; -}; - -// Represents a value that evolves by an add recurrence. -// The value at iteration N is Start + N * Step. -// "Start" and "Step" are guaranteed to be invariant in "Loop". -struct ScevAddRec : Scev -{ - ScevAddRec(var_types type, Scev* start, Scev* step) : Scev(ScevOper::AddRec, type), Start(start), Step(step) - { - } - - Scev* const Start; - Scev* const Step; -}; - -//------------------------------------------------------------------------ -// Scev::GetConstantValue: If this SCEV is always a constant (i.e. either an -// inline constant or an SSA use referring to a constant) then obtain that -// constant. -// -// Parameters: -// comp - Compiler instance -// cns - [out] Constant value; only valid if this function returns true. -// -// Returns: -// True if a constant could be extracted. 
-// -bool Scev::GetConstantValue(Compiler* comp, int64_t* cns) -{ - if (OperIs(ScevOper::Constant)) - { - *cns = ((ScevConstant*)this)->Value; - return true; - } - - if (OperIs(ScevOper::Local)) - { - return ((ScevLocal*)this)->GetConstantValue(comp, cns); - } - - return false; -} - -typedef JitHashTable, Scev*> ScalarEvolutionMap; - -// Scalar evolution is analyzed in the context of a single loop, and are -// computed on-demand by the use of the "Analyze" method on this class, which -// also maintains a cache. -class ScalarEvolutionContext -{ - Compiler* m_comp; - FlowGraphNaturalLoop* m_loop = nullptr; - ScalarEvolutionMap m_cache; - - Scev* Analyze(BasicBlock* block, GenTree* tree, int depth); - Scev* AnalyzeNew(BasicBlock* block, GenTree* tree, int depth); - Scev* CreateSimpleAddRec(GenTreeLclVarCommon* headerStore, - ScevLocal* start, - BasicBlock* stepDefBlock, - GenTree* stepDefData); - Scev* CreateSimpleInvariantScev(GenTree* tree); - Scev* CreateScevForConstant(GenTreeIntConCommon* tree); - -public: - ScalarEvolutionContext(Compiler* comp) : m_comp(comp), m_cache(comp->getAllocator(CMK_LoopIVOpts)) - { - } - - void DumpScev(Scev* scev); - - //------------------------------------------------------------------------ - // ResetForLoop: Reset the internal cache in preparation of scalar - // evolution analysis inside a new loop. - // - // Parameters: - // loop - The loop. - // - void ResetForLoop(FlowGraphNaturalLoop* loop) - { - m_loop = loop; - m_cache.RemoveAll(); - } - - //------------------------------------------------------------------------ - // NewConstant: Create a SCEV node that represents a constant. - // - // Returns: - // The new node. - // - ScevConstant* NewConstant(var_types type, int64_t value) - { - ScevConstant* constant = new (m_comp, CMK_LoopIVOpts) ScevConstant(type, value); - return constant; - } - - //------------------------------------------------------------------------ - // NewLocal: Create a SCEV node that represents an invariant local (i.e. a - // use of an SSA def from outside the loop). - // - // Parameters: - // lclNum - The local - // ssaNum - The SSA number of the def outside the loop that is being used. - // - // Returns: - // The new node. - // - ScevLocal* NewLocal(unsigned lclNum, unsigned ssaNum) - { - var_types type = genActualType(m_comp->lvaGetDesc(lclNum)); - ScevLocal* invariantLocal = new (m_comp, CMK_LoopIVOpts) ScevLocal(type, lclNum, ssaNum); - return invariantLocal; - } - - //------------------------------------------------------------------------ - // NewExtension: Create a SCEV node that represents a zero or sign extension. - // - // Parameters: - // oper - The operation (ScevOper::ZeroExtend or ScevOper::SignExtend) - // targetType - The target type of the extension - // op - The operand being extended. - // - // Returns: - // The new node. - // - ScevUnop* NewExtension(ScevOper oper, var_types targetType, Scev* op) - { - assert(op != nullptr); - ScevUnop* ext = new (m_comp, CMK_LoopIVOpts) ScevUnop(oper, targetType, op); - return ext; - } - - //------------------------------------------------------------------------ - // NewBinop: Create a SCEV node that represents a binary operation. - // - // Parameters: - // oper - The operation - // op1 - First operand - // op2 - Second operand - // - // Returns: - // The new node. 
- // - ScevBinop* NewBinop(ScevOper oper, Scev* op1, Scev* op2) - { - assert((op1 != nullptr) && (op2 != nullptr)); - ScevBinop* binop = new (m_comp, CMK_LoopIVOpts) ScevBinop(oper, op1->Type, op1, op2); - return binop; - } - - //------------------------------------------------------------------------ - // NewAddRec: Create a SCEV node that represents a new add recurrence. - // - // Parameters: - // start - Value of the recurrence at the first iteration - // step - Step value of the recurrence - // - // Returns: - // The new node. - // - ScevAddRec* NewAddRec(Scev* start, Scev* step) - { - assert((start != nullptr) && (step != nullptr)); - ScevAddRec* addRec = new (m_comp, CMK_LoopIVOpts) ScevAddRec(start->Type, start, step); - return addRec; - } - - Scev* Analyze(BasicBlock* block, GenTree* tree); - Scev* Simplify(Scev* scev); -}; - -#ifdef DEBUG -//------------------------------------------------------------------------ -// DumpScev: Print a scev node to stdout. -// -// Parameters: -// scev - The scev node. -// -void ScalarEvolutionContext::DumpScev(Scev* scev) -{ - switch (scev->Oper) - { - case ScevOper::Constant: - { - ScevConstant* cns = (ScevConstant*)scev; - printf("%zd", (ssize_t)cns->Value); - break; - } - case ScevOper::Local: - { - ScevLocal* invariantLocal = (ScevLocal*)scev; - printf("V%02u.%u", invariantLocal->LclNum, invariantLocal->SsaNum); - - int64_t cns; - if (invariantLocal->GetConstantValue(m_comp, &cns)) - { - printf(" (%lld)", (long long)cns); - } - break; - } - case ScevOper::ZeroExtend: - case ScevOper::SignExtend: - { - ScevUnop* unop = (ScevUnop*)scev; - printf("%cext<%d>(", unop->Oper == ScevOper::ZeroExtend ? 'z' : 's', genTypeSize(unop->Type) * 8); - DumpScev(unop->Op1); - printf(")"); - break; - } - case ScevOper::Add: - case ScevOper::Mul: - case ScevOper::Lsh: - { - ScevBinop* binop = (ScevBinop*)scev; - printf("("); - DumpScev(binop->Op1); - const char* op; - switch (binop->Oper) - { - case ScevOper::Add: - op = "+"; - break; - case ScevOper::Mul: - op = "*"; - break; - case ScevOper::Lsh: - op = "<<"; - break; - default: - unreached(); - } - printf(" %s ", op); - DumpScev(binop->Op2); - printf(")"); - break; - } - case ScevOper::AddRec: - { - ScevAddRec* addRec = (ScevAddRec*)scev; - printf("<" FMT_LP, m_loop->GetIndex()); - printf(", "); - DumpScev(addRec->Start); - printf(", "); - DumpScev(addRec->Step); - printf(">"); - break; - } - default: - unreached(); - } -} -#endif - -//------------------------------------------------------------------------ -// CreateSimpleInvariantScev: Create a "simple invariant" SCEV node for a tree: -// either an invariant local use or a constant. -// -// Parameters: -// tree - The tree -// -// Returns: -// SCEV node or nullptr if the tree is not a simple invariant. 
-// -Scev* ScalarEvolutionContext::CreateSimpleInvariantScev(GenTree* tree) -{ - if (tree->OperIs(GT_CNS_INT, GT_CNS_LNG)) - { - return CreateScevForConstant(tree->AsIntConCommon()); - } - - if (tree->OperIs(GT_LCL_VAR) && tree->AsLclVarCommon()->HasSsaName()) - { - LclVarDsc* dsc = m_comp->lvaGetDesc(tree->AsLclVarCommon()); - LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(tree->AsLclVarCommon()->GetSsaNum()); - - if ((ssaDsc->GetBlock() == nullptr) || !m_loop->ContainsBlock(ssaDsc->GetBlock())) - { - return NewLocal(tree->AsLclVarCommon()->GetLclNum(), tree->AsLclVarCommon()->GetSsaNum()); - } - } - - return nullptr; -} - -//------------------------------------------------------------------------ -// CreateScevForConstant: Given an integer constant, create a SCEV node for it. -// -// Parameters: -// tree - The integer constant -// -// Returns: -// SCEV node or nullptr if the integer constant is not representable (e.g. a handle). -// -Scev* ScalarEvolutionContext::CreateScevForConstant(GenTreeIntConCommon* tree) -{ - if (tree->IsIconHandle() || !tree->TypeIs(TYP_INT, TYP_LONG)) - { - return nullptr; - } - - return NewConstant(tree->TypeGet(), tree->AsIntConCommon()->IntegralValue()); -} - -//------------------------------------------------------------------------ -// AnalyzeNew: Analyze the specified tree in the specified block, without going -// through the cache. -// -// Parameters: -// block - Block containing the tree -// tree - Tree node -// depth - Current analysis depth -// -// Returns: -// SCEV node if the tree was analyzable; otherwise nullptr if the value is -// cannot be described. -// -Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree, int depth) -{ - switch (tree->OperGet()) - { - case GT_CNS_INT: - case GT_CNS_LNG: - { - return CreateScevForConstant(tree->AsIntConCommon()); - } - case GT_LCL_VAR: - case GT_PHI_ARG: - { - if (!tree->AsLclVarCommon()->HasSsaName()) - { - return nullptr; - } - - assert(m_comp->lvaInSsa(tree->AsLclVarCommon()->GetLclNum())); - LclVarDsc* dsc = m_comp->lvaGetDesc(tree->AsLclVarCommon()); - LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(tree->AsLclVarCommon()->GetSsaNum()); - - if ((ssaDsc->GetBlock() == nullptr) || !m_loop->ContainsBlock(ssaDsc->GetBlock())) - { - return NewLocal(tree->AsLclVarCommon()->GetLclNum(), tree->AsLclVarCommon()->GetSsaNum()); - } - - if (ssaDsc->GetDefNode() == nullptr) - { - // GT_CALL retbuf def? - return nullptr; - } - - if (ssaDsc->GetDefNode()->GetLclNum() != tree->AsLclVarCommon()->GetLclNum()) - { - // Should be a def of the parent - assert(dsc->lvIsStructField && (ssaDsc->GetDefNode()->GetLclNum() == dsc->lvParentLcl)); - return nullptr; - } - - return Analyze(ssaDsc->GetBlock(), ssaDsc->GetDefNode(), depth + 1); - } - case GT_STORE_LCL_VAR: - { - GenTreeLclVarCommon* store = tree->AsLclVarCommon(); - GenTree* data = store->Data(); - if (!data->OperIs(GT_PHI)) - { - return Analyze(block, data, depth + 1); - } - - if (block != m_loop->GetHeader()) - { - return nullptr; - } - - // We have a phi def for the current loop. Look for a primary - // induction variable. - GenTreePhi* phi = data->AsPhi(); - GenTreePhiArg* enterSsa = nullptr; - GenTreePhiArg* backedgeSsa = nullptr; - - for (GenTreePhi::Use& use : phi->Uses()) - { - GenTreePhiArg* phiArg = use.GetNode()->AsPhiArg(); - GenTreePhiArg*& ssaArg = m_loop->ContainsBlock(phiArg->gtPredBB) ? 
backedgeSsa : enterSsa; - if ((ssaArg == nullptr) || (ssaArg->GetSsaNum() == phiArg->GetSsaNum())) - { - ssaArg = phiArg; - } - else - { - return nullptr; - } - } - - if ((enterSsa == nullptr) || (backedgeSsa == nullptr)) - { - return nullptr; - } - - ScevLocal* enterScev = NewLocal(enterSsa->GetLclNum(), enterSsa->GetSsaNum()); - - LclVarDsc* dsc = m_comp->lvaGetDesc(store); - LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(backedgeSsa->GetSsaNum()); - - if (ssaDsc->GetDefNode() == nullptr) - { - // GT_CALL retbuf def - return nullptr; - } - - if (ssaDsc->GetDefNode()->GetLclNum() != store->GetLclNum()) - { - assert(dsc->lvIsStructField && ssaDsc->GetDefNode()->GetLclNum() == dsc->lvParentLcl); - return nullptr; - } - - assert(ssaDsc->GetBlock() != nullptr); - - // We currently do not handle complicated addrecs. We can do this - // by inserting a symbolic node in the cache and analyzing while it - // is part of the cache. It would allow us to model - // - // int i = 0; - // while (i < n) - // { - // int j = i + 1; - // ... - // i = j; - // } - // => - // - // and chains of recurrences, such as - // - // int i = 0; - // int j = 0; - // while (i < n) - // { - // j++; - // i += j; - // } - // => > - // - // The main issue is that it requires cache invalidation afterwards - // and turning the recursive result into an addrec. - // - return CreateSimpleAddRec(store, enterScev, ssaDsc->GetBlock(), ssaDsc->GetDefNode()->Data()); - } - case GT_CAST: - { - GenTreeCast* cast = tree->AsCast(); - if (cast->gtCastType != TYP_LONG) - { - return nullptr; - } - - Scev* op = Analyze(block, cast->CastOp(), depth + 1); - if (op == nullptr) - { - return nullptr; - } - - return NewExtension(cast->IsUnsigned() ? ScevOper::ZeroExtend : ScevOper::SignExtend, TYP_LONG, op); - } - case GT_ADD: - case GT_MUL: - case GT_LSH: - { - Scev* op1 = Analyze(block, tree->gtGetOp1(), depth + 1); - if (op1 == nullptr) - return nullptr; - - Scev* op2 = Analyze(block, tree->gtGetOp2(), depth + 1); - if (op2 == nullptr) - return nullptr; - - ScevOper oper; - switch (tree->OperGet()) - { - case GT_ADD: - oper = ScevOper::Add; - break; - case GT_MUL: - oper = ScevOper::Mul; - break; - case GT_LSH: - oper = ScevOper::Lsh; - break; - default: - unreached(); - } - - return NewBinop(oper, op1, op2); - } - case GT_COMMA: - { - return Analyze(block, tree->gtGetOp2(), depth + 1); - } - case GT_ARR_ADDR: - { - return Analyze(block, tree->AsArrAddr()->Addr(), depth + 1); - } - default: - return nullptr; - } -} - -//------------------------------------------------------------------------ -// CreateSimpleAddRec: Create a "simple" add-recurrence. This handles the most -// common patterns for primary induction variables where we see a store like -// "i = i + 1". -// -// Parameters: -// headerStore - Phi definition of the candidate primary induction variable -// enterScev - SCEV describing start value of the primary induction variable -// stepDefBlock - Block containing the def of the step value -// stepDefData - Value of the def of the step value -// -// Returns: -// SCEV node if this is a simple addrec shape. Otherwise nullptr. 
-// -Scev* ScalarEvolutionContext::CreateSimpleAddRec(GenTreeLclVarCommon* headerStore, - ScevLocal* enterScev, - BasicBlock* stepDefBlock, - GenTree* stepDefData) -{ - if (!stepDefData->OperIs(GT_ADD)) - { - return nullptr; - } - - GenTree* stepTree; - GenTree* op1 = stepDefData->gtGetOp1(); - GenTree* op2 = stepDefData->gtGetOp2(); - if (op1->OperIs(GT_LCL_VAR) && (op1->AsLclVar()->GetLclNum() == headerStore->GetLclNum()) && - (op1->AsLclVar()->GetSsaNum() == headerStore->GetSsaNum())) - { - stepTree = op2; - } - else if (op2->OperIs(GT_LCL_VAR) && (op2->AsLclVar()->GetLclNum() == headerStore->GetLclNum()) && - (op2->AsLclVar()->GetSsaNum() == headerStore->GetSsaNum())) - { - stepTree = op1; - } - else - { - // Not a simple IV shape (i.e. more complex than "i = i + k") - return nullptr; - } - - Scev* stepScev = CreateSimpleInvariantScev(stepTree); - if (stepScev == nullptr) - { - return nullptr; - } - - return NewAddRec(enterScev, stepScev); -} - -//------------------------------------------------------------------------ -// Analyze: Analyze the specified tree in the specified block. -// -// Parameters: -// block - Block containing the tree -// tree - Tree node -// -// Returns: -// SCEV node if the tree was analyzable; otherwise nullptr if the value is -// cannot be described. -// -Scev* ScalarEvolutionContext::Analyze(BasicBlock* block, GenTree* tree) -{ - return Analyze(block, tree, 0); -} - -// Since the analysis follows SSA defs we have no upper bound on the potential -// depth of the analysis performed. We put an artificial limit on this for two -// reasons: -// 1. The analysis is recursive, and we should not stack overflow regardless of -// the input program. -// 2. If we produced arbitrarily deep SCEV trees then all algorithms over their -// structure would similarly be at risk of stack overflows if they were -// recursive. However, these algorithms are generally much more elegant when -// they make use of recursion. -const int SCALAR_EVOLUTION_ANALYSIS_MAX_DEPTH = 64; - -//------------------------------------------------------------------------ -// Analyze: Analyze the specified tree in the specified block. -// -// Parameters: -// block - Block containing the tree -// tree - Tree node -// depth - Current analysis depth -// -// Returns: -// SCEV node if the tree was analyzable; otherwise nullptr if the value is -// cannot be described. -// -Scev* ScalarEvolutionContext::Analyze(BasicBlock* block, GenTree* tree, int depth) -{ - Scev* result; - if (!m_cache.Lookup(tree, &result)) - { - if (depth >= SCALAR_EVOLUTION_ANALYSIS_MAX_DEPTH) - { - return nullptr; - } - - result = AnalyzeNew(block, tree, depth); - m_cache.Set(tree, result); - } - - return result; -} - -//------------------------------------------------------------------------ -// FoldBinop: Fold simple binops. -// -// Type parameters: -// T - Type that the binop is being evaluated in -// -// Parameters: -// oper - Binary operation -// op1 - First operand -// op2 - Second operand -// -// Returns: -// Folded value. -// -template -static T FoldBinop(ScevOper oper, T op1, T op2) -{ - switch (oper) - { - case ScevOper::Add: - return op1 + op2; - case ScevOper::Mul: - return op1 * op2; - case ScevOper::Lsh: - return op1 << op2; - default: - unreached(); - } -} - -//------------------------------------------------------------------------ -// Simplify: Try to simplify a SCEV node by folding and canonicalization. -// -// Parameters: -// scev - The node -// -// Returns: -// Simplified node. 
-// -// Remarks: -// Canonicalization is done for binops; constants are moved to the right and -// addrecs are moved to the left. -// -// Simple unops/binops on constants are folded. Operands are distributed into -// add recs whenever possible. -// -Scev* ScalarEvolutionContext::Simplify(Scev* scev) -{ - switch (scev->Oper) - { - case ScevOper::Constant: - case ScevOper::Local: - { - return scev; - } - case ScevOper::ZeroExtend: - case ScevOper::SignExtend: - { - ScevUnop* unop = (ScevUnop*)scev; - assert(genTypeSize(unop->Type) >= genTypeSize(unop->Op1->Type)); - - Scev* op1 = Simplify(unop->Op1); - - if (unop->Type == op1->Type) - { - return op1; - } - - assert((unop->Type == TYP_LONG) && (op1->Type == TYP_INT)); - - if (op1->OperIs(ScevOper::Constant)) - { - ScevConstant* cns = (ScevConstant*)op1; - return NewConstant(unop->Type, unop->OperIs(ScevOper::ZeroExtend) ? (uint64_t)(int32_t)cns->Value - : (int64_t)(int32_t)cns->Value); - } - - if (op1->OperIs(ScevOper::AddRec)) - { - // TODO-Cleanup: This requires some proof that it is ok, but - // currently we do not rely on this. - return op1; - } - - return (op1 == unop->Op1) ? unop : NewExtension(unop->Oper, unop->Type, op1); - } - case ScevOper::Add: - case ScevOper::Mul: - case ScevOper::Lsh: - { - ScevBinop* binop = (ScevBinop*)scev; - Scev* op1 = Simplify(binop->Op1); - Scev* op2 = Simplify(binop->Op2); - - if (binop->OperIs(ScevOper::Add, ScevOper::Mul)) - { - // Normalize addrecs to the left - if (op2->OperIs(ScevOper::AddRec) && !op1->OperIs(ScevOper::AddRec)) - { - std::swap(op1, op2); - } - // Normalize constants to the right - if (op1->OperIs(ScevOper::Constant) && !op2->OperIs(ScevOper::Constant)) - { - std::swap(op1, op2); - } - } - - if (op1->OperIs(ScevOper::AddRec)) - { - // + x => - // * x => - ScevAddRec* addRec = (ScevAddRec*)op1; - Scev* newStart = Simplify(NewBinop(binop->Oper, addRec->Start, op2)); - Scev* newStep = scev->OperIs(ScevOper::Mul, ScevOper::Lsh) - ? Simplify(NewBinop(binop->Oper, addRec->Step, op2)) - : addRec->Step; - return NewAddRec(newStart, newStep); - } - - if (op1->OperIs(ScevOper::Constant) && op2->OperIs(ScevOper::Constant)) - { - ScevConstant* cns1 = (ScevConstant*)op1; - ScevConstant* cns2 = (ScevConstant*)op2; - int64_t newValue; - if (binop->TypeIs(TYP_INT)) - { - newValue = FoldBinop(binop->Oper, static_cast(cns1->Value), - static_cast(cns2->Value)); - } - else - { - assert(binop->TypeIs(TYP_LONG)); - newValue = FoldBinop(binop->Oper, cns1->Value, cns2->Value); - } - - return NewConstant(binop->Type, newValue); - } - - return (op1 == binop->Op1) && (op2 == binop->Op2) ? binop : NewBinop(binop->Oper, op1, op2); - } - case ScevOper::AddRec: - { - ScevAddRec* addRec = (ScevAddRec*)scev; - Scev* start = Simplify(addRec->Start); - Scev* step = Simplify(addRec->Step); - return (start == addRec->Start) && (step == addRec->Step) ? addRec : NewAddRec(start, step); - } - default: - unreached(); - } -} +#include "scev.h" //------------------------------------------------------------------------ // optCanSinkWidenedIV: Check to see if we are able to sink a store to the old diff --git a/src/coreclr/jit/scev.cpp b/src/coreclr/jit/scev.cpp new file mode 100644 index 00000000000000..46006dfa1be1c4 --- /dev/null +++ b/src/coreclr/jit/scev.cpp @@ -0,0 +1,669 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +// This file contains code to analyze how the value of induction variables +// evolve (scalar evolution analysis), and to turn them into the SCEV IR +// defined in scev.h. The analysis is inspired by "Michael Wolfe. 1992. Beyond +// induction variables." and also by LLVM's scalar evolution analysis. + +#include "jitpch.h" +#include "scev.h" + +//------------------------------------------------------------------------ +// GetConstantValue: If this SSA use refers to a constant, then fetch that +// constant. +// +// Parameters: +// comp - Compiler instance +// cns - [out] Constant value; only valid if this function returns true. +// +// Returns: +// True if this SSA use refers to a constant; otherwise false, +// +bool ScevLocal::GetConstantValue(Compiler* comp, int64_t* cns) +{ + LclVarDsc* dsc = comp->lvaGetDesc(LclNum); + LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(SsaNum); + GenTreeLclVarCommon* defNode = ssaDsc->GetDefNode(); + if ((defNode != nullptr) && defNode->Data()->OperIs(GT_CNS_INT, GT_CNS_LNG)) + { + *cns = defNode->Data()->AsIntConCommon()->IntegralValue(); + return true; + } + + return false; +} + +//------------------------------------------------------------------------ +// Scev::GetConstantValue: If this SCEV is always a constant (i.e. either an +// inline constant or an SSA use referring to a constant) then obtain that +// constant. +// +// Parameters: +// comp - Compiler instance +// cns - [out] Constant value; only valid if this function returns true. +// +// Returns: +// True if a constant could be extracted. +// +bool Scev::GetConstantValue(Compiler* comp, int64_t* cns) +{ + if (OperIs(ScevOper::Constant)) + { + *cns = ((ScevConstant*)this)->Value; + return true; + } + + if (OperIs(ScevOper::Local)) + { + return ((ScevLocal*)this)->GetConstantValue(comp, cns); + } + + return false; +} + +//------------------------------------------------------------------------ +// ResetForLoop: Reset the internal cache in preparation of scalar +// evolution analysis inside a new loop. +// +// Parameters: +// loop - The loop. +// +void ScalarEvolutionContext::ResetForLoop(FlowGraphNaturalLoop* loop) +{ + m_loop = loop; + m_cache.RemoveAll(); +} + +#ifdef DEBUG +//------------------------------------------------------------------------ +// DumpScev: Print a scev node to stdout. +// +// Parameters: +// scev - The scev node. +// +void ScalarEvolutionContext::DumpScev(Scev* scev) +{ + switch (scev->Oper) + { + case ScevOper::Constant: + { + ScevConstant* cns = (ScevConstant*)scev; + printf("%zd", (ssize_t)cns->Value); + break; + } + case ScevOper::Local: + { + ScevLocal* invariantLocal = (ScevLocal*)scev; + printf("V%02u.%u", invariantLocal->LclNum, invariantLocal->SsaNum); + + int64_t cns; + if (invariantLocal->GetConstantValue(m_comp, &cns)) + { + printf(" (%lld)", (long long)cns); + } + break; + } + case ScevOper::ZeroExtend: + case ScevOper::SignExtend: + { + ScevUnop* unop = (ScevUnop*)scev; + printf("%cext<%d>(", unop->Oper == ScevOper::ZeroExtend ? 
'z' : 's', genTypeSize(unop->Type) * 8); + DumpScev(unop->Op1); + printf(")"); + break; + } + case ScevOper::Add: + case ScevOper::Mul: + case ScevOper::Lsh: + { + ScevBinop* binop = (ScevBinop*)scev; + printf("("); + DumpScev(binop->Op1); + const char* op; + switch (binop->Oper) + { + case ScevOper::Add: + op = "+"; + break; + case ScevOper::Mul: + op = "*"; + break; + case ScevOper::Lsh: + op = "<<"; + break; + default: + unreached(); + } + printf(" %s ", op); + DumpScev(binop->Op2); + printf(")"); + break; + } + case ScevOper::AddRec: + { + ScevAddRec* addRec = (ScevAddRec*)scev; + printf("<" FMT_LP, m_loop->GetIndex()); + printf(", "); + DumpScev(addRec->Start); + printf(", "); + DumpScev(addRec->Step); + printf(">"); + break; + } + default: + unreached(); + } +} +#endif + +//------------------------------------------------------------------------ +// CreateSimpleInvariantScev: Create a "simple invariant" SCEV node for a tree: +// either an invariant local use or a constant. +// +// Parameters: +// tree - The tree +// +// Returns: +// SCEV node or nullptr if the tree is not a simple invariant. +// +Scev* ScalarEvolutionContext::CreateSimpleInvariantScev(GenTree* tree) +{ + if (tree->OperIs(GT_CNS_INT, GT_CNS_LNG)) + { + return CreateScevForConstant(tree->AsIntConCommon()); + } + + if (tree->OperIs(GT_LCL_VAR) && tree->AsLclVarCommon()->HasSsaName()) + { + LclVarDsc* dsc = m_comp->lvaGetDesc(tree->AsLclVarCommon()); + LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(tree->AsLclVarCommon()->GetSsaNum()); + + if ((ssaDsc->GetBlock() == nullptr) || !m_loop->ContainsBlock(ssaDsc->GetBlock())) + { + return NewLocal(tree->AsLclVarCommon()->GetLclNum(), tree->AsLclVarCommon()->GetSsaNum()); + } + } + + return nullptr; +} + +//------------------------------------------------------------------------ +// CreateScevForConstant: Given an integer constant, create a SCEV node for it. +// +// Parameters: +// tree - The integer constant +// +// Returns: +// SCEV node or nullptr if the integer constant is not representable (e.g. a handle). +// +Scev* ScalarEvolutionContext::CreateScevForConstant(GenTreeIntConCommon* tree) +{ + if (tree->IsIconHandle() || !tree->TypeIs(TYP_INT, TYP_LONG)) + { + return nullptr; + } + + return NewConstant(tree->TypeGet(), tree->AsIntConCommon()->IntegralValue()); +} + +//------------------------------------------------------------------------ +// AnalyzeNew: Analyze the specified tree in the specified block, without going +// through the cache. +// +// Parameters: +// block - Block containing the tree +// tree - Tree node +// depth - Current analysis depth +// +// Returns: +// SCEV node if the tree was analyzable; otherwise nullptr if the value is +// cannot be described. +// +Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree, int depth) +{ + switch (tree->OperGet()) + { + case GT_CNS_INT: + case GT_CNS_LNG: + { + return CreateScevForConstant(tree->AsIntConCommon()); + } + case GT_LCL_VAR: + case GT_PHI_ARG: + { + if (!tree->AsLclVarCommon()->HasSsaName()) + { + return nullptr; + } + + assert(m_comp->lvaInSsa(tree->AsLclVarCommon()->GetLclNum())); + LclVarDsc* dsc = m_comp->lvaGetDesc(tree->AsLclVarCommon()); + LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(tree->AsLclVarCommon()->GetSsaNum()); + + if ((ssaDsc->GetBlock() == nullptr) || !m_loop->ContainsBlock(ssaDsc->GetBlock())) + { + return NewLocal(tree->AsLclVarCommon()->GetLclNum(), tree->AsLclVarCommon()->GetSsaNum()); + } + + if (ssaDsc->GetDefNode() == nullptr) + { + // GT_CALL retbuf def? 
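+                // Such a def has no def node we can follow, so the value
+                // cannot be described.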
+                return nullptr;
+            }
+
+            if (ssaDsc->GetDefNode()->GetLclNum() != tree->AsLclVarCommon()->GetLclNum())
+            {
+                // Should be a def of the parent
+                assert(dsc->lvIsStructField && (ssaDsc->GetDefNode()->GetLclNum() == dsc->lvParentLcl));
+                return nullptr;
+            }
+
+            return Analyze(ssaDsc->GetBlock(), ssaDsc->GetDefNode(), depth + 1);
+        }
+        case GT_STORE_LCL_VAR:
+        {
+            GenTreeLclVarCommon* store = tree->AsLclVarCommon();
+            GenTree*             data  = store->Data();
+            if (!data->OperIs(GT_PHI))
+            {
+                return Analyze(block, data, depth + 1);
+            }
+
+            if (block != m_loop->GetHeader())
+            {
+                return nullptr;
+            }
+
+            // We have a phi def for the current loop. Look for a primary
+            // induction variable.
+            GenTreePhi*    phi         = data->AsPhi();
+            GenTreePhiArg* enterSsa    = nullptr;
+            GenTreePhiArg* backedgeSsa = nullptr;
+
+            for (GenTreePhi::Use& use : phi->Uses())
+            {
+                GenTreePhiArg*  phiArg = use.GetNode()->AsPhiArg();
+                GenTreePhiArg*& ssaArg = m_loop->ContainsBlock(phiArg->gtPredBB) ? backedgeSsa : enterSsa;
+                if ((ssaArg == nullptr) || (ssaArg->GetSsaNum() == phiArg->GetSsaNum()))
+                {
+                    ssaArg = phiArg;
+                }
+                else
+                {
+                    return nullptr;
+                }
+            }
+
+            if ((enterSsa == nullptr) || (backedgeSsa == nullptr))
+            {
+                return nullptr;
+            }
+
+            ScevLocal* enterScev = NewLocal(enterSsa->GetLclNum(), enterSsa->GetSsaNum());
+
+            LclVarDsc*    dsc    = m_comp->lvaGetDesc(store);
+            LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(backedgeSsa->GetSsaNum());
+
+            if (ssaDsc->GetDefNode() == nullptr)
+            {
+                // GT_CALL retbuf def
+                return nullptr;
+            }
+
+            if (ssaDsc->GetDefNode()->GetLclNum() != store->GetLclNum())
+            {
+                assert(dsc->lvIsStructField && ssaDsc->GetDefNode()->GetLclNum() == dsc->lvParentLcl);
+                return nullptr;
+            }
+
+            assert(ssaDsc->GetBlock() != nullptr);
+
+            // We currently do not handle complicated addrecs. We can do this
+            // by inserting a symbolic node in the cache and analyzing while it
+            // is part of the cache. It would allow us to model
+            //
+            //   int i = 0;
+            //   while (i < n)
+            //   {
+            //     int j = i + 1;
+            //     ...
+            //     i = j;
+            //   }
+            // => <L, 0, 1>
+            //
+            // and chains of recurrences, such as
+            //
+            //   int i = 0;
+            //   int j = 0;
+            //   while (i < n)
+            //   {
+            //     j++;
+            //     i += j;
+            //   }
+            // => <L, 0, <L, 1, 1>>
+            //
+            // The main issue is that it requires cache invalidation afterwards
+            // and turning the recursive result into an addrec.
+            //
+            return CreateSimpleAddRec(store, enterScev, ssaDsc->GetBlock(), ssaDsc->GetDefNode()->Data());
+        }
+        case GT_CAST:
+        {
+            GenTreeCast* cast = tree->AsCast();
+            if (cast->gtCastType != TYP_LONG)
+            {
+                return nullptr;
+            }
+
+            Scev* op = Analyze(block, cast->CastOp(), depth + 1);
+            if (op == nullptr)
+            {
+                return nullptr;
+            }
+
+            return NewExtension(cast->IsUnsigned() ? ScevOper::ZeroExtend : ScevOper::SignExtend, TYP_LONG, op);
+        }
+        case GT_ADD:
+        case GT_MUL:
+        case GT_LSH:
+        {
+            Scev* op1 = Analyze(block, tree->gtGetOp1(), depth + 1);
+            if (op1 == nullptr)
+                return nullptr;
+
+            Scev* op2 = Analyze(block, tree->gtGetOp2(), depth + 1);
+            if (op2 == nullptr)
+                return nullptr;
+
+            ScevOper oper;
+            switch (tree->OperGet())
+            {
+                case GT_ADD:
+                    oper = ScevOper::Add;
+                    break;
+                case GT_MUL:
+                    oper = ScevOper::Mul;
+                    break;
+                case GT_LSH:
+                    oper = ScevOper::Lsh;
+                    break;
+                default:
+                    unreached();
+            }
+
+            return NewBinop(oper, op1, op2);
+        }
+        case GT_COMMA:
+        {
+            return Analyze(block, tree->gtGetOp2(), depth + 1);
+        }
+        case GT_ARR_ADDR:
+        {
+            return Analyze(block, tree->AsArrAddr()->Addr(), depth + 1);
+        }
+        default:
+            return nullptr;
+    }
+}
+
+//------------------------------------------------------------------------
+// CreateSimpleAddRec: Create a "simple" add-recurrence. This handles the most
+// common patterns for primary induction variables where we see a store like
+// "i = i + 1".
+//
+// Parameters:
+//   headerStore  - Phi definition of the candidate primary induction variable
+//   enterScev    - SCEV describing start value of the primary induction variable
+//   stepDefBlock - Block containing the def of the step value
+//   stepDefData  - Value of the def of the step value
+//
+// Returns:
+//   SCEV node if this is a simple addrec shape. Otherwise nullptr.
+//
+Scev* ScalarEvolutionContext::CreateSimpleAddRec(GenTreeLclVarCommon* headerStore,
+                                                 ScevLocal*           enterScev,
+                                                 BasicBlock*          stepDefBlock,
+                                                 GenTree*             stepDefData)
+{
+    if (!stepDefData->OperIs(GT_ADD))
+    {
+        return nullptr;
+    }
+
+    GenTree* stepTree;
+    GenTree* op1 = stepDefData->gtGetOp1();
+    GenTree* op2 = stepDefData->gtGetOp2();
+    if (op1->OperIs(GT_LCL_VAR) && (op1->AsLclVar()->GetLclNum() == headerStore->GetLclNum()) &&
+        (op1->AsLclVar()->GetSsaNum() == headerStore->GetSsaNum()))
+    {
+        stepTree = op2;
+    }
+    else if (op2->OperIs(GT_LCL_VAR) && (op2->AsLclVar()->GetLclNum() == headerStore->GetLclNum()) &&
+             (op2->AsLclVar()->GetSsaNum() == headerStore->GetSsaNum()))
+    {
+        stepTree = op1;
+    }
+    else
+    {
+        // Not a simple IV shape (i.e. more complex than "i = i + k")
+        return nullptr;
+    }
+
+    Scev* stepScev = CreateSimpleInvariantScev(stepTree);
+    if (stepScev == nullptr)
+    {
+        return nullptr;
+    }
+
+    return NewAddRec(enterScev, stepScev);
+}
+
+//------------------------------------------------------------------------
+// Analyze: Analyze the specified tree in the specified block.
+//
+// Parameters:
+//   block - Block containing the tree
+//   tree  - Tree node
+//
+// Returns:
+//   SCEV node if the tree was analyzable; otherwise nullptr if the value
+//   cannot be described.
+//
+Scev* ScalarEvolutionContext::Analyze(BasicBlock* block, GenTree* tree)
+{
+    return Analyze(block, tree, 0);
+}
+
+// Since the analysis follows SSA defs we have no upper bound on the potential
+// depth of the analysis performed. We put an artificial limit on this for two
+// reasons:
+// 1. The analysis is recursive, and we should not stack overflow regardless of
+//    the input program.
+// 2. If we produced arbitrarily deep SCEV trees then all algorithms over their
+//    structure would similarly be at risk of stack overflows if they were
+//    recursive. However, these algorithms are generally much more elegant when
+//    they make use of recursion.
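+//
+// As an illustration of why a limit is needed: each SSA def that the analysis
+// follows can itself be a binop over further SSA defs, so a chain of defs like
+//
+//   a1 = a0 + 1; a2 = a1 + 1; ...; aN = a(N-1) + 1
+//
+// costs one level of recursion per def when "aN" is analyzed.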
+const int SCALAR_EVOLUTION_ANALYSIS_MAX_DEPTH = 64;
+
+//------------------------------------------------------------------------
+// Analyze: Analyze the specified tree in the specified block.
+//
+// Parameters:
+//   block - Block containing the tree
+//   tree  - Tree node
+//   depth - Current analysis depth
+//
+// Returns:
+//   SCEV node if the tree was analyzable; otherwise nullptr if the value
+//   cannot be described.
+//
+Scev* ScalarEvolutionContext::Analyze(BasicBlock* block, GenTree* tree, int depth)
+{
+    Scev* result;
+    if (!m_cache.Lookup(tree, &result))
+    {
+        if (depth >= SCALAR_EVOLUTION_ANALYSIS_MAX_DEPTH)
+        {
+            return nullptr;
+        }
+
+        result = AnalyzeNew(block, tree, depth);
+        m_cache.Set(tree, result);
+    }
+
+    return result;
+}
+
+//------------------------------------------------------------------------
+// FoldBinop: Fold simple binops.
+//
+// Type parameters:
+//   T - Type that the binop is being evaluated in
+//
+// Parameters:
+//   oper - Binary operation
+//   op1  - First operand
+//   op2  - Second operand
+//
+// Returns:
+//   Folded value.
+//
+template <typename T>
+static T FoldBinop(ScevOper oper, T op1, T op2)
+{
+    switch (oper)
+    {
+        case ScevOper::Add:
+            return op1 + op2;
+        case ScevOper::Mul:
+            return op1 * op2;
+        case ScevOper::Lsh:
+            return op1 << op2;
+        default:
+            unreached();
+    }
+}
+
+//------------------------------------------------------------------------
+// Simplify: Try to simplify a SCEV node by folding and canonicalization.
+//
+// Parameters:
+//   scev - The node
+//
+// Returns:
+//   Simplified node.
+//
+// Remarks:
+//   Canonicalization is done for binops; constants are moved to the right and
+//   addrecs are moved to the left.
+//
+//   Simple unops/binops on constants are folded. Operands are distributed into
+//   add recs whenever possible.
+//
+Scev* ScalarEvolutionContext::Simplify(Scev* scev)
+{
+    switch (scev->Oper)
+    {
+        case ScevOper::Constant:
+        case ScevOper::Local:
+        {
+            return scev;
+        }
+        case ScevOper::ZeroExtend:
+        case ScevOper::SignExtend:
+        {
+            ScevUnop* unop = (ScevUnop*)scev;
+            assert(genTypeSize(unop->Type) >= genTypeSize(unop->Op1->Type));
+
+            Scev* op1 = Simplify(unop->Op1);
+
+            if (unop->Type == op1->Type)
+            {
+                return op1;
+            }
+
+            assert((unop->Type == TYP_LONG) && (op1->Type == TYP_INT));
+
+            if (op1->OperIs(ScevOper::Constant))
+            {
+                ScevConstant* cns = (ScevConstant*)op1;
+                return NewConstant(unop->Type, unop->OperIs(ScevOper::ZeroExtend) ? (uint64_t)(int32_t)cns->Value
+                                                                                  : (int64_t)(int32_t)cns->Value);
+            }
+
+            if (op1->OperIs(ScevOper::AddRec))
+            {
+                // TODO-Cleanup: This requires some proof that it is ok, but
+                // currently we do not rely on this.
+                return op1;
+            }
+
+            return (op1 == unop->Op1) ? unop : NewExtension(unop->Oper, unop->Type, op1);
+        }
+        case ScevOper::Add:
+        case ScevOper::Mul:
+        case ScevOper::Lsh:
+        {
+            ScevBinop* binop = (ScevBinop*)scev;
+            Scev*      op1   = Simplify(binop->Op1);
+            Scev*      op2   = Simplify(binop->Op2);
+
+            if (binop->OperIs(ScevOper::Add, ScevOper::Mul))
+            {
+                // Normalize addrecs to the left
+                if (op2->OperIs(ScevOper::AddRec) && !op1->OperIs(ScevOper::AddRec))
+                {
+                    std::swap(op1, op2);
+                }
+                // Normalize constants to the right
+                if (op1->OperIs(ScevOper::Constant) && !op2->OperIs(ScevOper::Constant))
+                {
+                    std::swap(op1, op2);
+                }
+            }
+
+            if (op1->OperIs(ScevOper::AddRec))
+            {
+                // <L, start, step> + x => <L, start + x, step>
+                // <L, start, step> * x => <L, start * x, step * x>
+                ScevAddRec* addRec   = (ScevAddRec*)op1;
+                Scev*       newStart = Simplify(NewBinop(binop->Oper, addRec->Start, op2));
+                Scev*       newStep  = scev->OperIs(ScevOper::Mul, ScevOper::Lsh)
+                                           ? Simplify(NewBinop(binop->Oper, addRec->Step, op2))
+                                           : addRec->Step;
+                return NewAddRec(newStart, newStep);
+            }
+
+            if (op1->OperIs(ScevOper::Constant) && op2->OperIs(ScevOper::Constant))
+            {
+                ScevConstant* cns1 = (ScevConstant*)op1;
+                ScevConstant* cns2 = (ScevConstant*)op2;
+                int64_t       newValue;
+                if (binop->TypeIs(TYP_INT))
+                {
+                    newValue = FoldBinop<int32_t>(binop->Oper, static_cast<int32_t>(cns1->Value),
+                                                  static_cast<int32_t>(cns2->Value));
+                }
+                else
+                {
+                    assert(binop->TypeIs(TYP_LONG));
+                    newValue = FoldBinop<int64_t>(binop->Oper, cns1->Value, cns2->Value);
+                }
+
+                return NewConstant(binop->Type, newValue);
+            }
+
+            return (op1 == binop->Op1) && (op2 == binop->Op2) ? binop : NewBinop(binop->Oper, op1, op2);
+        }
+        case ScevOper::AddRec:
+        {
+            ScevAddRec* addRec = (ScevAddRec*)scev;
+            Scev*       start  = Simplify(addRec->Start);
+            Scev*       step   = Simplify(addRec->Step);
+            return (start == addRec->Start) && (step == addRec->Step) ? addRec : NewAddRec(start, step);
+        }
+        default:
+            unreached();
+    }
+}
diff --git a/src/coreclr/jit/scev.h b/src/coreclr/jit/scev.h
new file mode 100644
index 00000000000000..29172f5d63bfba
--- /dev/null
+++ b/src/coreclr/jit/scev.h
@@ -0,0 +1,225 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+// This file contains the definition of the scalar evolution IR. This IR allows
+// representing the values of IR nodes inside loops in a closed form, taking
+// into account that they are changing on each loop iteration. The IR is based
+// around the following possible operations. At the core is ScevOper::AddRec,
+// which represents a value that evolves by an add recurrence. In dumps it is
+// described by <loop, start, step> where "loop" is the loop the value is
+// evolving in, "start" is the initial value and "step" is the step by which
+// the value evolves in every iteration.
+//
+enum class ScevOper
+{
+    Constant,
+    Local,
+    ZeroExtend,
+    SignExtend,
+    Add,
+    Mul,
+    Lsh,
+    AddRec,
+};
+
+static bool ScevOperIs(ScevOper oper, ScevOper otherOper)
+{
+    return oper == otherOper;
+}
+
+template <typename... Args>
+static bool ScevOperIs(ScevOper oper, ScevOper operFirst, Args... operTail)
+{
+    return oper == operFirst || ScevOperIs(oper, operTail...);
+}
+
+struct Scev
+{
+    const ScevOper  Oper;
+    const var_types Type;
+
+    Scev(ScevOper oper, var_types type) : Oper(oper), Type(type)
+    {
+    }
+
+    template <typename... Args>
+    bool OperIs(Args... opers)
+    {
+        return ScevOperIs(Oper, opers...);
+    }
+
+    bool TypeIs(var_types type)
+    {
+        return Type == type;
+    }
+
+    bool GetConstantValue(Compiler* comp, int64_t* cns);
+};
+
+struct ScevConstant : Scev
+{
+    ScevConstant(var_types type, int64_t value) : Scev(ScevOper::Constant, type), Value(value)
+    {
+    }
+
+    int64_t Value;
+};
+
+struct ScevLocal : Scev
+{
+    ScevLocal(var_types type, unsigned lclNum, unsigned ssaNum)
+        : Scev(ScevOper::Local, type), LclNum(lclNum), SsaNum(ssaNum)
+    {
+    }
+
+    const unsigned LclNum;
+    const unsigned SsaNum;
+
+    bool GetConstantValue(Compiler* comp, int64_t* cns);
+};
+
+struct ScevUnop : Scev
+{
+    ScevUnop(ScevOper oper, var_types type, Scev* op1) : Scev(oper, type), Op1(op1)
+    {
+    }
+
+    Scev* const Op1;
+};
+
+struct ScevBinop : ScevUnop
+{
+    ScevBinop(ScevOper oper, var_types type, Scev* op1, Scev* op2) : ScevUnop(oper, type, op1), Op2(op2)
+    {
+    }
+
+    Scev* const Op2;
+};
+
+// Represents a value that evolves by an add recurrence.
+// The value at iteration N is Start + N * Step.
+// "Start" and "Step" are guaranteed to be invariant in "Loop".
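+// For example, by this definition <L00, 1, 2> describes a value that is 1 in
+// the first iteration of loop L00 and then takes the values 3, 5, 7, ... in
+// the iterations that follow.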
+struct ScevAddRec : Scev
+{
+    ScevAddRec(var_types type, Scev* start, Scev* step) : Scev(ScevOper::AddRec, type), Start(start), Step(step)
+    {
+    }
+
+    Scev* const Start;
+    Scev* const Step;
+};
+
+typedef JitHashTable<GenTree*, JitPtrKeyFuncs<GenTree>, Scev*> ScalarEvolutionMap;
+
+// Scalar evolution is analyzed in the context of a single loop, and SCEVs
+// are computed on-demand by the use of the "Analyze" method on this class,
+// which also maintains a cache.
+class ScalarEvolutionContext
+{
+    Compiler*             m_comp;
+    FlowGraphNaturalLoop* m_loop = nullptr;
+    ScalarEvolutionMap    m_cache;
+
+    Scev* Analyze(BasicBlock* block, GenTree* tree, int depth);
+    Scev* AnalyzeNew(BasicBlock* block, GenTree* tree, int depth);
+    Scev* CreateSimpleAddRec(GenTreeLclVarCommon* headerStore,
+                             ScevLocal*           start,
+                             BasicBlock*          stepDefBlock,
+                             GenTree*             stepDefData);
+    Scev* CreateSimpleInvariantScev(GenTree* tree);
+    Scev* CreateScevForConstant(GenTreeIntConCommon* tree);

+public:
+    ScalarEvolutionContext(Compiler* comp) : m_comp(comp), m_cache(comp->getAllocator(CMK_LoopIVOpts))
+    {
+    }
+
+    void ResetForLoop(FlowGraphNaturalLoop* loop);
+    void DumpScev(Scev* scev);
+
+    //------------------------------------------------------------------------
+    // NewConstant: Create a SCEV node that represents a constant.
+    //
+    // Returns:
+    //   The new node.
+    //
+    ScevConstant* NewConstant(var_types type, int64_t value)
+    {
+        ScevConstant* constant = new (m_comp, CMK_LoopIVOpts) ScevConstant(type, value);
+        return constant;
+    }
+
+    //------------------------------------------------------------------------
+    // NewLocal: Create a SCEV node that represents an invariant local (i.e. a
+    // use of an SSA def from outside the loop).
+    //
+    // Parameters:
+    //   lclNum - The local
+    //   ssaNum - The SSA number of the def outside the loop that is being used.
+    //
+    // Returns:
+    //   The new node.
+    //
+    ScevLocal* NewLocal(unsigned lclNum, unsigned ssaNum)
+    {
+        var_types  type           = genActualType(m_comp->lvaGetDesc(lclNum));
+        ScevLocal* invariantLocal = new (m_comp, CMK_LoopIVOpts) ScevLocal(type, lclNum, ssaNum);
+        return invariantLocal;
+    }
+
+    //------------------------------------------------------------------------
+    // NewExtension: Create a SCEV node that represents a zero or sign extension.
+    //
+    // Parameters:
+    //   oper       - The operation (ScevOper::ZeroExtend or ScevOper::SignExtend)
+    //   targetType - The target type of the extension
+    //   op         - The operand being extended.
+    //
+    // Returns:
+    //   The new node.
+    //
+    ScevUnop* NewExtension(ScevOper oper, var_types targetType, Scev* op)
+    {
+        assert(op != nullptr);
+        ScevUnop* ext = new (m_comp, CMK_LoopIVOpts) ScevUnop(oper, targetType, op);
+        return ext;
+    }
+
+    //------------------------------------------------------------------------
+    // NewBinop: Create a SCEV node that represents a binary operation.
+    //
+    // Parameters:
+    //   oper - The operation
+    //   op1  - First operand
+    //   op2  - Second operand
+    //
+    // Returns:
+    //   The new node.
+    //
+    ScevBinop* NewBinop(ScevOper oper, Scev* op1, Scev* op2)
+    {
+        assert((op1 != nullptr) && (op2 != nullptr));
+        ScevBinop* binop = new (m_comp, CMK_LoopIVOpts) ScevBinop(oper, op1->Type, op1, op2);
+        return binop;
+    }
+
+    //------------------------------------------------------------------------
+    // NewAddRec: Create a SCEV node that represents a new add recurrence.
+    //
+    // Parameters:
+    //   start - Value of the recurrence at the first iteration
+    //   step  - Step value of the recurrence
+    //
+    // Returns:
+    //   The new node.
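+    //
+    // Remarks:
+    //   As an example, NewAddRec(NewConstant(TYP_INT, 0), NewConstant(TYP_INT, 1))
+    //   would describe a counter in the current loop that starts at 0 and
+    //   increments by 1 on every iteration.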
+ // + ScevAddRec* NewAddRec(Scev* start, Scev* step) + { + assert((start != nullptr) && (step != nullptr)); + ScevAddRec* addRec = new (m_comp, CMK_LoopIVOpts) ScevAddRec(start->Type, start, step); + return addRec; + } + + Scev* Analyze(BasicBlock* block, GenTree* tree); + Scev* Simplify(Scev* scev); +}; From 2700cda051b950e94280beb3d5e219c908f1dafd Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 27 Feb 2024 12:26:27 +0100 Subject: [PATCH 61/64] Address some feedback --- src/coreclr/jit/clrjit.natvis | 5 + src/coreclr/jit/compiler.cpp | 27 +++- src/coreclr/jit/compiler.h | 3 +- src/coreclr/jit/inductionvariableopts.cpp | 16 ++- src/coreclr/jit/scev.cpp | 158 ++++++++++++++++++---- src/coreclr/jit/scev.h | 102 ++------------ 6 files changed, 184 insertions(+), 127 deletions(-) diff --git a/src/coreclr/jit/clrjit.natvis b/src/coreclr/jit/clrjit.natvis index 95dd3dc305689b..98c374bea8f33f 100644 --- a/src/coreclr/jit/clrjit.natvis +++ b/src/coreclr/jit/clrjit.natvis @@ -86,6 +86,11 @@ Documentation for VS debugger format specifiers: https://docs.microsoft.com/en-u {gtTreeID, d}: [{gtOper,en}, {gtType,en} V{((GenTreeLclFld*)this)->_gtLclNum,u}[+{((GenTreeLclFld*)this)->m_lclOffs,u}]] + + + [{Oper,en}, {Type,en}] + + LinearScan diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index bfd5d285b19674..60b1a316c114aa 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -9418,6 +9418,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #pragma comment(linker, "/include:cLoops") #pragma comment(linker, "/include:cLoopsA") #pragma comment(linker, "/include:cLoop") +#pragma comment(linker, "/include:cScev") #pragma comment(linker, "/include:cTreeFlags") #pragma comment(linker, "/include:cVN") @@ -9443,6 +9444,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #pragma comment(linker, "/include:dCVarSet") #pragma comment(linker, "/include:dLoop") #pragma comment(linker, "/include:dLoops") +#pragma comment(linker, "/include:dScev") #pragma comment(linker, "/include:dTreeFlags") #pragma comment(linker, "/include:dVN") @@ -9686,24 +9688,38 @@ JITDBGAPI void __cdecl cCVarSet(Compiler* comp, VARSET_VALARG_TP vars) JITDBGAPI void __cdecl cLoops(Compiler* comp) { static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called - printf("===================================================================== *NewLoops %u\n", sequenceNumber++); + printf("===================================================================== *Loops %u\n", sequenceNumber++); FlowGraphNaturalLoops::Dump(comp->m_loops); } JITDBGAPI void __cdecl cLoopsA(Compiler* comp, FlowGraphNaturalLoops* loops) { static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called - printf("===================================================================== *NewLoopsA %u\n", sequenceNumber++); + printf("===================================================================== *LoopsA %u\n", sequenceNumber++); FlowGraphNaturalLoops::Dump(loops); } JITDBGAPI void __cdecl cLoop(Compiler* comp, FlowGraphNaturalLoop* loop) { static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called - printf("===================================================================== *NewLoop %u\n", sequenceNumber++); + printf("===================================================================== *Loop %u\n", 
sequenceNumber++); FlowGraphNaturalLoop::Dump(loop); } +JITDBGAPI void __cdecl cScev(Compiler* comp, Scev* scev) +{ + static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called + printf("===================================================================== *Scev %u\n", sequenceNumber++); + if (scev == nullptr) + { + printf(" NULL\n"); + } + else + { + scev->Dump(comp); + } +} + JITDBGAPI void __cdecl cTreeFlags(Compiler* comp, GenTree* tree) { static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called @@ -10294,6 +10310,11 @@ JITDBGAPI void __cdecl dLoop(FlowGraphNaturalLoop* loop) cLoop(JitTls::GetCompiler(), loop); } +JITDBGAPI void __cdecl dScev(Scev* scev) +{ + cScev(JitTls::GetCompiler(), scev); +} + JITDBGAPI void __cdecl dTreeFlags(GenTree* tree) { cTreeFlags(JitTls::GetCompiler(), tree); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index b639b04fbd5fe7..f3712bebf21d09 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -42,6 +42,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "jitexpandarray.h" #include "tinyarray.h" #include "valuenum.h" +#include "scev.h" #include "namedintrinsiclist.h" #ifdef LATE_DISASM #include "disasm.h" @@ -7418,7 +7419,7 @@ class Compiler bool initedToConstant, FlowGraphNaturalLoop* loop, ArrayStack& ivUses); - void optBestEffortReplaceNarrowIVUsesWith( + void optBestEffortReplaceNarrowIVUses( unsigned lclNum, unsigned ssaNum, unsigned newLclNum, BasicBlock* block, Statement* firstStmt); void optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, Statement* stmt); void optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNaturalLoop* loop); diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index db3a508296d25c..bba55a7aac0fc4 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -381,7 +381,7 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned ne } //------------------------------------------------------------------------ -// optBestEffortReplaceNarrowIVUsesWith: Try to find and replace uses of the specified +// optBestEffortReplaceNarrowIVUses: Try to find and replace uses of the specified // SSA def with a new local. // // Parameters: @@ -391,7 +391,11 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned ne // block - Block to replace in // firstStmt - First statement in "block" to start replacing in // -void Compiler::optBestEffortReplaceNarrowIVUsesWith( +// Remarks: +// This function is best effort; it might not find all uses of the provided +// SSA num, particularly because it does not follow into joins. 
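+// (For example, the traversal below stops at any successor block that has
+// more than one predecessor, since a join can merge paths on which a
+// different SSA def of the same local is live.)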
+// +void Compiler::optBestEffortReplaceNarrowIVUses( unsigned lclNum, unsigned ssaNum, unsigned newLclNum, BasicBlock* block, Statement* firstStmt) { JITDUMP("Replacing V%02u -> V%02u in " FMT_BB " starting at " FMT_STMT "\n", lclNum, newLclNum, block->bbNum, @@ -409,7 +413,7 @@ void Compiler::optBestEffortReplaceNarrowIVUsesWith( block->VisitRegularSuccs(this, [=](BasicBlock* succ) { if (succ->GetUniquePred(this) == block) { - optBestEffortReplaceNarrowIVUsesWith(lclNum, ssaNum, newLclNum, succ, succ->firstStmt()); + optBestEffortReplaceNarrowIVUses(lclNum, ssaNum, newLclNum, succ, succ->firstStmt()); } return BasicBlockVisit::Continue; @@ -498,7 +502,7 @@ PhaseStatus Compiler::optInductionVariables() scev = scevContext.Simplify(scev); JITDUMP(" => "); - DBEXEC(verbose, scevContext.DumpScev(scev)); + DBEXEC(verbose, scev->Dump(this)); JITDUMP("\n"); if (!scev->OperIs(ScevOper::AddRec)) { @@ -617,8 +621,8 @@ PhaseStatus Compiler::optInductionVariables() if (initStmt != nullptr) { JITDUMP(" Replacing on the way to the loop\n"); - optBestEffortReplaceNarrowIVUsesWith(lcl->GetLclNum(), startLocal->SsaNum, newLclNum, initBlock, - initStmt->GetNextStmt()); + optBestEffortReplaceNarrowIVUses(lcl->GetLclNum(), startLocal->SsaNum, newLclNum, initBlock, + initStmt->GetNextStmt()); } JITDUMP(" Replacing in the loop; %d statements with appearences\n", ivUses.Height()); diff --git a/src/coreclr/jit/scev.cpp b/src/coreclr/jit/scev.cpp index 46006dfa1be1c4..e772de04b670be 100644 --- a/src/coreclr/jit/scev.cpp +++ b/src/coreclr/jit/scev.cpp @@ -7,7 +7,6 @@ // induction variables." and also by LLVM's scalar evolution analysis. #include "jitpch.h" -#include "scev.h" //------------------------------------------------------------------------ // GetConstantValue: If this SSA use refers to a constant, then fetch that @@ -62,43 +61,30 @@ bool Scev::GetConstantValue(Compiler* comp, int64_t* cns) return false; } -//------------------------------------------------------------------------ -// ResetForLoop: Reset the internal cache in preparation of scalar -// evolution analysis inside a new loop. -// -// Parameters: -// loop - The loop. -// -void ScalarEvolutionContext::ResetForLoop(FlowGraphNaturalLoop* loop) -{ - m_loop = loop; - m_cache.RemoveAll(); -} - #ifdef DEBUG //------------------------------------------------------------------------ -// DumpScev: Print a scev node to stdout. +// Dump: Print this scev node to stdout. // // Parameters: -// scev - The scev node. +// comp - Compiler instance // -void ScalarEvolutionContext::DumpScev(Scev* scev) +void Scev::Dump(Compiler* comp) { - switch (scev->Oper) + switch (Oper) { case ScevOper::Constant: { - ScevConstant* cns = (ScevConstant*)scev; + ScevConstant* cns = (ScevConstant*)this; printf("%zd", (ssize_t)cns->Value); break; } case ScevOper::Local: { - ScevLocal* invariantLocal = (ScevLocal*)scev; + ScevLocal* invariantLocal = (ScevLocal*)this; printf("V%02u.%u", invariantLocal->LclNum, invariantLocal->SsaNum); int64_t cns; - if (invariantLocal->GetConstantValue(m_comp, &cns)) + if (invariantLocal->GetConstantValue(comp, &cns)) { printf(" (%lld)", (long long)cns); } @@ -107,9 +93,9 @@ void ScalarEvolutionContext::DumpScev(Scev* scev) case ScevOper::ZeroExtend: case ScevOper::SignExtend: { - ScevUnop* unop = (ScevUnop*)scev; + ScevUnop* unop = (ScevUnop*)this; printf("%cext<%d>(", unop->Oper == ScevOper::ZeroExtend ? 
'z' : 's', genTypeSize(unop->Type) * 8); - DumpScev(unop->Op1); + unop->Op1->Dump(comp); printf(")"); break; } @@ -117,9 +103,9 @@ void ScalarEvolutionContext::DumpScev(Scev* scev) case ScevOper::Mul: case ScevOper::Lsh: { - ScevBinop* binop = (ScevBinop*)scev; + ScevBinop* binop = (ScevBinop*)this; printf("("); - DumpScev(binop->Op1); + binop->Op1->Dump(comp); const char* op; switch (binop->Oper) { @@ -136,18 +122,18 @@ void ScalarEvolutionContext::DumpScev(Scev* scev) unreached(); } printf(" %s ", op); - DumpScev(binop->Op2); + binop->Op2->Dump(comp); printf(")"); break; } case ScevOper::AddRec: { - ScevAddRec* addRec = (ScevAddRec*)scev; - printf("<" FMT_LP, m_loop->GetIndex()); + ScevAddRec* addRec = (ScevAddRec*)this; + printf("<" FMT_LP, addRec->Loop->GetIndex()); printf(", "); - DumpScev(addRec->Start); + addRec->Start->Dump(comp); printf(", "); - DumpScev(addRec->Step); + addRec->Step->Dump(comp); printf(">"); break; } @@ -157,6 +143,118 @@ void ScalarEvolutionContext::DumpScev(Scev* scev) } #endif +//------------------------------------------------------------------------ +// ScalarEvolutionContext: Construct an instance of a context to do scalar evolution in. +// +// Parameters: +// comp - Compiler instance +// +// Remarks: +// After construction the context should be reset for a new loop by calling +// ResetForLoop. +// +ScalarEvolutionContext::ScalarEvolutionContext(Compiler* comp) + : m_comp(comp), m_cache(comp->getAllocator(CMK_LoopIVOpts)) +{ +} + +//------------------------------------------------------------------------ +// ResetForLoop: Reset the internal cache in preparation of scalar +// evolution analysis inside a new loop. +// +// Parameters: +// loop - The loop. +// +void ScalarEvolutionContext::ResetForLoop(FlowGraphNaturalLoop* loop) +{ + m_loop = loop; + m_cache.RemoveAll(); +} + +//------------------------------------------------------------------------ +// NewConstant: Create a SCEV node that represents a constant. +// +// Returns: +// The new node. +// +ScevConstant* ScalarEvolutionContext::NewConstant(var_types type, int64_t value) +{ + ScevConstant* constant = new (m_comp, CMK_LoopIVOpts) ScevConstant(type, value); + return constant; +} + +//------------------------------------------------------------------------ +// NewLocal: Create a SCEV node that represents an invariant local (i.e. a +// use of an SSA def from outside the loop). +// +// Parameters: +// lclNum - The local +// ssaNum - The SSA number of the def outside the loop that is being used. +// +// Returns: +// The new node. +// +ScevLocal* ScalarEvolutionContext::NewLocal(unsigned lclNum, unsigned ssaNum) +{ + var_types type = genActualType(m_comp->lvaGetDesc(lclNum)); + ScevLocal* invariantLocal = new (m_comp, CMK_LoopIVOpts) ScevLocal(type, lclNum, ssaNum); + return invariantLocal; +} + +//------------------------------------------------------------------------ +// NewExtension: Create a SCEV node that represents a zero or sign extension. +// +// Parameters: +// oper - The operation (ScevOper::ZeroExtend or ScevOper::SignExtend) +// targetType - The target type of the extension +// op - The operand being extended. +// +// Returns: +// The new node. 
+// +ScevUnop* ScalarEvolutionContext::NewExtension(ScevOper oper, var_types targetType, Scev* op) +{ + assert(op != nullptr); + ScevUnop* ext = new (m_comp, CMK_LoopIVOpts) ScevUnop(oper, targetType, op); + return ext; +} + +//------------------------------------------------------------------------ +// NewBinop: Create a SCEV node that represents a binary operation. +// +// Parameters: +// oper - The operation +// op1 - First operand +// op2 - Second operand +// +// Returns: +// The new node. +// +ScevBinop* ScalarEvolutionContext::NewBinop(ScevOper oper, Scev* op1, Scev* op2) +{ + assert((op1 != nullptr) && (op2 != nullptr)); + ScevBinop* binop = new (m_comp, CMK_LoopIVOpts) ScevBinop(oper, op1->Type, op1, op2); + return binop; +} + +//------------------------------------------------------------------------ +// NewAddRec: Create a SCEV node that represents a new add recurrence. +// +// Parameters: +// loop - The loop where this add recurrence is evolving +// start - Value of the recurrence at the first iteration +// step - Step value of the recurrence +// +// Returns: +// The new node. +// +ScevAddRec* ScalarEvolutionContext::NewAddRec(Scev* start, Scev* step) +{ + assert((start != nullptr) && (step != nullptr)); + ScevAddRec* addRec = new (m_comp, CMK_LoopIVOpts) ScevAddRec(start->Type, start, step DEBUGARG(m_loop)); + return addRec; +} + //------------------------------------------------------------------------ // CreateSimpleInvariantScev: Create a "simple invariant" SCEV node for a tree: // either an invariant local use or a constant. diff --git a/src/coreclr/jit/scev.h b/src/coreclr/jit/scev.h index 29172f5d63bfba..a2c47c2b5f1464 100644 --- a/src/coreclr/jit/scev.h +++ b/src/coreclr/jit/scev.h @@ -1,6 +1,8 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +#pragma once + // This file contains the definition of the scalar evolution IR. This IR allows // representing the values of IR nodes inside loops in a closed form, taking // into account that they are changing on each loop iteration. The IR is based @@ -54,6 +56,10 @@ struct Scev } bool GetConstantValue(Compiler* comp, int64_t* cns); + +#ifdef DEBUG + void Dump(Compiler* comp); +#endif }; struct ScevConstant : Scev @@ -101,12 +107,14 @@ struct ScevBinop : ScevUnop // "Start" and "Step" are guaranteed to be invariant in "Loop". struct ScevAddRec : Scev { - ScevAddRec(var_types type, Scev* start, Scev* step) : Scev(ScevOper::AddRec, type), Start(start), Step(step) + ScevAddRec(var_types type, Scev* start, Scev* step DEBUGARG(FlowGraphNaturalLoop* loop)) + : Scev(ScevOper::AddRec, type), Start(start), Step(step) DEBUGARG(Loop(loop)) { } Scev* const Start; Scev* const Step; + INDEBUG(FlowGraphNaturalLoop* const Loop); }; typedef JitHashTable, Scev*> ScalarEvolutionMap; @@ -130,95 +138,15 @@ class ScalarEvolutionContext Scev* CreateScevForConstant(GenTreeIntConCommon* tree); public: - ScalarEvolutionContext(Compiler* comp) : m_comp(comp), m_cache(comp->getAllocator(CMK_LoopIVOpts)) - { - } + ScalarEvolutionContext(Compiler* comp); void ResetForLoop(FlowGraphNaturalLoop* loop); - void DumpScev(Scev* scev); - - //------------------------------------------------------------------------ - // NewConstant: Create a SCEV node that represents a constant. - // - // Returns: - // The new node. 
- // - ScevConstant* NewConstant(var_types type, int64_t value) - { - ScevConstant* constant = new (m_comp, CMK_LoopIVOpts) ScevConstant(type, value); - return constant; - } - - //------------------------------------------------------------------------ - // NewLocal: Create a SCEV node that represents an invariant local (i.e. a - // use of an SSA def from outside the loop). - // - // Parameters: - // lclNum - The local - // ssaNum - The SSA number of the def outside the loop that is being used. - // - // Returns: - // The new node. - // - ScevLocal* NewLocal(unsigned lclNum, unsigned ssaNum) - { - var_types type = genActualType(m_comp->lvaGetDesc(lclNum)); - ScevLocal* invariantLocal = new (m_comp, CMK_LoopIVOpts) ScevLocal(type, lclNum, ssaNum); - return invariantLocal; - } - //------------------------------------------------------------------------ - // NewExtension: Create a SCEV node that represents a zero or sign extension. - // - // Parameters: - // oper - The operation (ScevOper::ZeroExtend or ScevOper::SignExtend) - // targetType - The target type of the extension - // op - The operand being extended. - // - // Returns: - // The new node. - // - ScevUnop* NewExtension(ScevOper oper, var_types targetType, Scev* op) - { - assert(op != nullptr); - ScevUnop* ext = new (m_comp, CMK_LoopIVOpts) ScevUnop(oper, targetType, op); - return ext; - } - - //------------------------------------------------------------------------ - // NewBinop: Create a SCEV node that represents a binary operation. - // - // Parameters: - // oper - The operation - // op1 - First operand - // op2 - Second operand - // - // Returns: - // The new node. - // - ScevBinop* NewBinop(ScevOper oper, Scev* op1, Scev* op2) - { - assert((op1 != nullptr) && (op2 != nullptr)); - ScevBinop* binop = new (m_comp, CMK_LoopIVOpts) ScevBinop(oper, op1->Type, op1, op2); - return binop; - } - - //------------------------------------------------------------------------ - // NewAddRec: Create a SCEV node that represents a new add recurrence. - // - // Parameters: - // start - Value of the recurrence at the first iteration - // step - Step value of the recurrence - // - // Returns: - // The new node. - // - ScevAddRec* NewAddRec(Scev* start, Scev* step) - { - assert((start != nullptr) && (step != nullptr)); - ScevAddRec* addRec = new (m_comp, CMK_LoopIVOpts) ScevAddRec(start->Type, start, step); - return addRec; - } + ScevConstant* NewConstant(var_types type, int64_t value); + ScevLocal* NewLocal(unsigned lclNum, unsigned ssaNum); + ScevUnop* NewExtension(ScevOper oper, var_types targetType, Scev* op); + ScevBinop* NewBinop(ScevOper oper, Scev* op1, Scev* op2); + ScevAddRec* NewAddRec(Scev* start, Scev* step); Scev* Analyze(BasicBlock* block, GenTree* tree); Scev* Simplify(Scev* scev); From 0f8d2b296dc085fcd685f8cf8ab22a74f374a356 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 27 Feb 2024 13:19:43 +0100 Subject: [PATCH 62/64] Add some docs --- src/coreclr/jit/inductionvariableopts.cpp | 33 +++++++++++--- src/coreclr/jit/scev.cpp | 54 +++++++++++++++++++++++ src/coreclr/jit/scev.h | 2 + 3 files changed, 84 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index bba55a7aac0fc4..97d3e0474eaa61 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -1,11 +1,34 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
-// This file contains code to analyze how the value of induction variables
-// evolve (scalar evolution analysis) and to do optimizations based on it.
-// Currently the only optimization done is IV widening.
-// The scalar evolution analysis is inspired by "Michael Wolfe. 1992. Beyond
-// induction variables." and also by LLVM's scalar evolution.
+// This file contains code to optimize induction variables in loops based on
+// scalar evolution analysis (see scev.h and scev.cpp for more information
+// about the scalar evolution analysis).
+//
+// Currently the only optimization done is widening of primary induction
+// variables from 32 bits into 64 bits. This is generally only profitable on
+// x64, which does not allow zero extension of 32-bit values in addressing
+// modes (in contrast, arm64 does have the capability of including zero
+// extensions in addressing modes). For x64 this saves a zero extension for
+// every array access inside the loop, in exchange for some widening or
+// narrowing stores outside the loop:
+// - To make sure the new widened IV starts at the right value it is
+//   initialized to the value of the narrow IV outside the loop (either in the
+//   preheader or at the def location of the narrow IV). Usually the start
+//   value is a constant, in which case the widened IV is just initialized to
+//   the constant value.
+// - If the narrow IV is used after the loop we need to store it back from
+//   the widened IV in the exits. We depend on liveness sets to figure out
+//   which exits to insert IR into.
+//
+// These steps ensure that the wide IV has the right value to begin with and
+// the old narrow IV still has the right value after the loop. Additionally,
+// we must replace every use of the narrow IV inside the loop with the widened
+// IV. This is done by a traversal of the IR inside the loop. We do not
+// actually widen the uses of the IV; rather, we keep all uses and defs as
+// 32-bit, which the backend is able to handle efficiently on x64. Because of
+// this we do not need to worry about overflow.
+//
 
 #include "jitpch.h"
 #include "scev.h"
diff --git a/src/coreclr/jit/scev.cpp b/src/coreclr/jit/scev.cpp
index e772de04b670be..81760593a8aba8 100644
--- a/src/coreclr/jit/scev.cpp
+++ b/src/coreclr/jit/scev.cpp
@@ -5,6 +5,60 @@
 // evolve (scalar evolution analysis), and to turn them into the SCEV IR
 // defined in scev.h. The analysis is inspired by "Michael Wolfe. 1992. Beyond
 // induction variables." and also by LLVM's scalar evolution analysis.
+//
+// The main idea of scalar evolution analysis is to give a closed form
+// describing the value of tree nodes inside loops even when taking into
+// account that they are changing on each loop iteration. This is useful for
+// optimizations that want to reason about values of IR nodes inside loops,
+// such as IV widening or strength reduction.
+//
+// To represent the possibility of evolution the SCEV IR includes the concept
+// of an add recurrence <loop, start, step>, which describes a value that
+// starts at "start" and changes by adding "step" at each iteration. The IR
+// nodes that change in this way (or depend on something that changes in this
+// way) are generally called induction variables.
+//
+// An add recurrence arises only when a local exists in the loop that is
+// mutated in each iteration. Such a local will naturally end up with a phi
+// node in the loop header. These locals are called primary (or basic)
+// induction variables. The non-primary IVs (which always must depend on the
+// primary IVs) are sometimes called secondary IVs.
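+//
+// For example, in a loop like
+//
+//   for (int i = 0; i < n; i++)
+//   {
+//     int j = i * 4;
+//     ...
+//   }
+//
+// "i" is a primary IV, described by the add recurrence <L, 0, 1>, while "j"
+// is a secondary IV, described by <L, 0, 4>.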
+//
+// The job of the analysis is to go from a tree node to a SCEV node that
+// describes its value (possibly taking its evolution into account). Note that
+// SCEV nodes are immutable and the values they represent are _not_
+// flow-dependent; that is, they don't exist at a specific location inside the
+// loop, even though some particular tree node gave rise to that SCEV node. The
+// analysis itself _is_ flow-dependent and guarantees that the Scev* returned
+// describes the value that corresponds to what the tree node computes at its
+// specific location. However, it would be perfectly legal for two trees at
+// different locations in the loop to analyze to the same SCEV node (even
+// potentially returning the same pointer). For example, in theory "i" and "j"
+// in the following loop would both be represented by the same add recurrence
+// <L, 0, 1>, and the analysis could even return the same Scev* for both of
+// them, even if it does not today:
+//
+//   int i = 0;
+//   while (true)
+//   {
+//     i++;
+//     ...
+//     int j = i - 1;
+//   }
+//
+// Actually materializing the value of a SCEV node back into tree IR is not
+// implemented yet, but generally would depend on the availability of tree
+// nodes that compute the dependent values at the point where the IR is to be
+// materialized.
+//
+// Besides the add recurrences the analysis itself is generally a
+// straightforward translation from JIT IR into the SCEV IR. Creating the add
+// recurrences requires paying attention to the structure of PHIs, and
+// disambiguating the values coming from outside the loop and the values coming
+// from the backedges. Currently only simplistic add recurrences that do not
+// require recursive analysis are supported. These simplistic add recurrences
+// are always of the form i = i + k.
+//
 
 #include "jitpch.h"
diff --git a/src/coreclr/jit/scev.h b/src/coreclr/jit/scev.h
index a2c47c2b5f1464..603088d9623661 100644
--- a/src/coreclr/jit/scev.h
+++ b/src/coreclr/jit/scev.h
@@ -12,6 +12,8 @@
 // evolving in, "start" is the initial value and "step" is the step by which
 // the value evolves in every iteration.
 //
+// See scev.cpp for further documentation.
+//
 enum class ScevOper
 {
     Constant,

From 3e3759ee0d3f15676f0bd20175ff08a87ffa65af Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Tue, 27 Feb 2024 13:25:48 +0100
Subject: [PATCH 63/64] More remarks

---
 src/coreclr/jit/inductionvariableopts.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp
index 97d3e0474eaa61..d30202680976e0 100644
--- a/src/coreclr/jit/inductionvariableopts.cpp
+++ b/src/coreclr/jit/inductionvariableopts.cpp
@@ -416,7 +416,13 @@ void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned ne
 //
 // Remarks:
 //   This function is best effort; it might not find all uses of the provided
-//   SSA num, particularly because it does not follow into joins.
+// Keeping it best-effort outside the loop is ok; there is no correctness +// issue since we do not invalidate the value of the old narrow IV in any +// way, but it may mean we end up leaving the narrow IV live concurrently +// with the new widened IV, increasing register pressure. // void Compiler::optBestEffortReplaceNarrowIVUses( unsigned lclNum, unsigned ssaNum, unsigned newLclNum, BasicBlock* block, Statement* firstStmt) From 3ec083f78a2bd6797bfec0924565fa85b29f7a51 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 27 Feb 2024 13:31:10 +0100 Subject: [PATCH 64/64] Expand ryujit markdown docs with new phase --- docs/design/coreclr/jit/ryujit-overview.md | 6 ++++++ docs/design/coreclr/jit/ryujit-tutorial.md | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/docs/design/coreclr/jit/ryujit-overview.md b/docs/design/coreclr/jit/ryujit-overview.md index cdb17002ee1974..5e63d38e98f664 100644 --- a/docs/design/coreclr/jit/ryujit-overview.md +++ b/docs/design/coreclr/jit/ryujit-overview.md @@ -222,6 +222,7 @@ The top-level function of interest is `Compiler::compCompile`. It invokes the fo | [Common Subexpression Elimination (CSE)](#cse) | Elimination of redundant subexressions based on value numbers. | | [Assertion Propagation](#assertion-propagation) | Utilizes value numbers to propagate and transform based on properties such as non-nullness. | | [Range analysis](#range-analysis) | Eliminate array index range checks based on value numbers and assertions | +| [Induction variable optimization](#iv-opts) | Optimize induction variables used inside natural loops based on scalar evolution analysis | | [VN-based dead store elimination](#vn-based-dead-store-elimination) | Eliminate stores that do not change the value of a local. | | [If conversion](#if-conversion) | Transform conditional definitions into `GT_SELECT` operators. | | [Rationalization](#rationalization) | Flowgraph order changes from `FGOrderTree` to `FGOrderLinear`. All `GT_COMMA` nodes are transformed. | @@ -347,6 +348,11 @@ reused. Utilizes value numbers to propagate and transform based on properties such as non-nullness. +### Induction variable optimization + +Performs scalar evolution analysis and utilized it to optimize induction variables inside loops. +Currently this entails IV widening which is done on x64 only. + ### Range analysis Optimize array index range checks based on value numbers and assertions. diff --git a/docs/design/coreclr/jit/ryujit-tutorial.md b/docs/design/coreclr/jit/ryujit-tutorial.md index 34466e45afbcdc..ec900ccc8cd937 100644 --- a/docs/design/coreclr/jit/ryujit-tutorial.md +++ b/docs/design/coreclr/jit/ryujit-tutorial.md @@ -447,6 +447,10 @@ This is the same diagram as before, but with additional links to indicate execut - Determine initial value for dependent phis - Eliminate checks where the range of the index is within the check range +### Induction Variable Optimization +- Perform scalar evolution analysis to describe values of IR nodes inside loops +- Perform IV widening on x64 to avoid unnecessary zero extensions for array/span indexing + ## RyuJIT Back-End ### Rationalization