From 1e7543302b7b22742b2cfb6e2d5eb62254e0f795 Mon Sep 17 00:00:00 2001 From: SingleAccretion Date: Wed, 15 Jun 2022 20:53:32 +0300 Subject: [PATCH 1/2] Unix x64: local morph --- src/coreclr/jit/lclmorph.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 3b1b635059afd4..1bdbe2ba5d08cb 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -1096,7 +1096,7 @@ class LocalAddressVisitor final : public GenTreeVisitor // | Partial | LCL_FLD | OBJ/LCL_FLD | LCL_FLD | // |------------|---------|-------------|---------| // - // * - On Windows x64 only. + // * - On Windows x64/Unix x64 only. // // |------------|------|------|--------|----------| // | SIMD | CALL | ASG | RETURN | HWI/SIMD | @@ -1114,9 +1114,9 @@ class LocalAddressVisitor final : public GenTreeVisitor if (user->IsCall()) { -#ifndef WINDOWS_AMD64_ABI +#if !defined(WINDOWS_AMD64_ABI) && !defined(UNIX_AMD64_ABI) return IndirTransform::None; -#endif // !WINDOWS_AMD64_ABI +#endif // !defined(WINDOWS_AMD64_ABI) && !defined(UNIX_AMD64_ABI) } if (match == StructMatch::Compatible) From 6b509692c9cf8a9a6e1267de052b52a15555bb1a Mon Sep 17 00:00:00 2001 From: SingleAccretion Date: Fri, 17 Jun 2022 16:26:33 +0300 Subject: [PATCH 2/2] Morph and costs tuning Required to avoid regressions due to args sorting. --- src/coreclr/jit/gentree.cpp | 26 +++++++++++++++----------- src/coreclr/jit/morph.cpp | 19 +++++++++++-------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 19e551f1f8946a..56deddf50242c6 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -4654,12 +4654,18 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) { costEx = IND_COST_EX; costSz = 2; - /* Sign-extend and zero-extend are more expensive to load */ + + // Some types are more expensive to load than others. if (varTypeIsSmall(tree->TypeGet())) { costEx += 1; costSz += 1; } + else if (tree->TypeIs(TYP_STRUCT)) + { + costEx += 2 * IND_COST_EX; + costSz += 2 * 2; + } } #if defined(TARGET_AMD64) // increase costSz for floating point locals @@ -4685,8 +4691,8 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) } else if (tree->TypeIs(TYP_STRUCT)) { - costEx += IND_COST_EX; - costSz += 2; + costEx += 2 * IND_COST_EX; + costSz += 2 * 2; } break; @@ -4916,17 +4922,15 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) break; case GT_ADDR: + if (op1->OperIsLocalRead()) + { + costEx = 3; + costSz = 3; + goto DONE; + } costEx = 0; costSz = 1; - - // If we have a GT_ADDR of an GT_IND we can just copy the costs from indOp1 - if (op1->OperGet() == GT_IND) - { - GenTree* indOp1 = op1->AsOp()->gtOp1; - costEx = indOp1->GetCostEx(); - costSz = indOp1->GetCostSz(); - } break; case GT_ARR_LENGTH: diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index f8c7b0806e0e37..129ffd013efd55 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1292,7 +1292,8 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) GenTree* argx = arg->GetEarlyNode(); assert(argx != nullptr); - if ((argx->gtOper == GT_LCL_VAR) || (argx->gtOper == GT_LCL_FLD)) + // As a CQ heuristic, sort TYP_STRUCT args using the cost estimation below. + if (!argx->TypeIs(TYP_STRUCT) && argx->OperIs(GT_LCL_VAR, GT_LCL_FLD)) { noway_assert(curInx <= endTab); @@ -1338,9 +1339,8 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) assert(argx != nullptr); // We should have already handled these kinds of args - assert(argx->gtOper != GT_LCL_VAR); - assert(argx->gtOper != GT_LCL_FLD); - assert(argx->gtOper != GT_CNS_INT); + assert((!argx->OperIs(GT_LCL_VAR, GT_LCL_FLD) || argx->TypeIs(TYP_STRUCT)) && + !argx->OperIs(GT_CNS_INT)); // This arg should either have no persistent side effects or be the last one in our table // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1))); @@ -3203,6 +3203,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) // - This is irrelevant for X86, since structs are always passed by value on the stack. GenTree* lclVar = fgIsIndirOfAddrOfLocal(argObj); + bool argIsLocal = (lclVar != nullptr) || argObj->OperIsLocalRead(); bool canTransform = false; if (structBaseType != TYP_STRUCT) @@ -3220,7 +3221,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) // or UNIX_AMD64_ABI cases where they will be passed in registers. else { - canTransform = (lclVar != nullptr); + canTransform = argIsLocal; passingSize = genTypeSize(structBaseType); } #endif // TARGET_ARM64 || UNIX_AMD64_ABI || TARGET_LOONGARCH64 @@ -3250,7 +3251,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) makeOutArgCopy = true; } } - else if (lclVar == nullptr) + else if (!argIsLocal) { // This should only be the case of a value directly producing a known struct type. assert(argObj->TypeGet() != TYP_STRUCT); @@ -3728,9 +3729,10 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) { argNode = fgMorphLclArgToFieldlist(lcl); } +#ifndef TARGET_XARCH else if (argNode->TypeGet() == TYP_STRUCT) { - // If this is a non-register struct, it must be referenced from memory. + // ARM/ARM64/LoongArch64 backends do not support local nodes as sources of some stack args. if (!actualArg->OperIs(GT_OBJ)) { // Create an Obj of the temp to use it as a call argument. @@ -3738,8 +3740,9 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) argNode = gtNewObjNode(lvaGetStruct(lcl->GetLclNum()), argNode); } // Its fields will need to be accessed by address. - lvaSetVarDoNotEnregister(lcl->GetLclNum() DEBUG_ARG(DoNotEnregisterReason::IsStructArg)); + lvaSetVarDoNotEnregister(lcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::IsStructArg)); } +#endif // !TARGET_XARCH } return argNode;