From e6c638e94a79caac9d34a28251b607ef0a7576b6 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 14 Jun 2023 11:40:08 +0200 Subject: [PATCH 1/8] JIT: Support last-use copy omission for commas Allow last-use copy omission to kick in when the argument is a comma (typically created by physical promotion). Also handle this pattern in arg evaluation: avoid creating a new local for the COMMA(..., LCL_ADDR) created for this pattern. As part of this rename the "needs temp" terminology to "needs early evaluation" since these cases do not actually need or create a temp. There were other cases already where we did not create a temp (specifically handling of GT_MKREFANY), so the terminology was already a bit misleading. Fix #87471 --- src/coreclr/jit/gentree.cpp | 6 +-- src/coreclr/jit/gentree.h | 17 +++--- src/coreclr/jit/morph.cpp | 102 ++++++++++++++++++++++-------------- 3 files changed, 76 insertions(+), 49 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 28450f313219b0..8d37fd4c7bd20e 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -1468,7 +1468,7 @@ CallArgs::CallArgs() , m_hasRegArgs(false) , m_hasStackArgs(false) , m_argsComplete(false) - , m_needsTemps(false) + , m_needsEarlyEvaluation(false) #ifdef UNIX_X86_ABI , m_alignmentDone(false) #endif @@ -9296,7 +9296,7 @@ void CallArgs::InternalCopyFrom(Compiler* comp, CallArgs* other, CopyNodeFunc co m_hasRegArgs = other->m_hasRegArgs; m_hasStackArgs = other->m_hasStackArgs; m_argsComplete = other->m_argsComplete; - m_needsTemps = other->m_needsTemps; + m_needsEarlyEvaluation = other->m_needsEarlyEvaluation; // Unix x86 flags related to stack alignment intentionally not copied as // they depend on where the call will be inserted. @@ -9311,7 +9311,7 @@ void CallArgs::InternalCopyFrom(Compiler* comp, CallArgs* other, CopyNodeFunc co carg->m_tmpNum = arg.m_tmpNum; carg->m_signatureType = arg.m_signatureType; carg->m_wellKnownArg = arg.m_wellKnownArg; - carg->m_needTmp = arg.m_needTmp; + carg->m_evaluateEarly = arg.m_evaluateEarly; carg->m_needPlace = arg.m_needPlace; carg->m_isTmp = arg.m_isTmp; carg->m_processed = arg.m_processed; diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 3ac070c88569dc..0d05564e22af73 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4571,8 +4571,9 @@ class CallArg var_types m_signatureType : 5; // The type of well-known argument this is. WellKnownArg m_wellKnownArg : 5; - // True when we force this argument's evaluation into a temp LclVar. - bool m_needTmp : 1; + // True if this argument needs to be evaluated in the early list (usually + // by introducing a copy into a temp LclVar). + bool m_evaluateEarly : 1; // True when we must replace this argument with a placeholder node. bool m_needPlace : 1; // True when we setup a temp LclVar for this argument. @@ -4590,7 +4591,7 @@ class CallArg , m_tmpNum(BAD_VAR_NUM) , m_signatureType(TYP_UNDEF) , m_wellKnownArg(WellKnownArg::None) - , m_needTmp(false) + , m_evaluateEarly(false) , m_needPlace(false) , m_isTmp(false) , m_processed(false) @@ -4679,8 +4680,8 @@ class CallArgs // True if we have one or more stack arguments. bool m_hasStackArgs : 1; bool m_argsComplete : 1; - // One or more arguments must be copied to a temp by EvalArgsToTemps. - bool m_needsTemps : 1; + // One or more arguments must be evaluated early by EvalArgsEarly. + bool m_needsEarlyEvaluation : 1; #ifdef UNIX_X86_ABI // Updateable flag, set to 'true' after we've done any required alignment. bool m_alignmentDone : 1; @@ -4736,8 +4737,8 @@ class CallArgs void AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call); void ArgsComplete(Compiler* comp, GenTreeCall* call); - void EvalArgsToTemps(Compiler* comp, GenTreeCall* call); - void SetNeedsTemp(CallArg* arg); + void EvalArgsEarly(Compiler* comp, GenTreeCall* call); + void SetEvaluateEarly(CallArg* arg); bool IsNonStandard(Compiler* comp, GenTreeCall* call, CallArg* arg); GenTree* MakeTmpArgNode(Compiler* comp, CallArg* arg); @@ -4753,7 +4754,7 @@ class CallArgs bool AreArgsComplete() const { return m_argsComplete; } bool HasRegArgs() const { return m_hasRegArgs; } bool HasStackArgs() const { return m_hasStackArgs; } - bool NeedsTemps() const { return m_needsTemps; } + bool NeedsEarlyEvaluation() const { return m_needsEarlyEvaluation; } #ifdef UNIX_X86_ABI void ComputeStackAlignment(unsigned curStackLevelInBytes) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 8e4c6612b41a10..4e1016f65c5716 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -767,9 +767,9 @@ void CallArg::Dump(Compiler* comp) { printf(", isSplit"); } - if (m_needTmp) + if (m_evaluateEarly) { - printf(", tmpNum=V%02u", m_tmpNum); + printf(", evaluateEarly", m_tmpNum); } if (m_needPlace) { @@ -778,6 +778,7 @@ void CallArg::Dump(Compiler* comp) if (m_isTmp) { printf(", isTmp"); + printf(", tmpNum=V%02u", m_tmpNum); } if (m_processed) { @@ -917,7 +918,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) // for FEATURE_FIXED_OUT_ARGS. if (canEvalToTemp && ((argCount > 1) || argx->OperIsCopyBlkOp() || (FEATURE_FIXED_OUT_ARGS && arg.m_isTmp))) { - SetNeedsTemp(&arg); + SetEvaluateEarly(&arg); } else { @@ -944,7 +945,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) if ((prevArg.GetEarlyNode() != nullptr) && !prevArg.GetEarlyNode()->IsInvariant()) { - SetNeedsTemp(&prevArg); + SetEvaluateEarly(&prevArg); } } } @@ -996,12 +997,12 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) { if (argCount > 1) // If this is not the only argument { - SetNeedsTemp(&arg); + SetEvaluateEarly(&arg); } else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL)) { // Spill all arguments that are floating point calls - SetNeedsTemp(&arg); + SetEvaluateEarly(&arg); } } @@ -1026,7 +1027,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) // we require that they be evaluated into a temp if ((prevArg.GetEarlyNode() != nullptr) && ((prevArg.GetEarlyNode()->gtFlags & GTF_ALL_EFFECT) != 0)) { - SetNeedsTemp(&prevArg); + SetEvaluateEarly(&prevArg); } #if FEATURE_FIXED_OUT_ARGS // Or, if they are stored into the FIXED_OUT_ARG area @@ -1093,7 +1094,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) if ((prevArg.GetEarlyNode() != nullptr) && ((prevArg.GetEarlyNode()->gtFlags & GTF_EXCEPT) != 0)) { - SetNeedsTemp(&prevArg); + SetEvaluateEarly(&prevArg); } } } @@ -1115,12 +1116,12 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) if ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) { // Spill multireg struct arguments that have Assignments or Calls embedded in them. - SetNeedsTemp(&arg); + SetEvaluateEarly(&arg); } else if (!argx->OperIsLocalRead() && !argx->OperIsIndir()) { // TODO-CQ: handle HWI/SIMD/COMMA nodes in multi-reg morphing. - SetNeedsTemp(&arg); + SetEvaluateEarly(&arg); } else { @@ -1130,7 +1131,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) if (argx->GetCostEx() > (6 * IND_COST_EX)) { // Spill multireg struct arguments that are expensive to evaluate twice. - SetNeedsTemp(&arg); + SetEvaluateEarly(&arg); } } } @@ -1153,7 +1154,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) if (!arg.AbiInfo.IsSplit() || (structSize <= 16)) #endif // TARGET_ARM { - SetNeedsTemp(&arg); + SetEvaluateEarly(&arg); } } } @@ -1191,7 +1192,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) // Examine the register args that are currently not marked needTmp // - if (!arg.m_needTmp && (arg.AbiInfo.GetRegNum() != REG_STK)) + if (!arg.m_evaluateEarly && (arg.AbiInfo.GetRegNum() != REG_STK)) { if (hasStackArgsWeCareAbout) { @@ -1204,7 +1205,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) // if (argx->gtFlags & GTF_EXCEPT) { - SetNeedsTemp(&arg); + SetEvaluateEarly(&arg); continue; } #else @@ -1216,7 +1217,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) if (comp->gtTreeContainsOper(argx, GT_LCLHEAP)) { - SetNeedsTemp(&arg); + SetEvaluateEarly(&arg); continue; } } @@ -1235,13 +1236,13 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) { // Always evaluate 'this' to temp. assert(HasThisPointer()); - SetNeedsTemp(GetThisArg()); + SetEvaluateEarly(GetThisArg()); for (CallArg& arg : EarlyArgs()) { if ((arg.GetEarlyNode()->gtFlags & GTF_ALL_EFFECT) != 0) { - SetNeedsTemp(&arg); + SetEvaluateEarly(&arg); } } } @@ -1397,7 +1398,7 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) // if (!arg->m_processed) { - if (arg->m_needTmp) + if (arg->m_evaluateEarly) { arg->m_processed = true; @@ -1589,7 +1590,8 @@ GenTree* CallArgs::MakeTmpArgNode(Compiler* comp, CallArg* arg) } //------------------------------------------------------------------------------ -// EvalArgsToTemps: Handle arguments that were marked as requiring temps. +// EvalArgsEarly: Handle arguments that were marked as needing to be evaluated +// early. // // Remarks: // This is the main function responsible for assigning late nodes in arguments. @@ -1613,7 +1615,7 @@ GenTree* CallArgs::MakeTmpArgNode(Compiler* comp, CallArg* arg) // Arguments that are passed on stack and that do not need an explicit // assignment in the early node list do not require any late node. // -void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) +void CallArgs::EvalArgsEarly(Compiler* comp, GenTreeCall* call) { CallArg* inlineTable[32]; size_t numArgs = call->gtArgs.CountArgs(); @@ -1640,20 +1642,32 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) assert(!arg.m_needPlace); // On x86 and other archs that use push instructions to pass arguments: - // Only the register arguments need to be replaced with placeholder nodes. - // Stacked arguments are evaluated and pushed (or stored into the stack) in order. + // Only the register arguments need to be handled by this logic. + // Stack arguments are evaluated and pushed (or stored into the stack) in order. // if (arg.AbiInfo.GetRegNum() == REG_STK) continue; #endif - if (arg.m_needTmp) + if (arg.m_evaluateEarly) { if (arg.m_isTmp) { // Create a copy of the temp to go into the late argument list defArg = MakeTmpArgNode(comp, &arg); } + // We may have marked the arg as "evaluate early" because of a + // comma, but if the effective node is invariant we can skip + // creating the temp as an optimization but still extract the side + // effects as the setup to ensure correct ordering. + else if (comp->opts.OptimizationEnabled() && argx->gtEffectiveVal()->IsInvariant()) + { + defArg = argx->gtEffectiveVal(); + comp->gtExtractSideEffList(argx, &setupArg); + + // Reset it here and update it to the new setupArg below if non-null. + arg.SetEarlyNode(nullptr); + } else { // Create a temp assignment for the argument @@ -1839,12 +1853,13 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) } //------------------------------------------------------------------------------ -// SetNeedsTemp: Set the specified argument as requiring evaluation into a temp. +// SetEvaluateEarly: Set the specified argument as requiring early evaluation +// (usually copying it into a temp). // -void CallArgs::SetNeedsTemp(CallArg* arg) +void CallArgs::SetEvaluateEarly(CallArg* arg) { - arg->m_needTmp = true; - m_needsTemps = true; + arg->m_evaluateEarly = true; + m_needsEarlyEvaluation = true; } //------------------------------------------------------------------------------ @@ -3153,7 +3168,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) // up. if (!reMorphing && call->IsExpandedEarly() && call->IsVirtualVtable() && !argx->OperIsLocal()) { - call->gtArgs.SetNeedsTemp(&arg); + call->gtArgs.SetEvaluateEarly(&arg); } } @@ -3478,10 +3493,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) // If we are remorphing or don't have any register arguments or other arguments that need // temps, then we don't need to call SortArgs() and EvalArgsToTemps(). // - if (!reMorphing && (call->gtArgs.HasRegArgs() || call->gtArgs.NeedsTemps())) + if (!reMorphing && (call->gtArgs.HasRegArgs() || call->gtArgs.NeedsEarlyEvaluation())) { // Do the 'defer or eval to temp' analysis. - call->gtArgs.EvalArgsToTemps(this, call); + call->gtArgs.EvalArgsEarly(this, call); } if (hasMultiregStructArgs) @@ -3929,14 +3944,21 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg) // if (opts.OptimizationEnabled() && arg->AbiInfo.PassedByRef) { + GenTree** effectiveUse = &arg->EarlyNodeRef(); + while ((*effectiveUse)->OperIs(GT_COMMA)) + { + effectiveUse = &(*effectiveUse)->AsOp()->gtOp2; + } + GenTree* effectiveArg = *effectiveUse; + GenTree* implicitByRefLclAddr; GenTreeLclVarCommon* implicitByRefLcl = - argx->IsImplicitByrefParameterValuePostMorph(this, &implicitByRefLclAddr); + effectiveArg->IsImplicitByrefParameterValuePostMorph(this, &implicitByRefLclAddr); GenTreeLclVarCommon* lcl = implicitByRefLcl; - if ((lcl == nullptr) && argx->OperIsLocal()) + if ((lcl == nullptr) && effectiveArg->OperIsLocal()) { - lcl = argx->AsLclVarCommon(); + lcl = effectiveArg->AsLclVarCommon(); } if (lcl != nullptr) @@ -3967,7 +3989,7 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg) { if (implicitByRefLcl != nullptr) { - arg->SetEarlyNode(implicitByRefLclAddr); + *effectiveUse = implicitByRefLclAddr; } else { @@ -3992,6 +4014,12 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg) fgRemarkGlobalUses = true; } + // If the arg was a comma then make sure we retype the whole comma chain. + if (argx->OperIs(GT_COMMA)) + { + argx->ChangeType((*effectiveUse)->TypeGet()); + } + JITDUMP("did not need to make outgoing copy for last use of V%02d\n", varNum); return; } @@ -7927,13 +7955,11 @@ GenTree* Compiler::fgExpandVirtualVtableCallTarget(GenTreeCall* call) // fgMorphArgs must enforce this invariant by creating a temp // - assert(thisPtr->OperIsLocal()); + assert(thisPtr->OperIsLocal() || thisPtr->IsInvariant()); // Make a copy of the thisPtr by cloning // - thisPtr = gtClone(thisPtr, true); - - noway_assert(thisPtr != nullptr); + thisPtr = gtCloneExpr(thisPtr); // Get hold of the vtable offset unsigned vtabOffsOfIndirection; From 7be14865a6a555bff4ffd55aeae0c066088da6f0 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 14 Jun 2023 12:14:38 +0200 Subject: [PATCH 2/8] Update docs, comments --- docs/design/coreclr/jit/jit-call-morphing.md | 47 +++++++++++--------- src/coreclr/jit/morph.cpp | 32 ++++++------- 2 files changed, 42 insertions(+), 37 deletions(-) diff --git a/docs/design/coreclr/jit/jit-call-morphing.md b/docs/design/coreclr/jit/jit-call-morphing.md index 2e83a9b0643b0a..dc27c9d824d983 100644 --- a/docs/design/coreclr/jit/jit-call-morphing.md +++ b/docs/design/coreclr/jit/jit-call-morphing.md @@ -27,11 +27,13 @@ new LclVar to preserve the order of evaluation rule. Each argument is an arbitrary expression tree. The JIT tracks a summary of observable side-effects using a set of five bit flags in every GenTree node: `GTF_ASG`, `GTF_CALL`, `GTF_EXCEPT`, `GTF_GLOB_REF`, and `GTF_ORDER_SIDEEFF`. These flags are propagated up the tree so that the top node has a particular -flag set if any of its child nodes has the flag set. Decisions about whether to evaluate arguments -into temp LclVars are made by examining these flags on each of the arguments. +flag set if any of its child nodes has the flag set. Decisions about whether we need to take special +case to evaluate arguments in order are made by examining these flags on each of the arguments. +Typically, evaluating an argument early means creating a temporary local and assigning it as part of +the early list in the GenTreeCall node. -*Our design goal for call sites is to create a few temp LclVars as possible, while preserving the +*Our design goal for call sites is to create as few temp LclVars as possible, while preserving the order of evaluation rules of IL and C#.* @@ -77,7 +79,7 @@ them there while pushing some new arguments for a nested call. Thus we allow ne calls for x86 but do not allow them for the other architectures. -Rules for when Arguments must be evaluated into temp LclVars +Rules for when Arguments must be evaluated early ----------------- During the first Morph phase known as global Morph we call `CallArgs::ArgsComplete()` @@ -85,19 +87,19 @@ after we have completed determining ABI information for each arg. This method ap the following rules: 1. When an argument is marked as containing an assignment using `GTF_ASG`, then we -force all previous non-constant arguments to be evaluated into temps. This is very +force all previous non-constant arguments to be evaluated early. This is very conservative, but at this phase of the JIT it is rare to have an assignment subtree as part of an argument. 2. When an argument is marked as containing a call using the `GTF_CALL` flag, then we force that argument and any previous argument that is marked with any of the -`GTF_ALL_EFFECT` flags into temps. +`GTF_ALL_EFFECT` flags to be evaluated early. * Additionally, for `FEATURE_FIXED_OUT_ARGS`, any previous stack based args that - we haven't marked as needing a temp but still need to store in the outgoing args - area is marked as needing a placeholder temp using `needPlace`. -3. We force any arguments that use `localloc` to be evaluated into temps. + we haven't marked as needing early evaluating but still need to store in the outgoing + args area is marked as needing a placeholder temp using `needPlace`. +3. We force any arguments that use `localloc` to be evaluated early. 4. We mark any address taken locals with the `GTF_GLOB_REF` flag. For two special -cases we call `SetNeedsTemp()` and set up the temp in `fgMorphArgs`. `SetNeedsTemp` -records the tmpNum used and sets `isTmp` so that we handle it like the other temps. +cases we call `CallArgs::SetTemp()` and set up the temp earlier in `fgMorphArgs`. +`CallArgs::SetTemp` records the tmpNum used and sets `isTmp` so that we handle it like the other temps. The special cases are for `GT_MKREFANY` and for a `TYP_STRUCT` argument passed by value when we can't optimize away the extra copy. @@ -120,13 +122,13 @@ LclFlds and put them before the constant args. to the least complex. -Evaluating Args into new LclVar temps and the creation of the LateArgs +Evaluating Args early and the creation of the LateArgs ----------------- -After calling `SortArgs()`, the `EvalArgsToTemps()` method is called to create +After calling `SortArgs()`, the `EvalArgsEarly()` method is called to create the temp assignments and to populate the LateArgs list. -For arguments that are marked as needing a temp: +For arguments that are marked as evaluating early: ----------------- 1. We create an assignment using `gtNewTempAssign`. This assignment replaces @@ -134,12 +136,15 @@ the original argument in the early argument list. After we create the assignmen the argument is marked with `m_isTmp = true`. 2. Arguments that are already marked with `m_isTmp` are treated similarly as above except we don't create an assignment for them. -3. A `TYP_STRUCT` argument passed by value will have `m_isTmp` set to true -and will use a `GT_COPYBLK` or a `GT_COPYOBJ` to perform the assignment of the temp. -4. The assignment node or the CopyBlock node is referred to as `arg1 SETUP` in the JitDump. +3. Some arguments may not need to have temps created, e.g. for a comma +with an invariant effective value but side effects in it. The side effects may +extracted directly to the setup node and the invariant node put in the late arg list. +4. A `TYP_STRUCT` argument passed by value will have `m_isTmp` set to true +and will use a block copy to perform the assignment of the temp. +5. The store or the CopyBlock node is referred to as `arg1 SETUP` in the JitDump. -For arguments that are marked as not needing a temp: +For arguments that are marked as not requiring early evaluation: ----------------- 1. If this is an argument that is passed in a register, then the existing @@ -152,6 +157,6 @@ evaluated directly into the outgoing arg area or pushed on the stack. After the Call node is fully morphed the LateArgs list will contain the arguments passed in registers as well as additional ones for `m_needPlace` marked arguments whenever we have a nested call for a stack based argument. -When `m_needTmp` is true the LateArg will be a LclVar that was created -to evaluate the arg (single-def/single-use). When `m_needTmp` is false -the LateArg can be an arbitrary expression tree. +When `m_evaluateEarly` is true the LateArg will be a LclVar that was created +to evaluate the arg (single-def/single-use) or a simple invariant tree. +When `m_evaluateEarly` is false the LateArg can be an arbitrary expression tree. diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 4e1016f65c5716..16acb5a56cdd5e 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1109,7 +1109,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) // here we consider spilling it into a local. We also need to spill it in case we have a node that we do not // currently handle in multi-reg morphing. // - if (varTypeIsStruct(argx) && !arg.m_needTmp) + if (varTypeIsStruct(argx) && !arg.m_evaluateEarly) { if ((arg.AbiInfo.NumRegs > 0) && ((arg.AbiInfo.NumRegs + arg.AbiInfo.GetStackSlotsNumber()) > 1)) { @@ -1190,7 +1190,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) GenTree* argx = arg.GetEarlyNode(); assert(!comp->gtTreeContainsOper(argx, GT_QMARK)); - // Examine the register args that are currently not marked needTmp + // Examine the register args that are currently not marked evaluateEarly // if (!arg.m_evaluateEarly && (arg.AbiInfo.GetRegNum() != REG_STK)) { @@ -1272,17 +1272,17 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) // more complex arguments before the simpler arguments. We use the late // list to keep the sorted result at this point, and the ordering ends up // looking like: - // +------------------------------------+ <--- end of sortedArgs - // | constants | - // +------------------------------------+ - // | local var / local field | - // +------------------------------------+ - // | remaining arguments sorted by cost | - // +------------------------------------+ - // | temps (CallArg::m_needTmp == true) | - // +------------------------------------+ - // | args with calls (GTF_CALL) | - // +------------------------------------+ <--- start of sortedArgs + // +------------------------------------------+ <--- end of sortedArgs + // | constants | + // +------------------------------------------+ + // | local var / local field | + // +------------------------------------------+ + // | remaining arguments sorted by cost | + // +------------------------------------------+ + // | temps (CallArg::m_evaluateEarly == true) | + // +------------------------------------------+ + // | args with calls (GTF_CALL) | + // +------------------------------------------+ <--- start of sortedArgs // unsigned argCount = 0; @@ -1771,13 +1771,13 @@ void CallArgs::EvalArgsEarly(Compiler* comp, GenTreeCall* call) #endif } } - else // curArgTabEntry->needTmp == false + else { // On x86 - - // Only register args are replaced with placeholder nodes + // Only register args are are moved to late nodes // and the stack based arguments are evaluated and pushed in order. // - // On Arm/x64 - When needTmp is false and needPlace is false, + // On Arm/x64 - When evaluateEarly is false and needPlace is false, // the non-register arguments are evaluated and stored in order. // When needPlace is true we have a nested call that comes after // this argument so we have to replace it in the gtCallArgs list From 8d50315e952e40fa400f20f887f2aaa1157968b0 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 14 Jun 2023 15:03:29 +0200 Subject: [PATCH 3/8] Support GT_CNS_VEC in fgMorphMultiregStructArg --- src/coreclr/jit/compiler.h | 2 + src/coreclr/jit/gentree.cpp | 92 +++++++++++++++++++++++++++++++++++-- src/coreclr/jit/morph.cpp | 12 +++++ 3 files changed, 103 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 65379c6c8240fb..bf925ee7db3665 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2498,6 +2498,8 @@ class Compiler GenTree* gtNewConWithPattern(var_types type, uint8_t pattern); + GenTree* gtNewGenericCon(var_types type, uint8_t* cnsVal); + GenTreeLclVar* gtNewStoreLclVarNode(unsigned lclNum, GenTree* data); GenTreeLclFld* gtNewStoreLclFldNode( diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 8d37fd4c7bd20e..23013b9d9d1ee8 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -7654,9 +7654,9 @@ GenTree* Compiler::gtNewOneConNode(var_types type, var_types simdBaseType /* = T } //------------------------------------------------------------------------ -// CreateInitValue: -// Create an IR node representing a constant value with the specified 8 -// byte character broadcast into all of its bytes. +// gtNewConWithPattern: +// Create an IR node representing a constant value with the specified byte +// broadcast into all of its bytes. // // Parameters: // type - The primitive type. For small types the constant will be @@ -7718,6 +7718,92 @@ GenTree* Compiler::gtNewConWithPattern(var_types type, uint8_t pattern) } } +//------------------------------------------------------------------------ +// gtNewGenericCon: +// Create an IR node representing a constant value of any type. +// +// Parameters: +// type - The primitive type. For small types the constant will be +// zero/sign-extended and a TYP_INT node will be returned. +// cnsVal - Pointer to data +// +// Returns: +// An IR node representing the constant. +// +GenTree* Compiler::gtNewGenericCon(var_types type, uint8_t* cnsVal) +{ + switch (type) + { +#define READ_VALUE(typ) \ + typ val; \ + memcpy(&val, cnsVal, sizeof(typ)); + + case TYP_BOOL: + case TYP_UBYTE: + { + READ_VALUE(uint8_t); + return gtNewIconNode(val); + } + case TYP_BYTE: + { + READ_VALUE(int8_t); + return gtNewIconNode(val); + } + case TYP_SHORT: + { + READ_VALUE(int16_t); + return gtNewIconNode(val); + } + case TYP_USHORT: + { + READ_VALUE(uint16_t); + return gtNewIconNode(val); + } + case TYP_INT: + { + READ_VALUE(int32_t); + return gtNewIconNode(val); + } + case TYP_LONG: + { + READ_VALUE(int64_t); + return gtNewLconNode(val); + } + case TYP_FLOAT: + { + READ_VALUE(float); + return gtNewDconNode(val, TYP_FLOAT); + } + case TYP_DOUBLE: + { + READ_VALUE(double); + return gtNewDconNode(val); + } + case TYP_REF: + case TYP_BYREF: + { + READ_VALUE(target_ssize_t); + return gtNewIconNode(val, type); + } +#ifdef FEATURE_SIMD + case TYP_SIMD8: + case TYP_SIMD12: + case TYP_SIMD16: +#if defined(TARGET_XARCH) + case TYP_SIMD32: + case TYP_SIMD64: +#endif // TARGET_XARCH +#endif // FEATURE_SIMD + { + return gtNewVconNode(type, cnsVal); + } + default: + unreached(); + +#undef READ_VALUE + } +} + GenTreeLclVar* Compiler::gtNewStoreLclVarNode(unsigned lclNum, GenTree* data) { LclVarDsc* varDsc = lvaGetDesc(lclNum); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 16acb5a56cdd5e..a1f960d6e4759b 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -3844,6 +3844,18 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::LocalField)); } + else if (argNode->IsCnsVec()) + { + GenTreeVecCon* vecCon = argNode->AsVecCon(); + newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); + for (unsigned inx = 0; inx < elemCount; inx++) + { + unsigned offset = elems[inx].Offset; + assert(offset + genTypeSize(elems[inx].Type) <= sizeof(vecCon->gtSimdVal)); + GenTree* cns = gtNewGenericCon(elems[inx].Type, (uint8_t*)&vecCon->gtSimdVal + offset); + newArg->AddField(this, cns, offset, cns->TypeGet()); + } + } else { assert(argNode->OperIsIndir()); From ceef578db64735b7579c14a7dddd43f56829744f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 14 Jun 2023 15:04:39 +0200 Subject: [PATCH 4/8] Nit --- src/coreclr/jit/gentree.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 23013b9d9d1ee8..d34d8a8bffcfd3 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -7738,15 +7738,15 @@ GenTree* Compiler::gtNewGenericCon(var_types type, uint8_t* cnsVal) typ val; \ memcpy(&val, cnsVal, sizeof(typ)); - case TYP_BOOL: - case TYP_UBYTE: + case TYP_BYTE: { - READ_VALUE(uint8_t); + READ_VALUE(int8_t); return gtNewIconNode(val); } - case TYP_BYTE: + case TYP_BOOL: + case TYP_UBYTE: { - READ_VALUE(int8_t); + READ_VALUE(uint8_t); return gtNewIconNode(val); } case TYP_SHORT: From 5a615c21c525b307f8ece66080e12f7303343d2e Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 14 Jun 2023 15:12:03 +0200 Subject: [PATCH 5/8] Factor impImportCnsTreeFromBuffer --- src/coreclr/jit/importer.cpp | 88 ++++++------------------------------ 1 file changed, 15 insertions(+), 73 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index b0127eb4f0c703..c7c8048eff389c 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -3637,86 +3637,28 @@ GenTree* Compiler::impImportStaticReadOnlyField(CORINFO_FIELD_HANDLE field, CORI // GenTree* Compiler::impImportCnsTreeFromBuffer(uint8_t* buffer, var_types valueType) { - GenTree* tree = nullptr; - switch (valueType) + GenTree* tree; + if (valueType == TYP_REF) { -// Use memcpy to read from the buffer and create an Icon/Dcon tree -#define CreateTreeFromBuffer(type, treeFactory) \ - type v##type; \ - memcpy(&v##type, buffer, sizeof(type)); \ - tree = treeFactory(v##type); - - case TYP_BOOL: - { - CreateTreeFromBuffer(bool, gtNewIconNode); - break; - } - case TYP_BYTE: - { - CreateTreeFromBuffer(int8_t, gtNewIconNode); - break; - } - case TYP_UBYTE: - { - CreateTreeFromBuffer(uint8_t, gtNewIconNode); - break; - } - case TYP_SHORT: + target_ssize_t ptr; + memcpy(&ptr, buffer, sizeof(ptr)); + if (ptr == 0) { - CreateTreeFromBuffer(int16_t, gtNewIconNode); - break; - } - case TYP_USHORT: - { - CreateTreeFromBuffer(uint16_t, gtNewIconNode); - break; - } - case TYP_UINT: - case TYP_INT: - { - CreateTreeFromBuffer(int32_t, gtNewIconNode); - break; - } - case TYP_LONG: - case TYP_ULONG: - { - CreateTreeFromBuffer(int64_t, gtNewLconNode); - break; + tree = gtNewNull(); } - case TYP_FLOAT: - { - CreateTreeFromBuffer(float, gtNewDconNode); - break; - } - case TYP_DOUBLE: + else { - CreateTreeFromBuffer(double, gtNewDconNode); - break; + setMethodHasFrozenObjects(); + tree = gtNewIconEmbHndNode((void*)ptr, nullptr, GTF_ICON_OBJ_HDL, nullptr); + tree->gtType = TYP_REF; + INDEBUG(tree->AsIntCon()->gtTargetHandle = ptr); } - case TYP_REF: - { - size_t ptr; - memcpy(&ptr, buffer, sizeof(ssize_t)); - - if (ptr == 0) - { - tree = gtNewNull(); - } - else - { - setMethodHasFrozenObjects(); - tree = gtNewIconEmbHndNode((void*)ptr, nullptr, GTF_ICON_OBJ_HDL, nullptr); - tree->gtType = TYP_REF; - INDEBUG(tree->AsIntCon()->gtTargetHandle = ptr); - } - break; - } - default: - return nullptr; + } + else + { + tree = gtNewGenericCon(valueType, buffer); } - assert(tree != nullptr); - tree->gtType = genActualType(valueType); return tree; } From 25d1002b4087d97c81cb6c143802981d88de5682 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 14 Jun 2023 15:14:08 +0200 Subject: [PATCH 6/8] Fix FEATURE_SIMD ifdef --- src/coreclr/jit/gentree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index d34d8a8bffcfd3..fbd23ea479264b 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -7793,10 +7793,10 @@ GenTree* Compiler::gtNewGenericCon(var_types type, uint8_t* cnsVal) case TYP_SIMD32: case TYP_SIMD64: #endif // TARGET_XARCH -#endif // FEATURE_SIMD { return gtNewVconNode(type, cnsVal); } +#endif // FEATURE_SIMD default: unreached(); From a86b8dab278b9848aebad4ecb405c09597a5be6c Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 14 Jun 2023 15:34:42 +0200 Subject: [PATCH 7/8] Fix build --- src/coreclr/jit/importer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index c7c8048eff389c..e0063bedc89ed8 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -3649,7 +3649,7 @@ GenTree* Compiler::impImportCnsTreeFromBuffer(uint8_t* buffer, var_types valueTy else { setMethodHasFrozenObjects(); - tree = gtNewIconEmbHndNode((void*)ptr, nullptr, GTF_ICON_OBJ_HDL, nullptr); + tree = gtNewIconEmbHndNode((void*)(ssize_t)ptr, nullptr, GTF_ICON_OBJ_HDL, nullptr); tree->gtType = TYP_REF; INDEBUG(tree->AsIntCon()->gtTargetHandle = ptr); } From 1539183f1287bf4da2d0ae2087813e968bc19c1a Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 15 Jun 2023 00:10:35 +0200 Subject: [PATCH 8/8] Fix assert --- src/coreclr/jit/morph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index a1f960d6e4759b..57128f3dab48fd 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -3705,7 +3705,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) // if (!argNode->OperIs(GT_LCL_VAR, GT_LCL_FLD)) { - assert(argNode->OperIs(GT_BLK)); + assert(argNode->OperIs(GT_BLK, GT_CNS_VEC)); unsigned lastElemExactSize = structSize - lastElem.Offset;