From 25a5fbcdce0fef9bcafdc07007eee515771059a8 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 9 Mar 2022 00:46:09 +0100 Subject: [PATCH 1/2] Fix early stack offset and size computation for ARM32 * Take split parameters into account for stack size computation * Handle alignment correctly --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/ee_il_dll.cpp | 24 ++++++++----- src/coreclr/jit/lclvars.cpp | 42 ++++++++++++++++------- src/coreclr/jit/morph.cpp | 2 +- src/coreclr/jit/register_arg_convention.h | 4 +-- 5 files changed, 48 insertions(+), 26 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index dc8bb8f80cfa46..1d5cb42b741e98 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8215,7 +8215,7 @@ class Compiler CORINFO_CLASS_HANDLE eeGetArgClass(CORINFO_SIG_INFO* sig, CORINFO_ARG_LIST_HANDLE list); CORINFO_CLASS_HANDLE eeGetClassFromContext(CORINFO_CONTEXT_HANDLE context); unsigned eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* sig); - static unsigned eeGetArgAlignment(var_types type, bool isFloatHfa); + static unsigned eeGetArgSizeAlignment(var_types type, bool isFloatHfa); // VOM info, method sigs diff --git a/src/coreclr/jit/ee_il_dll.cpp b/src/coreclr/jit/ee_il_dll.cpp index 2ec2c71f859535..3c184c9d6e15a8 100644 --- a/src/coreclr/jit/ee_il_dll.cpp +++ b/src/coreclr/jit/ee_il_dll.cpp @@ -456,30 +456,36 @@ unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* { argSize = genTypeSize(argType); } - const unsigned argAlignment = eeGetArgAlignment(argType, (hfaType == TYP_FLOAT)); - const unsigned argSizeWithPadding = roundUp(argSize, argAlignment); - return argSizeWithPadding; + + const unsigned argSizeAlignment = eeGetArgSizeAlignment(argType, (hfaType == TYP_FLOAT)); + const unsigned alignedArgSize = roundUp(argSize, argSizeAlignment); + return alignedArgSize; #endif } //------------------------------------------------------------------------ -// eeGetArgAlignment: Return arg passing alignment for the given type. +// eeGetArgSizeAlignment: Return alignment for an argument size. // // Arguments: // type - the argument type // isFloatHfa - is it an HFA type // // Return value: -// the required alignment in bytes. +// the required argument size alignment in bytes. // // Notes: -// It currently doesn't return smaller than required alignment for arm32 (4 bytes for double and int64) -// but it does not lead to issues because its alignment requirements are satisfied in other code parts. -// TODO: fix this function and delete the other code that is handling this. +// Usually values passed on the stack are aligned to stack slot (i.e. pointer size), except for +// on macOS ARM ABI that allows packing multiple args into a single stack slot. +// +// The arg size alignment can be different from the normal alignment. One +// example is on arm32 where a struct containing a double and float can +// explicitly have size 12 but with alignment 8, in which case the size is +// aligned to 4 (the stack slot size) while frame layout must still handle +// aligning the argument to 8. // // static -unsigned Compiler::eeGetArgAlignment(var_types type, bool isFloatHfa) +unsigned Compiler::eeGetArgSizeAlignment(var_types type, bool isFloatHfa) { if (compMacOsArm64Abi()) { diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index df32fcf45dc859..65ad81590ff709 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -955,6 +955,21 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un { varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_INT)); } + + // Check if arg was split between registers and stack. + if (!varTypeUsesFloatReg(argType)) + { + unsigned firstRegArgNum = genMapIntRegNumToRegArgNum(varDsc->GetArgReg()); + unsigned lastRegArgNum = firstRegArgNum + cSlots - 1; + if (lastRegArgNum >= varDscInfo->maxIntRegArgNum) + { + assert(varDscInfo->stackArgSize == 0); + unsigned numEnregistered = varDscInfo->maxIntRegArgNum - firstRegArgNum; + varDsc->SetStackOffset(-(int)numEnregistered * REGSIZE_BYTES); + varDscInfo->stackArgSize += (cSlots - numEnregistered) * REGSIZE_BYTES; + JITDUMP("set user arg V%02u offset to %d\n", varDscInfo->varNum, varDsc->GetStackOffset()); + } + } #endif // TARGET_ARM #ifdef DEBUG @@ -1057,14 +1072,17 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un #endif // TARGET_XXX #if FEATURE_FASTTAILCALL - const unsigned argAlignment = eeGetArgAlignment(origArgType, (hfaType == TYP_FLOAT)); - if (compMacOsArm64Abi()) - { - varDscInfo->stackArgSize = roundUp(varDscInfo->stackArgSize, argAlignment); - } - - assert((argSize % argAlignment) == 0); - assert((varDscInfo->stackArgSize % argAlignment) == 0); +#ifdef TARGET_ARM + unsigned argAlignment = cAlign * TARGET_POINTER_SIZE; +#else + unsigned argAlignment = eeGetArgSizeAlignment(origArgType, (hfaType == TYP_FLOAT)); + // We expect the following rounding operation to be a noop on all + // ABIs except ARM (where we have 8-byte aligned args) and macOS + // ARM64 (that allows to pack multiple smaller parameters in a + // single stack slot). + assert(compMacOsArm64Abi() || ((varDscInfo->stackArgSize % argAlignment) == 0)); +#endif + varDscInfo->stackArgSize = roundUp(varDscInfo->stackArgSize, argAlignment); JITDUMP("set user arg V%02u offset to %u\n", varDscInfo->varNum, varDscInfo->stackArgSize); varDsc->SetStackOffset(varDscInfo->stackArgSize); @@ -3659,9 +3677,9 @@ unsigned LclVarDsc::lvSize() const // Size needed for storage representation. On if (lvIsParam) { assert(varTypeIsStruct(lvType)); - const bool isFloatHfa = (lvIsHfa() && (GetHfaType() == TYP_FLOAT)); - const unsigned argAlignment = Compiler::eeGetArgAlignment(lvType, isFloatHfa); - return roundUp(lvExactSize, argAlignment); + const bool isFloatHfa = (lvIsHfa() && (GetHfaType() == TYP_FLOAT)); + const unsigned argSizeAlignment = Compiler::eeGetArgSizeAlignment(lvType, isFloatHfa); + return roundUp(lvExactSize, argSizeAlignment); } #if defined(FEATURE_SIMD) && !defined(TARGET_64BIT) @@ -5949,7 +5967,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, } #endif // TARGET_ARM const bool isFloatHfa = (varDsc->lvIsHfa() && (varDsc->GetHfaType() == TYP_FLOAT)); - const unsigned argAlignment = eeGetArgAlignment(varDsc->lvType, isFloatHfa); + const unsigned argAlignment = eeGetArgSizeAlignment(varDsc->lvType, isFloatHfa); if (compMacOsArm64Abi()) { argOffs = roundUp(argOffs, argAlignment); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 2d61a13ba4cbfe..db19e8f3fe1bec 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -3076,7 +3076,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // Arm64 Apple has a special ABI for passing small size arguments on stack, // bytes are aligned to 1-byte, shorts to 2-byte, int/float to 4-byte, etc. // It means passing 8 1-byte arguments on stack can take as small as 8 bytes. - argAlignBytes = eeGetArgAlignment(argType, isFloatHfa); + argAlignBytes = eeGetArgSizeAlignment(argType, isFloatHfa); } // diff --git a/src/coreclr/jit/register_arg_convention.h b/src/coreclr/jit/register_arg_convention.h index a1816ba897e85d..07552f85292f31 100644 --- a/src/coreclr/jit/register_arg_convention.h +++ b/src/coreclr/jit/register_arg_convention.h @@ -28,7 +28,6 @@ struct InitVarDscInfo #if FEATURE_FASTTAILCALL // It is used to calculate argument stack size information in byte unsigned stackArgSize; - bool hasMultiSlotStruct; #endif // FEATURE_FASTTAILCALL public: @@ -49,8 +48,7 @@ struct InitVarDscInfo #endif // TARGET_ARM #if FEATURE_FASTTAILCALL - stackArgSize = 0; - hasMultiSlotStruct = false; + stackArgSize = 0; #endif // FEATURE_FASTTAILCALL } From 11cb44e44e525cba25880a242ca71a02c09357ea Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 9 Mar 2022 01:15:30 +0100 Subject: [PATCH 2/2] Fix build --- src/coreclr/jit/lclvars.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 65ad81590ff709..f0ff0e43750814 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -956,6 +956,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_INT)); } +#if FEATURE_FASTTAILCALL // Check if arg was split between registers and stack. if (!varTypeUsesFloatReg(argType)) { @@ -970,6 +971,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un JITDUMP("set user arg V%02u offset to %d\n", varDscInfo->varNum, varDsc->GetStackOffset()); } } +#endif #endif // TARGET_ARM #ifdef DEBUG