diff --git a/docs/design/coreclr/botr/clr-abi.md b/docs/design/coreclr/botr/clr-abi.md index db3a1ea967e5d4..6500f562dab17e 100644 --- a/docs/design/coreclr/botr/clr-abi.md +++ b/docs/design/coreclr/botr/clr-abi.md @@ -715,7 +715,7 @@ A frame pointer, if used, points at the bottom of the "fixed" portion of the sta Structs are generally passed by-reference, unless they happen to exactly contain a single primitive field (or be a struct exactly containing such a struct). The linear stack provides the backing storage for the by-reference structs. -Structs are generally returned via hidden buffers, whose address is supplied by the caller and passed just after the `$sp` argument. In such cases the return value of the method is the address of the return value. But if the struct can be passed on the Wasm stack it is returned on the Wasm stack. +Structs are generally returned via hidden buffers, whose address is supplied by the caller and passed just after the managed `this`, or after `$sp` argument when `this` is not present. In such cases the return value of the method is the address of the return value. But if the struct can be passed on the Wasm stack it is returned on the Wasm stack. (TBD: ABI for vector types) diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp index 3ece0522fe88eb..84478472487e40 100644 --- a/src/coreclr/interpreter/compiler.cpp +++ b/src/coreclr/interpreter/compiler.cpp @@ -5,6 +5,7 @@ #include "interpreter.h" #include "stackmap.h" +#include #include #include // for std::bad_alloc @@ -2023,18 +2024,13 @@ int32_t InterpCompiler::GetInterpTypeStackSize(CORINFO_CLASS_HANDLE clsHnd, Inte if (interpType == InterpTypeVT) { size = m_compHnd->getClassSize(clsHnd); - align = m_compHnd->getClassAlignmentRequirement(clsHnd); - - // All vars are stored at 8 byte aligned offsets - if (align < INTERP_STACK_SLOT_SIZE) - align = INTERP_STACK_SLOT_SIZE; + // All vars are stored at least at 8 byte aligned offsets // We do not align beyond the stack alignment // (This is relevant for structs with very high alignment requirements, // where we align within struct layout, but the structs are not actually // aligned on the stack) - if (align > INTERP_STACK_ALIGNMENT) - align = INTERP_STACK_ALIGNMENT; + align = std::clamp(m_compHnd->getClassAlignmentRequirement(clsHnd), INTERP_STACK_SLOT_SIZE, INTERP_STACK_ALIGNMENT); } else { diff --git a/src/coreclr/interpreter/compileropt.cpp b/src/coreclr/interpreter/compileropt.cpp index 2636afdb468be9..e4987123ffc3f9 100644 --- a/src/coreclr/interpreter/compileropt.cpp +++ b/src/coreclr/interpreter/compileropt.cpp @@ -3,6 +3,8 @@ #include "interpreter.h" #include "stackmap.h" +#include + // Allocates the offset for var at the stack position identified by // *pPos while bumping the pointer to point to the next stack location int32_t InterpCompiler::AllocVarOffset(int var, int32_t *pPos) @@ -14,14 +16,10 @@ int32_t InterpCompiler::AllocVarOffset(int var, int32_t *pPos) size_t align = INTERP_STACK_SLOT_SIZE; - if (size > INTERP_STACK_SLOT_SIZE) + if (size > (int32_t)INTERP_STACK_SLOT_SIZE) { assert(m_pVars[var].interpType == InterpTypeVT); - align = m_compHnd->getClassAlignmentRequirement(m_pVars[var].clsHnd); - if (align < INTERP_STACK_SLOT_SIZE) - align = INTERP_STACK_SLOT_SIZE; - if (align > INTERP_STACK_ALIGNMENT) - align = INTERP_STACK_ALIGNMENT; + align = std::clamp(m_compHnd->getClassAlignmentRequirement(m_pVars[var].clsHnd), INTERP_STACK_SLOT_SIZE, INTERP_STACK_ALIGNMENT); } else { diff --git a/src/coreclr/interpreter/inc/interpretershared.h b/src/coreclr/interpreter/inc/interpretershared.h index a25c24f4cc1be3..f9b19392204805 100644 --- a/src/coreclr/interpreter/inc/interpretershared.h +++ b/src/coreclr/interpreter/inc/interpretershared.h @@ -14,8 +14,8 @@ #define INTERP_API __attribute__ ((visibility ("default"))) #endif // _MSC_VER -#define INTERP_STACK_SLOT_SIZE 8 // Alignment of each var offset on the interpreter stack -#define INTERP_STACK_ALIGNMENT 16 // Alignment of interpreter stack at the start of a frame +#define INTERP_STACK_SLOT_SIZE 8u // Alignment of each var offset on the interpreter stack +#define INTERP_STACK_ALIGNMENT 16u // Alignment of interpreter stack at the start of a frame struct InterpHelperData { uint32_t addressDataItemIndex : 29; diff --git a/src/coreclr/vm/callhelpers.cpp b/src/coreclr/vm/callhelpers.cpp index 783c1a239cc4ad..b86aa01cf75ff1 100644 --- a/src/coreclr/vm/callhelpers.cpp +++ b/src/coreclr/vm/callhelpers.cpp @@ -223,6 +223,7 @@ void* DispatchCallSimple( #ifdef TARGET_WASM static_assert(2*sizeof(ARGHOLDER_TYPE) == INTERP_STACK_SLOT_SIZE); callDescrData.nArgsSize = numStackSlotsToCopy * sizeof(ARGHOLDER_TYPE)*2; + callDescrData.hasRetBuff = false; LPVOID pOrigSrc = callDescrData.pSrc; callDescrData.pSrc = (LPVOID)_alloca(callDescrData.nArgsSize); for (int i = 0; i < numStackSlotsToCopy; i++) @@ -540,6 +541,8 @@ void MethodDescCallSite::CallTargetWorker(const ARG_SLOT *pArguments, ARG_SLOT * callDescrData.pTarget = m_pCallTarget; #ifdef TARGET_WASM callDescrData.nArgsSize = nStackBytes; + callDescrData.hasRetBuff = false; + _ASSERTE(!m_argIt.HasRetBuffArg()); #endif // TARGET_WASM CallDescrWorkerWithHandler(&callDescrData); diff --git a/src/coreclr/vm/callhelpers.h b/src/coreclr/vm/callhelpers.h index 3d3a25d803b067..a193959cf9dc2a 100644 --- a/src/coreclr/vm/callhelpers.h +++ b/src/coreclr/vm/callhelpers.h @@ -28,7 +28,9 @@ struct CallDescrData #ifdef TARGET_WASM // size of the arguments and the transition block are used to execute the method with the interpreter size_t nArgsSize; -#endif + bool hasThis; + bool hasRetBuff; +#endif // TARGET_WASM #ifdef CALLDESCR_RETBUFFARGREG // Pointer to return buffer arg location diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h index 2a609a7ad4e604..f7a76c516713d5 100644 --- a/src/coreclr/vm/callingconvention.h +++ b/src/coreclr/vm/callingconvention.h @@ -12,7 +12,10 @@ #ifndef __CALLING_CONVENTION_INCLUDED #define __CALLING_CONVENTION_INCLUDED +#include + #ifdef FEATURE_INTERPRETER +#include #include #endif // FEATURE_INTERPRETER @@ -1902,6 +1905,27 @@ int ArgIteratorTemplate::GetNextOffset() return argOfs; #elif defined(TARGET_WASM) + + unsigned align; + if (argType == ELEMENT_TYPE_VALUETYPE) + { + align = std::clamp(CEEInfo::getClassAlignmentRequirementStatic(thValueType.GetMethodTable()), INTERP_STACK_SLOT_SIZE, INTERP_STACK_ALIGNMENT); + } + else + { + align = INTERP_STACK_SLOT_SIZE; + } + + if (HasRetBuffArg()) + { + // the slot for retbuf arg will be removed before the actual call + m_ofsStack = ALIGN_UP(m_ofsStack - INTERP_STACK_SLOT_SIZE, align) + INTERP_STACK_SLOT_SIZE; + } + else + { + m_ofsStack = ALIGN_UP(m_ofsStack, align); + } + int cbArg = ALIGN_UP(argSize, INTERP_STACK_SLOT_SIZE); int argOfs = TransitionBlock::GetOffsetOfArgs() + m_ofsStack; diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index faea387d418ad9..587ce6f2a05c6c 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -2891,15 +2891,7 @@ void CallStubGenerator::ComputeCallStubWorker(bool hasUnmanagedCallConv, CorInfo CorElementType corType = argIt.GetArgType(&thArgTypeHandle); if ((corType == ELEMENT_TYPE_VALUETYPE) && thArgTypeHandle.GetSize() > INTERP_STACK_SLOT_SIZE) { - unsigned align = CEEInfo::getClassAlignmentRequirementStatic(thArgTypeHandle); - if (align < INTERP_STACK_SLOT_SIZE) - { - align = INTERP_STACK_SLOT_SIZE; - } - else if (align > INTERP_STACK_ALIGNMENT) - { - align = INTERP_STACK_ALIGNMENT; - } + unsigned align = std::clamp(CEEInfo::getClassAlignmentRequirementStatic(thArgTypeHandle), INTERP_STACK_SLOT_SIZE, INTERP_STACK_ALIGNMENT); assert(align == 8 || align == 16); // At the moment, we can only have an 8 or 16 byte alignment requirement here if (interpreterStackOffset != ALIGN_UP(interpreterStackOffset, align)) { diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index f8f9e26d0f830d..672003c99ef859 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -4103,7 +4103,7 @@ do \ if (returnValueSize > 0) { - if (returnValueSize > INTERP_STACK_SLOT_SIZE) + if (returnValueSize > (int32_t)INTERP_STACK_SLOT_SIZE) { memset(pFrame->pRetVal, 0, returnValueSize); } diff --git a/src/coreclr/vm/reflectioninvocation.cpp b/src/coreclr/vm/reflectioninvocation.cpp index ed82fba96fb8c4..37343b4fcb9cc2 100644 --- a/src/coreclr/vm/reflectioninvocation.cpp +++ b/src/coreclr/vm/reflectioninvocation.cpp @@ -405,8 +405,7 @@ extern "C" void QCALLTYPE RuntimeMethodHandle_InvokeMethod( CallDescrData callDescrData; callDescrData.pSrc = pTransitionBlock + sizeof(TransitionBlock); - _ASSERTE((nStackBytes % TARGET_POINTER_SIZE) == 0); - callDescrData.numStackSlots = nStackBytes / TARGET_POINTER_SIZE; + callDescrData.numStackSlots = ALIGN_UP(nStackBytes, TARGET_REGISTER_SIZE) / TARGET_REGISTER_SIZE; #ifdef CALLDESCR_ARGREGS callDescrData.pArgumentRegisters = (ArgumentRegisters*)(pTransitionBlock + TransitionBlock::GetOffsetOfArgumentRegisters()); #endif @@ -423,6 +422,8 @@ extern "C" void QCALLTYPE RuntimeMethodHandle_InvokeMethod( #ifdef TARGET_WASM // WASM-TODO: this is now called from the interpreter, so the arguments layout is OK. reconsider with codegen callDescrData.nArgsSize = nStackBytes; + callDescrData.hasThis = argit.HasThis(); + callDescrData.hasRetBuff = argit.HasRetBuffArg(); #endif // TARGET_WASM // This is duplicated logic from MethodDesc::GetCallTarget diff --git a/src/coreclr/vm/wasm/calldescrworkerwasm.cpp b/src/coreclr/vm/wasm/calldescrworkerwasm.cpp index 6f0fbefb682f56..5c64790f55b8c2 100644 --- a/src/coreclr/vm/wasm/calldescrworkerwasm.cpp +++ b/src/coreclr/vm/wasm/calldescrworkerwasm.cpp @@ -7,6 +7,8 @@ // Forward declaration void ExecuteInterpretedMethodWithArgs(TADDR targetIp, int8_t* args, size_t argSize, void* retBuff, PCODE callerIp); +#define SPECIAL_ARG_ADDR(pos) (void**)(((int8_t*)pCallDescrData->pSrc) + ((pos)*INTERP_STACK_SLOT_SIZE)) + extern "C" void STDCALL CallDescrWorkerInternal(CallDescrData* pCallDescrData) { _ASSERTE(pCallDescrData != NULL); @@ -26,5 +28,28 @@ extern "C" void STDCALL CallDescrWorkerInternal(CallDescrData* pCallDescrData) targetIp = pMethod->GetInterpreterCode(); } - ExecuteInterpretedMethodWithArgs((TADDR)targetIp, (int8_t*)pCallDescrData->pSrc, pCallDescrData->nArgsSize, (int8_t*)pCallDescrData->returnValue, (PCODE)&CallDescrWorkerInternal); + size_t argsSize = pCallDescrData->nArgsSize; + void* retBuff; + int8_t* args; + if (pCallDescrData->hasRetBuff) + { + argsSize -= INTERP_STACK_SLOT_SIZE; + if (pCallDescrData->hasThis) + { + retBuff = *SPECIAL_ARG_ADDR(1); + *SPECIAL_ARG_ADDR(1) = *SPECIAL_ARG_ADDR(0); + } + else + { + retBuff = *SPECIAL_ARG_ADDR(0); + } + args = (int8_t*)SPECIAL_ARG_ADDR(1); + } + else + { + args = (int8_t*)pCallDescrData->pSrc; + retBuff = pCallDescrData->returnValue; + } + + ExecuteInterpretedMethodWithArgs((TADDR)targetIp, args, argsSize, retBuff, (PCODE)&CallDescrWorkerInternal); } diff --git a/src/coreclr/vm/wasm/cgencpu.h b/src/coreclr/vm/wasm/cgencpu.h index 1e076e9d01e089..5829f5fd798592 100644 --- a/src/coreclr/vm/wasm/cgencpu.h +++ b/src/coreclr/vm/wasm/cgencpu.h @@ -67,7 +67,8 @@ struct ArgumentRegisters { #define NUM_ARGUMENT_REGISTERS 0 #define ARGUMENTREGISTERS_SIZE sizeof(ArgumentRegisters) -#define ENREGISTERED_RETURNTYPE_MAXSIZE 16 // not sure here, 16 bytes is v128 +#define ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE 8 // bytes +#define ENREGISTERED_RETURNTYPE_MAXSIZE 16 // bytes, so that v128 can be returned without retbuff #define STACKWALK_CONTROLPC_ADJUST_OFFSET 1 diff --git a/src/tests/JIT/Directed/VectorABI/VectorMgdMgdArray.cs b/src/tests/JIT/Directed/VectorABI/VectorMgdMgdArray.cs index 517b875bdb2e6a..c042e690f51b84 100644 --- a/src/tests/JIT/Directed/VectorABI/VectorMgdMgdArray.cs +++ b/src/tests/JIT/Directed/VectorABI/VectorMgdMgdArray.cs @@ -344,7 +344,7 @@ private void checkValues(string msg, Vector128 v, int index) { if (!printedMsg) { - Console.WriteLine("{0}: FAILED - Vector64 checkValues(index = {1}, i = {2}) {3}", + Console.WriteLine("{0}: FAILED - Vector128 checkValues(index = {1}, i = {2}) {3}", msg, index, i, isReflection ? "(via reflection)" : "" ); printedMsg = true; }