From 754849c0c4957a512fb0007a5898ba22dd4474a2 Mon Sep 17 00:00:00 2001 From: DongHeon Jung Date: Thu, 5 Feb 2026 16:30:56 +0900 Subject: [PATCH 1/8] runtime-async support on armel interpreter --- src/coreclr/vm/arm/asmhelpers.S | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 2af508a0df973c..38110184315451 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -1119,6 +1119,8 @@ NESTED_ENTRY CallJittedMethodRetVoid, _TEXT, NoHandler CHECK_STACK_ALIGNMENT blx r5 mov sp, r4 + ldr r4, [r4, 32] + str r2, [r4] EPILOG_POP "{r4-r10,pc}" NESTED_END CallJittedMethodRetVoid, _TEXT @@ -1134,6 +1136,8 @@ NESTED_ENTRY CallJittedMethodRetI4, _TEXT, NoHandler blx r5 mov sp, r4 str r0, [r10] + ldr r4, [r4, 32] + str r2, [r4] EPILOG_POP "{r4-r10,pc}" NESTED_END CallJittedMethodRetI4, _TEXT @@ -1150,6 +1154,8 @@ NESTED_ENTRY CallJittedMethodRetI8, _TEXT, NoHandler mov sp, r4 str r0, [r10] str r1, [r10, 4] + ldr r4, [r4, 32] + str r2, [r4] EPILOG_POP "{r4-r10,pc}" NESTED_END CallJittedMethodRetI8, _TEXT @@ -1172,6 +1178,8 @@ NESTED_ENTRY CallJittedMethodRetBuffR0, _TEXT, NoHandler CHECK_STACK_ALIGNMENT blx r5 mov sp, r4 + ldr r4, [r4, 32] + str r2, [r4] EPILOG_POP "{r4-r10,pc}" NESTED_END CallJittedMethodRetBuffR0, _TEXT @@ -1186,6 +1194,8 @@ NESTED_ENTRY CallJittedMethodRetBuffR1, _TEXT, NoHandler CHECK_STACK_ALIGNMENT blx r5 mov sp, r4 + ldr r4, [r4, 32] + str r2, [r4] EPILOG_POP "{r4-r10,pc}" NESTED_END CallJittedMethodRetBuffR1, _TEXT @@ -1202,6 +1212,8 @@ NESTED_ENTRY CallJittedMethodRetI1, _TEXT, NoHandler mov sp, r4 sxtb r0, r0 str r0, [r10] + ldr r4, [r4, 32] + str r2, [r4] EPILOG_POP "{r4-r10,pc}" NESTED_END CallJittedMethodRetI1, _TEXT @@ -1218,6 +1230,8 @@ NESTED_ENTRY CallJittedMethodRetI2, _TEXT, NoHandler mov sp, r4 sxth r0, r0 str r0, [r10] + ldr r4, [r4, 32] + str r2, [r4] EPILOG_POP "{r4-r10,pc}" NESTED_END 
CallJittedMethodRetI2, _TEXT @@ -1234,6 +1248,8 @@ NESTED_ENTRY CallJittedMethodRetU1, _TEXT, NoHandler mov sp, r4 uxtb r0, r0 str r0, [r10] + ldr r4, [r4, 32] + str r2, [r4] EPILOG_POP "{r4-r10,pc}" NESTED_END CallJittedMethodRetU1, _TEXT @@ -1250,6 +1266,8 @@ NESTED_ENTRY CallJittedMethodRetU2, _TEXT, NoHandler mov sp, r4 uxth r0, r0 str r0, [r10] + ldr r4, [r4, 32] + str r2, [r4] EPILOG_POP "{r4-r10,pc}" NESTED_END CallJittedMethodRetU2, _TEXT @@ -1283,13 +1301,16 @@ LOCAL_LABEL(HaveInterpThreadContext): cmp r7, #0 beq LOCAL_LABEL(NoManagedThreadOrCallStub) add r6, r7, #OFFSETOF__CallStubHeader__Routines - ldr r7, [r5, #OFFSETOF__InterpThreadContext__pStackPointer] // HERE + ldr r7, [r5, #OFFSETOF__InterpThreadContext__pStackPointer] ldr r5, [r6], 4 // InterpThreadContext EPILOG_POP "{r0-r3}" CHECK_STACK_ALIGNMENT blx r5 + // Fill in the ContinuationContext register + ldr r2, [sp, #__PWTB_ArgumentRegisters + 8] + EPILOG_WITH_TRANSITION_BLOCK_RETURN NESTED_END InterpreterStub, _TEXT From 486fa82fe7acf5b40d88e53eaa29f33357c47525 Mon Sep 17 00:00:00 2001 From: DongHeon Jung Date: Thu, 5 Feb 2026 21:52:05 +0900 Subject: [PATCH 2/8] Fix Call ALIGNMENT for ARM To meet the ARM procedure call standard (AAPCS), align the stack to 8 bytes for the CallStub.
--- src/coreclr/vm/callstubgenerator.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index 587ce6f2a05c6c..1a3ae72b660b0a 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -2456,7 +2456,12 @@ CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD, AllocMemTra S_SIZE_T finalStubSize(sizeof(CallStubHeader) + m_routineIndex * sizeof(PCODE)); void *pHeaderStorage = pamTracker->Track(pLoaderAllocator->GetHighFrequencyHeap()->AllocMem(finalStubSize)); +#ifdef TARGET_ARM + // AAPCS compliant stack alignment for function calls + CallStubHeader *pHeader = new (pHeaderStorage) CallStubHeader(m_routineIndex, pRoutines, ALIGN_UP(m_totalStackSize, 8), sig.IsAsyncCall(), m_pInvokeFunction); +#else CallStubHeader *pHeader = new (pHeaderStorage) CallStubHeader(m_routineIndex, pRoutines, ALIGN_UP(m_totalStackSize, STACK_ALIGN_SIZE), sig.IsAsyncCall(), m_pInvokeFunction); +#endif // TARGET_ARM return pHeader; } From a3eccdf6062ad330a9174111b7caf2620fe0c46c Mon Sep 17 00:00:00 2001 From: DongHeon Jung Date: Thu, 5 Mar 2026 15:44:04 +0900 Subject: [PATCH 3/8] Fix isFilter stack offset in ARM32 CallInterpreterFunclet Fix wrong stack offset for isFilter in ARM32 CallInterpreterFunclet. Used __PWTB_ArgumentRegisters instead of __PWTB_TransitionBlock to account for CalleeSavedRegisters (36 bytes). 
--- src/coreclr/vm/arm/asmhelpers.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 4191a84f57bffb..590f6236f4200d 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -1417,9 +1417,9 @@ NESTED_ENTRY CallInterpreterFunclet, _TEXT, NoHandler str r12, [sp, #4] // Load isFilter from original stack location and store as 5th param (1st stack arg) - // After PROLOG_WITH_TRANSITION_BLOCK, original stack args are at __PWTB_TransitionBlock offset + // After PROLOG_WITH_TRANSITION_BLOCK, original stack args are past the ArgumentRegisters // The 5th param (isFilter) was pushed before our stack allocation - ldr r12, [sp, #8 + __PWTB_TransitionBlock + SIZEOF__ArgumentRegisters] + ldr r12, [sp, #8 + __PWTB_ArgumentRegisters + SIZEOF__ArgumentRegisters] str r12, [sp, #0] // r0-r3 remain unchanged From 4964c18784b1af1d9126a9b8edd0d9affa0f9614 Mon Sep 17 00:00:00 2001 From: DongHeon Jung Date: Wed, 11 Mar 2026 16:44:53 +0900 Subject: [PATCH 4/8] Add missing Load_R1_R2_4B and Store_R1_R2_4B routines for ARM32 interpreter --- src/coreclr/vm/arm/asmhelpers.S | 14 ++++++++++++++ src/coreclr/vm/callstubgenerator.cpp | 6 ++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 590f6236f4200d..807e9b24ff7c70 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -996,6 +996,13 @@ ALTERNATE_ENTRY Load_R2_R3_4B EPILOG_BRANCH_REG r5 LEAF_END Load_R0_R1_R2_R3_4B +LEAF_ENTRY Load_R1_R2_4B + ldr r1, [r7], #4 + ldr r2, [r7], #4 + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 +LEAF_END Load_R1_R2_4B + LEAF_ENTRY Store_R0 str r0, [r7], #8 ldr r5, [r6], #4 @@ -1049,6 +1056,13 @@ ALTERNATE_ENTRY Store_R2_R3_4B EPILOG_BRANCH_REG r5 LEAF_END Store_R0_R1_R2_R3_4B +LEAF_ENTRY Store_R1_R2_4B + str r1, [r7], #4 + str r2, [r7], #4 + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 
+LEAF_END Store_R1_R2_4B + LEAF_ENTRY InjectInterpStackAlign add r7, r7, #4 ldr r5, [r6], #4 diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index 52466bac2f57dc..71003f4164fe33 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -606,10 +606,12 @@ extern "C" void Store_R3(); extern "C" void Load_R0_R1_4B(); extern "C" void Load_R0_R1_R2_R3_4B(); +extern "C" void Load_R1_R2_4B(); extern "C" void Load_R2_R3_4B(); extern "C" void Load_Stack_4B(); extern "C" void Store_R0_R1_4B(); extern "C" void Store_R0_R1_R2_R3_4B(); +extern "C" void Store_R1_R2_4B(); extern "C" void Store_R2_R3_4B(); extern "C" void Store_Stack_4B(); @@ -1120,13 +1122,13 @@ PCODE CallStubGenerator::GetRegRoutine_4B(int r1, int r2) #endif static const PCODE GPRegLoadRoutines_4B[] = { (PCODE)0, (PCODE)Load_R0_R1_4B, (PCODE)0, (PCODE)Load_R0_R1_R2_R3_4B, - (PCODE)0, (PCODE)0, (PCODE)0, (PCODE)0, + (PCODE)0, (PCODE)0, (PCODE)Load_R1_R2_4B, (PCODE)0, (PCODE)0, (PCODE)0, (PCODE)0, (PCODE)Load_R2_R3_4B, (PCODE)0, (PCODE)0, (PCODE)0, (PCODE)0 }; static const PCODE GPRegStoreRoutines_4B[] = { (PCODE)0, (PCODE)Store_R0_R1_4B, (PCODE)0, (PCODE)Store_R0_R1_R2_R3_4B, - (PCODE)0, (PCODE)0, (PCODE)0, (PCODE)0, + (PCODE)0, (PCODE)0, (PCODE)Store_R1_R2_4B, (PCODE)0, (PCODE)0, (PCODE)0, (PCODE)0, (PCODE)Store_R2_R3_4B, (PCODE)0, (PCODE)0, (PCODE)0, (PCODE)0 }; From cc5eb9985a853df6f1c01812e4267e2186eec43b Mon Sep 17 00:00:00 2001 From: DongHeon Jung Date: Wed, 11 Mar 2026 21:30:40 +0900 Subject: [PATCH 5/8] Fix ARM32 Store_Stack offset by moving after InterpreterStub's PROLOG_WITH_TRANSITION_BLOCK --- src/coreclr/vm/arm/asmhelpers.S | 57 +++++++++++++++++---------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 807e9b24ff7c70..24659b8b2dd980 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -1095,34 
+1095,6 @@ LOCAL_LABEL(CopyLoop_4B): EPILOG_BRANCH_REG r5 LEAF_END Load_Stack_4B -LEAF_ENTRY Store_Stack - ldr r9, [r6], #4 // SP offset - ldr r8, [r6], #4 // number of stack slots - add r9, sp, r9 - add r9, r9, #__PWTB_TransitionBlock + SIZEOF__TransitionBlock -LOCAL_LABEL(StoreCopyLoop): - ldr r5, [r9], #4 - str r5, [r7], #8 - subs r8, r8, #4 - bne LOCAL_LABEL(StoreCopyLoop) - ldr r5, [r6], #4 - EPILOG_BRANCH_REG r5 -LEAF_END Store_Stack - -LEAF_ENTRY Store_Stack_4B - ldr r9, [r6], #4 // SP offset - ldr r8, [r6], #4 // number of stack slots - add r9, sp, r9 - add r9, r9, #__PWTB_TransitionBlock + SIZEOF__TransitionBlock -LOCAL_LABEL(StoreCopyLoop_4B): - ldr r5, [r9], #4 - str r5, [r7], #4 - subs r8, r8, #4 - bne LOCAL_LABEL(StoreCopyLoop_4B) - ldr r5, [r6], #4 - EPILOG_BRANCH_REG r5 -LEAF_END Store_Stack_4B - NESTED_ENTRY CallJittedMethodRetVoid, _TEXT, NoHandler PROLOG_PUSH "{r4-r10,lr}" mov r4, sp @@ -1403,6 +1375,35 @@ NESTED_ENTRY InterpreterStubRetBuffR1, _TEXT, NoHandler EPILOG_POP {pc} NESTED_END InterpreterStubRetBuffR1, _TEXT +LEAF_ENTRY Store_Stack + ldr r9, [r6], #4 // SP offset + ldr r8, [r6], #4 // number of stack slots + add r9, sp, r9 + add r9, r9, #__PWTB_TransitionBlock + SIZEOF__TransitionBlock +LOCAL_LABEL(StoreCopyLoop): + ldr r5, [r9], #4 + str r5, [r7], #8 + subs r8, r8, #4 + bne LOCAL_LABEL(StoreCopyLoop) + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 +LEAF_END Store_Stack + +LEAF_ENTRY Store_Stack_4B + ldr r9, [r6], #4 // SP offset + ldr r8, [r6], #4 // number of stack slots + add r9, sp, r9 + add r9, r9, #__PWTB_TransitionBlock + SIZEOF__TransitionBlock +LOCAL_LABEL(StoreCopyLoop_4B): + ldr r5, [r9], #4 + str r5, [r7], #4 + subs r8, r8, #4 + bne LOCAL_LABEL(StoreCopyLoop_4B) + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 +LEAF_END Store_Stack_4B + + // ------------------------------------------------------------------ // Create a real TransitionBlock and call CallInterpreterFuncletWorker // to execute an interpreter funclet (catch/finally/filter 
handler). From 80dd0fea472c101a33003933e26d06ceaa987e18 Mon Sep 17 00:00:00 2001 From: DongHeon Jung Date: Thu, 12 Mar 2026 11:43:39 +0900 Subject: [PATCH 6/8] Fix Call ALIGNMENT for ARM --- src/coreclr/vm/arm/cgencpu.h | 1 + src/coreclr/vm/callstubgenerator.cpp | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/coreclr/vm/arm/cgencpu.h b/src/coreclr/vm/arm/cgencpu.h index fea7e5a41d5354..f073cca8d32872 100644 --- a/src/coreclr/vm/arm/cgencpu.h +++ b/src/coreclr/vm/arm/cgencpu.h @@ -63,6 +63,7 @@ EXTERN_C void checkStack(void); #define COMMETHOD_PREPAD 12 // # extra bytes to allocate in addition to sizeof(ComCallMethodDesc) #define STACK_ALIGN_SIZE 4 +#define CALL_STACK_ALIGN_SIZE 8 #define JUMP_ALLOCATE_SIZE 8 // # bytes to allocate for a jump instruction #define BACK_TO_BACK_JUMP_ALLOCATE_SIZE 8 // # bytes to allocate for a back to back jump instruction diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index 71003f4164fe33..d0e666e4b81401 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -1608,7 +1608,7 @@ CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD, AllocMemTra int targetSlotIndex = m_interpreterToNative ? 
m_targetSlotIndex : (m_routineIndex - 1); #ifdef TARGET_ARM // AAPCS compliant stack alignment for function calls - CallStubHeader *pHeader = new (pHeaderStorage) CallStubHeader(m_routineIndex, targetSlotIndex, pRoutines, ALIGN_UP(m_totalStackSize, 8), sig.IsAsyncCall(), hasSwiftError, hasSwiftReturnLowering, m_pInvokeFunction); + CallStubHeader *pHeader = new (pHeaderStorage) CallStubHeader(m_routineIndex, targetSlotIndex, pRoutines, ALIGN_UP(m_totalStackSize, CALL_STACK_ALIGN_SIZE), sig.IsAsyncCall(), hasSwiftError, hasSwiftReturnLowering, m_pInvokeFunction); #else CallStubHeader *pHeader = new (pHeaderStorage) CallStubHeader(m_routineIndex, targetSlotIndex, pRoutines, ALIGN_UP(m_totalStackSize, STACK_ALIGN_SIZE), sig.IsAsyncCall(), hasSwiftError, hasSwiftReturnLowering, m_pInvokeFunction); #endif // TARGET_ARM @@ -1704,6 +1704,12 @@ CallStubHeader *CallStubGenerator::GenerateCallStubForSig(MetaSig &sig) PCODE *pRoutines = (PCODE*)alloca(tempStorageSize); memset(pRoutines, 0, tempStorageSize); + int totalStackSize = m_totalStackSize; +#ifdef TARGET_ARM + // AAPCS compliant stack alignment for function calls + totalStackSize = ALIGN_UP(totalStackSize, CALL_STACK_ALIGN_SIZE); +#endif // TARGET_ARM + m_interpreterToNative = true; // We always generate the interpreter to native call stub here ComputeCallStub(sig, pRoutines, NULL); @@ -1713,7 +1719,8 @@ CallStubHeader *CallStubGenerator::GenerateCallStubForSig(MetaSig &sig) { hashState.AddPointer((void*)pRoutines[i]); } - hashState.Add(m_totalStackSize); + + hashState.Add(totalStackSize); hashState.AddPointer((void*)m_pInvokeFunction); hashState.Add(sig.IsAsyncCall() ? 
1 : 0); hashState.Add(m_targetSlotIndex); @@ -1726,7 +1733,7 @@ CallStubHeader *CallStubGenerator::GenerateCallStubForSig(MetaSig &sig) m_routineIndex, m_targetSlotIndex, pRoutines, - ALIGN_UP(m_totalStackSize, STACK_ALIGN_SIZE), + ALIGN_UP(totalStackSize, STACK_ALIGN_SIZE), sig.IsAsyncCall(), #if defined(TARGET_APPLE) && defined(TARGET_ARM64) m_hasSwiftError, @@ -1752,9 +1759,9 @@ CallStubHeader *CallStubGenerator::GenerateCallStubForSig(MetaSig &sig) void* pHeaderStorage = amTracker.Track(SystemDomain::GetGlobalLoaderAllocator()->GetHighFrequencyHeap()->AllocMem(S_SIZE_T(finalCachedCallStubSize))); // hasSwiftReturnLowering is always false here because m_interpreterToNative = true (see line 1601's logic) #if defined(TARGET_APPLE) && defined(TARGET_ARM64) - CachedCallStub *pHeader = new (pHeaderStorage) CachedCallStub(cachedHeaderKey.HashCode, m_routineIndex, m_targetSlotIndex, pRoutines, ALIGN_UP(m_totalStackSize, STACK_ALIGN_SIZE), sig.IsAsyncCall(), m_hasSwiftError, false /* hasSwiftReturnLowering */, m_pInvokeFunction); + CachedCallStub *pHeader = new (pHeaderStorage) CachedCallStub(cachedHeaderKey.HashCode, m_routineIndex, m_targetSlotIndex, pRoutines, ALIGN_UP(totalStackSize, STACK_ALIGN_SIZE), sig.IsAsyncCall(), m_hasSwiftError, false /* hasSwiftReturnLowering */, m_pInvokeFunction); #else - CachedCallStub *pHeader = new (pHeaderStorage) CachedCallStub(cachedHeaderKey.HashCode, m_routineIndex, m_targetSlotIndex, pRoutines, ALIGN_UP(m_totalStackSize, STACK_ALIGN_SIZE), sig.IsAsyncCall(), false, false, m_pInvokeFunction); + CachedCallStub *pHeader = new (pHeaderStorage) CachedCallStub(cachedHeaderKey.HashCode, m_routineIndex, m_targetSlotIndex, pRoutines, ALIGN_UP(totalStackSize, STACK_ALIGN_SIZE), sig.IsAsyncCall(), false, false, m_pInvokeFunction); #endif s_callStubCache->Add(pHeader); amTracker.SuppressRelease(); From 8694aec7e3123a71fc2761990e4ab2d4a10041da Mon Sep 17 00:00:00 2001 From: DongHeon Jung Date: Thu, 12 Mar 2026 20:26:40 +0900 Subject: [PATCH 
7/8] Fix Crashes in Interpreter --- src/coreclr/vm/arm/asmhelpers.S | 32 +++++++++++++-------- src/coreclr/vm/callstubgenerator.cpp | 43 +++++++++++++++++----------- 2 files changed, 46 insertions(+), 29 deletions(-) diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 24659b8b2dd980..532da5f3bc648e 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -986,22 +986,26 @@ LEAF_ENTRY Load_R0_R1_4B EPILOG_BRANCH_REG r5 LEAF_END Load_R0_R1_4B -LEAF_ENTRY Load_R0_R1_R2_R3_4B +LEAF_ENTRY Load_R0_R1_R2_4B ldr r0, [r7], #4 +ALTERNATE_ENTRY Load_R1_R2_4B ldr r1, [r7], #4 -ALTERNATE_ENTRY Load_R2_R3_4B ldr r2, [r7], #4 - ldr r3, [r7], #4 ldr r5, [r6], #4 EPILOG_BRANCH_REG r5 -LEAF_END Load_R0_R1_R2_R3_4B +LEAF_END Load_R0_R1_R2_4B -LEAF_ENTRY Load_R1_R2_4B +LEAF_ENTRY Load_R0_R1_R2_R3_4B + ldr r0, [r7], #4 +ALTERNATE_ENTRY Load_R1_R2_R3_4B ldr r1, [r7], #4 +ALTERNATE_ENTRY Load_R2_R3_4B ldr r2, [r7], #4 +ALTERNATE_ENTRY Load_R3_4B + ldr r3, [r7], #4 ldr r5, [r6], #4 EPILOG_BRANCH_REG r5 -LEAF_END Load_R1_R2_4B +LEAF_END Load_R0_R1_R2_R3_4B LEAF_ENTRY Store_R0 str r0, [r7], #8 @@ -1046,22 +1050,26 @@ LEAF_ENTRY Store_R0_R1_4B EPILOG_BRANCH_REG r5 LEAF_END Store_R0_R1_4B -LEAF_ENTRY Store_R0_R1_R2_R3_4B +LEAF_ENTRY Store_R0_R1_R2_4B str r0, [r7], #4 +ALTERNATE_ENTRY Store_R1_R2_4B str r1, [r7], #4 -ALTERNATE_ENTRY Store_R2_R3_4B str r2, [r7], #4 - str r3, [r7], #4 ldr r5, [r6], #4 EPILOG_BRANCH_REG r5 -LEAF_END Store_R0_R1_R2_R3_4B +LEAF_END Store_R0_R1_R2_4B -LEAF_ENTRY Store_R1_R2_4B +LEAF_ENTRY Store_R0_R1_R2_R3_4B + str r0, [r7], #4 +ALTERNATE_ENTRY Store_R1_R2_R3_4B str r1, [r7], #4 +ALTERNATE_ENTRY Store_R2_R3_4B str r2, [r7], #4 +ALTERNATE_ENTRY Store_R3_4B + str r3, [r7], #4 ldr r5, [r6], #4 EPILOG_BRANCH_REG r5 -LEAF_END Store_R1_R2_4B +LEAF_END Store_R0_R1_R2_R3_4B LEAF_ENTRY InjectInterpStackAlign add r7, r7, #4 diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp 
index d0e666e4b81401..ca76de0eaa5ae2 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -605,14 +605,20 @@ extern "C" void Store_R2_R3(); extern "C" void Store_R3(); extern "C" void Load_R0_R1_4B(); +extern "C" void Load_R0_R1_R2_4B(); extern "C" void Load_R0_R1_R2_R3_4B(); extern "C" void Load_R1_R2_4B(); +extern "C" void Load_R1_R2_R3_4B(); extern "C" void Load_R2_R3_4B(); +extern "C" void Load_R3_4B(); extern "C" void Load_Stack_4B(); extern "C" void Store_R0_R1_4B(); +extern "C" void Store_R0_R1_R2_4B(); extern "C" void Store_R0_R1_R2_R3_4B(); extern "C" void Store_R1_R2_4B(); +extern "C" void Store_R1_R2_R3_4B(); extern "C" void Store_R2_R3_4B(); +extern "C" void Store_R3_4B(); extern "C" void Store_Stack_4B(); #endif // TARGET_ARM @@ -789,7 +795,7 @@ extern "C" void Store_FA7(); PCODE CallStubGenerator::GetStackRoutine() { - LOG2((LF2_INTERPRETER, LL_INFO10000, "Load_Stack\n")); + LOG2((LF2_INTERPRETER, LL_INFO10000, "GetStackRoutine\n")); return m_interpreterToNative ? 
(PCODE)Load_Stack : (PCODE)Store_Stack; } @@ -1121,20 +1127,22 @@ PCODE CallStubGenerator::GetRegRoutine_4B(int r1, int r2) LOG2((LF2_INTERPRETER, LL_INFO10000, "GetRegRoutine_4B\n")); #endif static const PCODE GPRegLoadRoutines_4B[] = { - (PCODE)0, (PCODE)Load_R0_R1_4B, (PCODE)0, (PCODE)Load_R0_R1_R2_R3_4B, - (PCODE)0, (PCODE)0, (PCODE)Load_R1_R2_4B, (PCODE)0, + (PCODE)0, (PCODE)Load_R0_R1_4B, (PCODE)Load_R0_R1_R2_4B, (PCODE)Load_R0_R1_R2_R3_4B, + (PCODE)0, (PCODE)0, (PCODE)Load_R1_R2_4B, (PCODE)Load_R1_R2_R3_4B, (PCODE)0, (PCODE)0, (PCODE)0, (PCODE)Load_R2_R3_4B, - (PCODE)0, (PCODE)0, (PCODE)0, (PCODE)0 + (PCODE)0, (PCODE)0, (PCODE)0, (PCODE)Load_R3_4B }; static const PCODE GPRegStoreRoutines_4B[] = { - (PCODE)0, (PCODE)Store_R0_R1_4B, (PCODE)0, (PCODE)Store_R0_R1_R2_R3_4B, - (PCODE)0, (PCODE)0, (PCODE)Store_R1_R2_4B, (PCODE)0, + (PCODE)0, (PCODE)Store_R0_R1_4B, (PCODE)Store_R0_R1_R2_4B, (PCODE)Store_R0_R1_R2_R3_4B, + (PCODE)0, (PCODE)0, (PCODE)Store_R1_R2_4B, (PCODE)Store_R1_R2_R3_4B, (PCODE)0, (PCODE)0, (PCODE)0, (PCODE)Store_R2_R3_4B, - (PCODE)0, (PCODE)0, (PCODE)0, (PCODE)0 + (PCODE)0, (PCODE)0, (PCODE)0, (PCODE)Store_R3_4B }; int index = r1 * NUM_ARGUMENT_REGISTERS + r2; - return m_interpreterToNative ? GPRegLoadRoutines_4B[index] : GPRegStoreRoutines_4B[index]; + PCODE routine = m_interpreterToNative ? 
GPRegLoadRoutines_4B[index] : GPRegStoreRoutines_4B[index]; + _ASSERTE(routine != 0); + return routine; } PCODE CallStubGenerator::GetStackRoutine_4B() @@ -1704,16 +1712,16 @@ CallStubHeader *CallStubGenerator::GenerateCallStubForSig(MetaSig &sig) PCODE *pRoutines = (PCODE*)alloca(tempStorageSize); memset(pRoutines, 0, tempStorageSize); + m_interpreterToNative = true; // We always generate the interpreter to native call stub here + + ComputeCallStub(sig, pRoutines, NULL); + int totalStackSize = m_totalStackSize; #ifdef TARGET_ARM // AAPCS compliant stack alignment for function calls totalStackSize = ALIGN_UP(totalStackSize, CALL_STACK_ALIGN_SIZE); #endif // TARGET_ARM - m_interpreterToNative = true; // We always generate the interpreter to native call stub here - - ComputeCallStub(sig, pRoutines, NULL); - xxHash hashState; for (int i = 0; i < m_routineIndex; i++) { @@ -2291,7 +2299,7 @@ void CallStubGenerator::ComputeCallStubWorker(bool hasUnmanagedCallConv, CorInfo } else #elif defined(TARGET_ARM) && defined(ARM_SOFTFP) - if (argLocDesc.m_cGenReg != 0 && argLocDesc.m_byteStackSize != 0) + if (argLocDesc.m_cGenReg > 1 && argLocDesc.m_byteStackSize > 4) { ArgLocDesc argLocDescReg = {}; argLocDescReg.m_idxGenReg = argLocDesc.m_idxGenReg; @@ -2379,9 +2387,10 @@ void CallStubGenerator::ProcessArgument(ArgIteratorType *pArgIt, ArgLocDesc& arg RoutineType argType = RoutineType::None; #ifdef TARGET_ARM - if (argLocDesc.m_cGenReg == 2 || argLocDesc.m_byteStackSize >= 8) + bool needToHandleAs4B = false; + if ((argLocDesc.m_cGenReg * 4 + argLocDesc.m_byteStackSize) >= 8) { - /* do nothing */ + needToHandleAs4B = true; } else #endif // TARGET_ARM @@ -2417,7 +2426,7 @@ void CallStubGenerator::ProcessArgument(ArgIteratorType *pArgIt, ArgLocDesc& arg { LOG2((LF2_INTERPRETER, LL_INFO10000, "m_cGenReg=%d\n", (int)argLocDesc.m_cGenReg)); #ifdef TARGET_ARM - if (argLocDesc.m_cGenReg == 2) + if (needToHandleAs4B) { pRoutines[m_routineIndex++] = 
GetRegRoutine_4B(argLocDesc.m_idxGenReg, argLocDesc.m_idxGenReg + argLocDesc.m_cGenReg - 1); } @@ -2501,7 +2510,7 @@ void CallStubGenerator::ProcessArgument(ArgIteratorType *pArgIt, ArgLocDesc& arg { LOG2((LF2_INTERPRETER, LL_INFO10000, "m_byteStackSize=%d\n", (int)argLocDesc.m_byteStackSize)); #ifdef TARGET_ARM - if (argLocDesc.m_byteStackSize >= 8) + if (needToHandleAs4B) { pRoutines[m_routineIndex++] = GetStackRoutine_4B(); pRoutines[m_routineIndex++] = argLocDesc.m_byteStackIndex; From 14a53ce3a8f65f57ac2385e1a24d021b484702cb Mon Sep 17 00:00:00 2001 From: DongHeon Jung Date: Fri, 13 Mar 2026 14:04:18 +0900 Subject: [PATCH 8/8] Fix error about struct over 8 bytes --- src/coreclr/vm/callstubgenerator.cpp | 31 ++++++++++++---------------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index ca76de0eaa5ae2..f924f93f3b7fa9 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -2298,20 +2298,6 @@ void CallStubGenerator::ComputeCallStubWorker(bool hasUnmanagedCallConv, CorInfo } } else -#elif defined(TARGET_ARM) && defined(ARM_SOFTFP) - if (argLocDesc.m_cGenReg > 1 && argLocDesc.m_byteStackSize > 4) - { - ArgLocDesc argLocDescReg = {}; - argLocDescReg.m_idxGenReg = argLocDesc.m_idxGenReg; - argLocDescReg.m_cGenReg = argLocDesc.m_cGenReg; - ProcessArgument(&argIt, argLocDescReg, pRoutines); - - ArgLocDesc argLocDescStack = {}; - argLocDescStack.m_byteStackIndex = argLocDesc.m_byteStackIndex; - argLocDescStack.m_byteStackSize = argLocDesc.m_byteStackSize; - ProcessArgument(&argIt, argLocDescStack, pRoutines); - } - else #endif // UNIX_AMD64_ABI { ProcessArgument(&argIt, argLocDesc, pRoutines); @@ -2387,10 +2373,10 @@ void CallStubGenerator::ProcessArgument(ArgIteratorType *pArgIt, ArgLocDesc& arg RoutineType argType = RoutineType::None; #ifdef TARGET_ARM - bool needToHandleAs4B = false; + bool useRoutine4B = false; if 
((argLocDesc.m_cGenReg * 4 + argLocDesc.m_byteStackSize) >= 8) { - needToHandleAs4B = true; + useRoutine4B = true; } else #endif // TARGET_ARM @@ -2426,7 +2412,7 @@ void CallStubGenerator::ProcessArgument(ArgIteratorType *pArgIt, ArgLocDesc& arg { LOG2((LF2_INTERPRETER, LL_INFO10000, "m_cGenReg=%d\n", (int)argLocDesc.m_cGenReg)); #ifdef TARGET_ARM - if (needToHandleAs4B) + if (useRoutine4B) { pRoutines[m_routineIndex++] = GetRegRoutine_4B(argLocDesc.m_idxGenReg, argLocDesc.m_idxGenReg + argLocDesc.m_cGenReg - 1); } @@ -2510,7 +2496,7 @@ void CallStubGenerator::ProcessArgument(ArgIteratorType *pArgIt, ArgLocDesc& arg { LOG2((LF2_INTERPRETER, LL_INFO10000, "m_byteStackSize=%d\n", (int)argLocDesc.m_byteStackSize)); #ifdef TARGET_ARM - if (needToHandleAs4B) + if (useRoutine4B) { pRoutines[m_routineIndex++] = GetStackRoutine_4B(); pRoutines[m_routineIndex++] = argLocDesc.m_byteStackIndex; @@ -2613,6 +2599,15 @@ void CallStubGenerator::ProcessArgument(ArgIteratorType *pArgIt, ArgLocDesc& arg } #endif // ENREGISTERED_PARAMTYPE_MAXSIZE #endif // UNIX_AMD64_ABI +#ifdef TARGET_ARM + if (useRoutine4B) + { + if ((argLocDesc.m_cGenReg * 4 + argLocDesc.m_byteStackSize) % INTERP_STACK_SLOT_SIZE != 0) + { + pRoutines[m_routineIndex++] = (PCODE)InjectInterpStackAlign; + } + } +#endif // TARGET_ARM m_currentRoutineType = argType; }