From 57bd5b99776b07a973a1a041be233235fc5f6832 Mon Sep 17 00:00:00 2001 From: Adeel Mujahid <3840695+am11@users.noreply.github.com> Date: Fri, 16 Jan 2026 21:03:22 +0000 Subject: [PATCH 01/14] Bring a few jithelpers to new unwind plan --- src/coreclr/pal/inc/unixasmmacrosarm64.inc | 26 ++ .../pal/inc/unixasmmacrosloongarch64.inc | 25 ++ src/coreclr/pal/inc/unixasmmacrosriscv64.inc | 29 ++ src/coreclr/vm/amd64/AsmHelpers.asm | 9 +- src/coreclr/vm/amd64/AsmMacros.inc | 23 ++ src/coreclr/vm/arm64/asmhelpers.S | 9 +- src/coreclr/vm/arm64/asmhelpers.asm | 9 +- src/coreclr/vm/arm64/asmmacros.h | 27 ++ src/coreclr/vm/excep.cpp | 349 ++++++++++++++++-- src/coreclr/vm/frames.h | 5 + src/coreclr/vm/jithelpers.cpp | 109 ++---- src/coreclr/vm/loongarch64/asmhelpers.S | 6 +- src/coreclr/vm/riscv64/asmhelpers.S | 6 +- 13 files changed, 509 insertions(+), 123 deletions(-) diff --git a/src/coreclr/pal/inc/unixasmmacrosarm64.inc b/src/coreclr/pal/inc/unixasmmacrosarm64.inc index 2d27459372b561..3669595086d4e9 100644 --- a/src/coreclr/pal/inc/unixasmmacrosarm64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosarm64.inc @@ -468,6 +468,32 @@ C_FUNC(\Name\()_End): add \target, sp, #192 .endm +// Epilog for PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS - restores FP callee-saved and returns +.macro POP_COOP_PINVOKE_FRAME_WITH_FLOATS_RETURN + + // Restore FP callee-saved registers (d8-d15) from sp+0 + ldr d8, [sp, #0] + ldr d9, [sp, #8] + ldr d10, [sp, #16] + ldr d11, [sp, #24] + ldr d12, [sp, #32] + ldr d13, [sp, #40] + ldr d14, [sp, #48] + ldr d15, [sp, #56] + + // Deallocate space for FloatArgumentRegisters + FP callee-saved + EPILOG_STACK_FREE 192 + + // Restore callee-saved registers + EPILOG_RESTORE_REG_PAIR x27, x28, 80 + EPILOG_RESTORE_REG_PAIR x25, x26, 64 + EPILOG_RESTORE_REG_PAIR x23, x24, 48 + EPILOG_RESTORE_REG_PAIR x21, x22, 32 + EPILOG_RESTORE_REG_PAIR x19, x20, 16 + + EPILOG_RESTORE_REG_PAIR_RET fp, lr, 176 +.endm + // ------------------------------------------------------------------ // 
Macro to generate Redirection Stubs // diff --git a/src/coreclr/pal/inc/unixasmmacrosloongarch64.inc b/src/coreclr/pal/inc/unixasmmacrosloongarch64.inc index 92d701598f933e..b33d7377c327dc 100644 --- a/src/coreclr/pal/inc/unixasmmacrosloongarch64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosloongarch64.inc @@ -443,6 +443,31 @@ C_FUNC(\Name\()_End): addi.d \target, $sp, 128 .endm +// ------------------------------------------------------------------ +// Epilog macro for PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS that restores all registers and returns. +// Use this for JIT helpers that may return normally (unlike IL_Throw which never returns). +.macro POP_COOP_PINVOKE_FRAME_WITH_FLOATS_RETURN + // Restore FP callee-saved registers (f24-f31) from offset 0 + fld.d $f24, $sp, 0 + fld.d $f25, $sp, 8 + fld.d $f26, $sp, 16 + fld.d $f27, $sp, 24 + fld.d $f28, $sp, 32 + fld.d $f29, $sp, 40 + fld.d $f30, $sp, 48 + fld.d $f31, $sp, 56 + + // Restore callee-saved registers from offset 128 + RESTORE_CALLEESAVED_REGISTERS $sp, 128 + + // Restore fp, ra + EPILOG_RESTORE_REG_PAIR 22, 1, 128 + + // Deallocate stack and return + EPILOG_STACK_FREE 288 + EPILOG_RETURN +.endm + // ------------------------------------------------------------------ // Macro to generate Redirection Stubs // diff --git a/src/coreclr/pal/inc/unixasmmacrosriscv64.inc b/src/coreclr/pal/inc/unixasmmacrosriscv64.inc index d244756c304eb9..5e0f6d27261f72 100644 --- a/src/coreclr/pal/inc/unixasmmacrosriscv64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosriscv64.inc @@ -394,6 +394,35 @@ C_FUNC(\Name): addi \target, sp, 160 .endm +// ------------------------------------------------------------------ +// Epilog macro for PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS that restores all registers and returns. +// Use this for JIT helpers that may return normally (unlike IL_Throw which never returns). 
+.macro POP_COOP_PINVOKE_FRAME_WITH_FLOATS_RETURN + // Restore FP callee-saved registers (fs0-fs11 = f8,f9,f18-f27) from offset 0 + fld fs0, 0(sp) // f8 + fld fs1, 8(sp) // f9 + fld fs2, 16(sp) // f18 + fld fs3, 24(sp) // f19 + fld fs4, 32(sp) // f20 + fld fs5, 40(sp) // f21 + fld fs6, 48(sp) // f22 + fld fs7, 56(sp) // f23 + fld fs8, 64(sp) // f24 + fld fs9, 72(sp) // f25 + fld fs10, 80(sp) // f26 + fld fs11, 88(sp) // f27 + + // Restore callee-saved registers from offset 160 + RESTORE_CALLEESAVED_REGISTERS sp, 160 + + // Restore fp, ra + EPILOG_RESTORE_REG_PAIR fp, ra, 160 + + // Deallocate stack and return + EPILOG_STACK_FREE 352 + EPILOG_RETURN +.endm + // ------------------------------------------------------------------ // Macro to generate Redirection Stubs // diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm index dc8c5eeec42f91..4cf333d4195f07 100644 --- a/src/coreclr/vm/amd64/AsmHelpers.asm +++ b/src/coreclr/vm/amd64/AsmHelpers.asm @@ -402,12 +402,15 @@ NESTED_END OnCallCountThresholdReachedStub, _TEXT extern JIT_PatchpointWorkerWorkerWithPolicy:proc NESTED_ENTRY JIT_Patchpoint, _TEXT - PROLOG_WITH_TRANSITION_BLOCK + ; Use PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS to save all registers including FP callee-saved + ; This allows us to build a complete CONTEXT from TransitionBlock without RtlCaptureContext + PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS rcx - lea rcx, [rsp + __PWTB_TransitionBlock] ; TransitionBlock * + ; RCX contains pointer to TransitionBlock call JIT_PatchpointWorkerWorkerWithPolicy - EPILOG_WITH_TRANSITION_BLOCK_RETURN + ; If we return, restore all registers and return to caller + POP_COOP_PINVOKE_FRAME_WITH_FLOATS_RETURN NESTED_END JIT_Patchpoint, _TEXT ; first arg register holds iloffset, which needs to be moved to the second register, and the first register filled with NULL diff --git a/src/coreclr/vm/amd64/AsmMacros.inc b/src/coreclr/vm/amd64/AsmMacros.inc index 07531371d6627d..4484995a9724bd 100644 --- 
a/src/coreclr/vm/amd64/AsmMacros.inc +++ b/src/coreclr/vm/amd64/AsmMacros.inc @@ -535,5 +535,28 @@ PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS macro target endm +; Epilog for PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS - restores XMM callee-saved and returns +POP_COOP_PINVOKE_FRAME_WITH_FLOATS_RETURN macro + + ; Restore FP callee-saved registers (xmm6-xmm15) from offset 32 (after shadow space) + movdqa xmm6, [rsp + 20h] + movdqa xmm7, [rsp + 30h] + movdqa xmm8, [rsp + 40h] + movdqa xmm9, [rsp + 50h] + movdqa xmm10, [rsp + 60h] + movdqa xmm11, [rsp + 70h] + movdqa xmm12, [rsp + 80h] + movdqa xmm13, [rsp + 90h] + movdqa xmm14, [rsp + 0A0h] + movdqa xmm15, [rsp + 0B0h] + + ; Deallocate stack space (296 bytes) + add rsp, 296 + + POP_CALLEE_SAVED_REGISTERS + ret + + endm + ;; GC type flags GC_ALLOC_FINALIZE equ 1 diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 127a5c0a118245..181687cddf0489 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -584,12 +584,15 @@ NESTED_END OnCallCountThresholdReachedStub, _TEXT #ifdef FEATURE_ON_STACK_REPLACEMENT NESTED_ENTRY JIT_Patchpoint, _TEXT, NoHandler - PROLOG_WITH_TRANSITION_BLOCK + // Use PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS to save all registers including FP callee-saved + // This allows us to build a complete CONTEXT from TransitionBlock without RtlCaptureContext + PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS x0 - add x0, sp, #__PWTB_TransitionBlock // TransitionBlock * + // x0 contains pointer to TransitionBlock bl C_FUNC(JIT_PatchpointWorkerWorkerWithPolicy) - EPILOG_WITH_TRANSITION_BLOCK_RETURN + // If we return, restore all registers and return to caller + POP_COOP_PINVOKE_FRAME_WITH_FLOATS_RETURN NESTED_END JIT_Patchpoint, _TEXT // first arg register holds iloffset, which needs to be moved to the second register, and the first register filled with NULL diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index 
71448ca3948d27..874bacbc51f4e8 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -1011,12 +1011,15 @@ __HelperNakedFuncName SETS "$helper":CC:"Naked" IMPORT JIT_PatchpointWorkerWorkerWithPolicy NESTED_ENTRY JIT_Patchpoint - PROLOG_WITH_TRANSITION_BLOCK + ; Use PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS to save all registers including FP callee-saved + ; This allows us to build a complete CONTEXT from TransitionBlock without RtlCaptureContext + PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS x0 - add x0, sp, #__PWTB_TransitionBlock ; TransitionBlock * + ; x0 contains pointer to TransitionBlock bl JIT_PatchpointWorkerWorkerWithPolicy - EPILOG_WITH_TRANSITION_BLOCK_RETURN + ; If we return, restore all registers and return to caller + POP_COOP_PINVOKE_FRAME_WITH_FLOATS_RETURN NESTED_END // first arg register holds iloffset, which needs to be moved to the second register, and the first register filled with NULL diff --git a/src/coreclr/vm/arm64/asmmacros.h b/src/coreclr/vm/arm64/asmmacros.h index 93778d775f87c9..7ac61354704073 100644 --- a/src/coreclr/vm/arm64/asmmacros.h +++ b/src/coreclr/vm/arm64/asmmacros.h @@ -253,6 +253,33 @@ OFFSETOF__ee_alloc_context EQU OFFSETOF__RuntimeThreadLocals__ee_alloc_context add $Target, sp, #192 MEND +; Epilog for PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS - restores FP callee-saved and returns + MACRO + POP_COOP_PINVOKE_FRAME_WITH_FLOATS_RETURN + + ; Restore FP callee-saved registers (d8-d15) from sp+0 + ldr d8, [sp, #0] + ldr d9, [sp, #8] + ldr d10, [sp, #16] + ldr d11, [sp, #24] + ldr d12, [sp, #32] + ldr d13, [sp, #40] + ldr d14, [sp, #48] + ldr d15, [sp, #56] + + ; Deallocate space for FloatArgumentRegisters + FP callee-saved + EPILOG_STACK_FREE 192 + + ; Restore callee-saved registers + EPILOG_RESTORE_REG_PAIR x27, x28, #80 + EPILOG_RESTORE_REG_PAIR x25, x26, #64 + EPILOG_RESTORE_REG_PAIR x23, x24, #48 + EPILOG_RESTORE_REG_PAIR x21, x22, #32 + EPILOG_RESTORE_REG_PAIR x19, x20, #16 + + EPILOG_RESTORE_REG_PAIR_RET 
fp, lr, #176! + MEND + #define GC_ALLOC_FINALIZE 1 ;----------------------------------------------------------------------------- diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index 8b670c6ea4911d..ae237a120e1816 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -5463,8 +5463,9 @@ static uintptr_t writeBarrierAVLocations[] = }; // Check if the passed in instruction pointer is in one of the -// JIT helper functions. -bool IsIPInMarkedJitHelper(PCODE uControlPc) +// write barrier helper functions. These are leaf functions that do not +// set up a frame, so we can unwind them with a simple LR/RA extraction. +static bool IsIPInWriteBarrierHelper(PCODE uControlPc) { LIMITED_METHOD_CONTRACT; @@ -5482,29 +5483,135 @@ bool IsIPInMarkedJitHelper(PCODE uControlPc) return true; } -#define CHECK_RANGE(name) \ +#define CHECK_WRITE_BARRIER_RANGE(name) \ if (GetEEFuncEntryPoint(name) <= uControlPc && uControlPc < GetEEFuncEntryPoint(name##_End)) return true; #ifndef TARGET_X86 - CHECK_RANGE(JIT_WriteBarrier) - CHECK_RANGE(JIT_CheckedWriteBarrier) - CHECK_RANGE(JIT_ByRefWriteBarrier) -#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) - CHECK_RANGE(JIT_StackProbe) -#endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64 + CHECK_WRITE_BARRIER_RANGE(JIT_WriteBarrier) + CHECK_WRITE_BARRIER_RANGE(JIT_CheckedWriteBarrier) + CHECK_WRITE_BARRIER_RANGE(JIT_ByRefWriteBarrier) #else - CHECK_RANGE(JIT_WriteBarrierGroup) - CHECK_RANGE(JIT_PatchedWriteBarrierGroup) + CHECK_WRITE_BARRIER_RANGE(JIT_WriteBarrierGroup) + CHECK_WRITE_BARRIER_RANGE(JIT_PatchedWriteBarrierGroup) #endif // TARGET_X86 #if defined(TARGET_AMD64) && defined(_DEBUG) - CHECK_RANGE(JIT_WriteBarrier_Debug) + CHECK_WRITE_BARRIER_RANGE(JIT_WriteBarrier_Debug) #endif + +#undef CHECK_WRITE_BARRIER_RANGE #endif // !FEATURE_PORTABLE_HELPERS return false; } +// Check if the passed in instruction pointer is in JIT_StackProbe. 
+// JIT_StackProbe exists on AMD64 and ARM only. +static bool IsIPInJITStackProbe(PCODE uControlPc) +{ + LIMITED_METHOD_CONTRACT; + +#ifndef FEATURE_PORTABLE_HELPERS +#if !defined(TARGET_X86) && !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) + if (GetEEFuncEntryPoint(JIT_StackProbe) <= uControlPc && uControlPc < GetEEFuncEntryPoint(JIT_StackProbe_End)) + return true; +#endif // !TARGET_X86 && !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64 +#endif // !FEATURE_PORTABLE_HELPERS + + return false; +} + +// Check if the passed in instruction pointer is in one of the +// JIT helper functions. +bool IsIPInMarkedJitHelper(PCODE uControlPc) +{ + LIMITED_METHOD_CONTRACT; + + if (IsIPInWriteBarrierHelper(uControlPc)) + return true; + + if (IsIPInJITStackProbe(uControlPc)) + return true; + + return false; +} + +// Unwind JIT_StackProbe to its caller. JIT_StackProbe has different frame layouts +// depending on the platform: +// - AMD64 Windows: leaf function (no frame), return address at [RSP] +// - AMD64 Unix: RBP frame (push rbp; mov rbp, rsp), return address at [RBP+8] +// - ARM: R7 frame (push {r7}; mov r7, sp), return address in LR (already saved by caller) +static void UnwindJITStackProbeToCaller(CONTEXT* pContext) +{ + LIMITED_METHOD_CONTRACT; + +#if defined(TARGET_AMD64) +#ifdef TARGET_UNIX + // AMD64 Unix: JIT_StackProbe has an RBP frame (push rbp; mov rbp, rsp) + // Return address is at [RBP + 8], saved RBP is at [RBP] + TADDR rbp = GetFP(pContext); + PCODE returnAddress = *dac_cast<PTR_PCODE>(rbp + 8); + SetIP(pContext, returnAddress); + SetFP(pContext, *dac_cast<PTR_TADDR>(rbp)); // restore RBP + SetSP(pContext, rbp + 16); // RSP after ret = RBP + 16 +#else // TARGET_WINDOWS + // AMD64 Windows: JIT_StackProbe is a leaf function (no frame) + // Return address is at [RSP], simulate a ret instruction + PCODE returnAddress = *dac_cast<PTR_PCODE>(GetSP(pContext)); + SetIP(pContext, returnAddress); + SetSP(pContext, GetSP(pContext) + sizeof(void*)); // pop the 
stack +#endif // TARGET_UNIX +#elif defined(TARGET_ARM) + // ARM: JIT_StackProbe has an R7 frame (push {r7}; mov r7, sp) + // At the point of AV, R7 contains the frame pointer. + // Return address is in LR (ARM calling convention, caller saved LR before call). + // Saved R7 is at [R7], we need to restore R7 and SP. + TADDR r7 = pContext->R7; + SetIP(pContext, pContext->Lr); + pContext->R7 = *dac_cast<PTR_TADDR>(r7); // restore R7 + SetSP(pContext, r7 + 4); // SP after pop {r7} and ret +#else + // JIT_StackProbe doesn't exist on other architectures + UNREACHABLE(); +#endif +} + +// Unwind a write barrier helper to its caller. Write barriers are leaf functions +// that do not set up a frame, so we can unwind them by simply extracting the +// return address from LR/RA (on ARM/RISC-V) or from the stack (on x86/x64). +// +// Similar to NativeAOT's UnwindSimpleHelperToCaller in EHHelpers.cpp. +static void UnwindWriteBarrierToCaller(CONTEXT* pContext) +{ + LIMITED_METHOD_CONTRACT; + +#if defined(TARGET_AMD64) + // On x64, return address is at [RSP], simulate a ret instruction + PCODE returnAddress = *dac_cast<PTR_PCODE>(GetSP(pContext)); + SetIP(pContext, returnAddress); + SetSP(pContext, GetSP(pContext) + sizeof(void*)); // pop the stack +#elif defined(TARGET_X86) + // On x86, return address is at [ESP], simulate a ret instruction + PCODE returnAddress = *dac_cast<PTR_PCODE>(GetSP(pContext)); + SetIP(pContext, returnAddress); + SetSP(pContext, GetSP(pContext) + sizeof(void*)); // pop the stack +#elif defined(TARGET_ARM64) + // On ARM64, return address is in LR, no stack adjustment needed for leaf + SetIP(pContext, pContext->Lr); +#elif defined(TARGET_ARM) + // On ARM, return address is in LR + SetIP(pContext, pContext->Lr); +#elif defined(TARGET_LOONGARCH64) + // On LoongArch64, return address is in RA + SetIP(pContext, pContext->Ra); +#elif defined(TARGET_RISCV64) + // On RISC-V64, return address is in RA + SetIP(pContext, pContext->Ra); +#else +#error "UnwindWriteBarrierToCaller not implemented for 
this architecture" +#endif +} + // Returns TRUE if caller should resume execution. BOOL AdjustContextForJITHelpers( @@ -5599,7 +5706,9 @@ AdjustContextForJITHelpers( CONTEXT tempContext; CONTEXT* pExceptionContext = pContext; - BOOL fExcluded = IsIPInMarkedJitHelper(f_IP); + bool fInWriteBarrier = IsIPInWriteBarrierHelper(f_IP); + bool fInJITStackProbe = IsIPInJITStackProbe(f_IP); + BOOL fExcluded = fInWriteBarrier || fInJITStackProbe; if (fExcluded) { @@ -5611,33 +5720,32 @@ AdjustContextForJITHelpers( pContext = &tempContext; } - Thread::VirtualUnwindToFirstManagedCallFrame(pContext); + if (fInWriteBarrier) + { + // Write barriers are leaf functions that do not set up a frame. + // We can unwind them with a simple LR/RA/stack-pop extraction. + UnwindWriteBarrierToCaller(pContext); #if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - // We had an AV in the writebarrier that needs to be treated - // as originating in managed code. At this point, the stack (growing - // from left->right) looks like this: - // - // ManagedFunc -> Native_WriteBarrierInVM -> AV - // - // We just performed an unwind from the write-barrier - // and now have the context in ManagedFunc. Since - // ManagedFunc called into the write-barrier, the return - // address in the unwound context corresponds to the - // instruction where the call will return. - // - // On ARM, just like we perform ControlPC adjustment - // during exception dispatch (refer to ExInfo::InitializeCrawlFrame), - // we will need to perform the corresponding adjustment of IP - // we got from unwind above, so as to indicate that the AV - // happened "before" the call to the writebarrier and not at - // the instruction at which the control will return. - PCODE ControlPCPostAdjustment = GetIP(pContext) - STACKWALK_CONTROLPC_ADJUST_OFFSET; - - // Now we save the address back into the context so that it gets used - // as the faulting address. 
- SetIP(pContext, ControlPCPostAdjustment); + // On ARM/ARM64/LoongArch64/RISC-V, adjust IP to point to the call instruction + // rather than the return address, so the exception appears to originate + // from the managed code that called the write barrier. + PCODE ControlPCPostAdjustment = GetIP(pContext) - STACKWALK_CONTROLPC_ADJUST_OFFSET; + SetIP(pContext, ControlPCPostAdjustment); #endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 + } + else if (fInJITStackProbe) + { + // JIT_StackProbe has a known frame layout on each platform. + UnwindJITStackProbeToCaller(pContext); + +#if defined(TARGET_ARM) + // On ARM, adjust IP to point to the call instruction + // rather than the return address. + PCODE ControlPCPostAdjustment = GetIP(pContext) - STACKWALK_CONTROLPC_ADJUST_OFFSET; + SetIP(pContext, ControlPCPostAdjustment); +#endif // TARGET_ARM + } // Unwind the frame chain - On Win64, this is required since we may handle the managed fault and to do so, // we will replace the exception context with the managed context and "continue execution" there. Thus, we do not @@ -11140,4 +11248,171 @@ void SoftwareExceptionFrame::InitAndLink(Thread *pThread) Push(pThread); } +// Static helper to populate a CONTEXT from a TransitionBlock for OSR transitions. +// This shares similar logic with UpdateContextFromTransitionBlock but also handles +// platform-specific adjustments needed for OSR (like simulating the call stack alignment). +// +// Returns the adjusted SP and FP values that the OSR method should use. 
+void SoftwareExceptionFrame::UpdateContextForOSRTransition(TransitionBlock* pTransitionBlock, CONTEXT* pContext, + UINT_PTR* pCurrentSP, UINT_PTR* pCurrentFP) +{ + LIMITED_METHOD_CONTRACT; + +#if defined(TARGET_AMD64) +#if defined(TARGET_WINDOWS) + pContext->ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT; + + // Read FP callee-saved registers (xmm6-xmm15) from the stack + // Layout: [shadow (32)] [xmm6-xmm15 (160)] [xmm0-xmm3 (64)] [arg regs (32)] [padding (8)] [CalleeSavedRegs] [RetAddr] + // xmm6 is at sp+32, TransitionBlock is at sp+296, so xmm6 is at TransitionBlock - 264 + M128A *pFpCalleeSaved = (M128A*)((BYTE*)pTransitionBlock - 264); + + pContext->Xmm6 = pFpCalleeSaved[0]; + pContext->Xmm7 = pFpCalleeSaved[1]; + pContext->Xmm8 = pFpCalleeSaved[2]; + pContext->Xmm9 = pFpCalleeSaved[3]; + pContext->Xmm10 = pFpCalleeSaved[4]; + pContext->Xmm11 = pFpCalleeSaved[5]; + pContext->Xmm12 = pFpCalleeSaved[6]; + pContext->Xmm13 = pFpCalleeSaved[7]; + pContext->Xmm14 = pFpCalleeSaved[8]; + pContext->Xmm15 = pFpCalleeSaved[9]; + + // Initialize FP control/status + pContext->FltSave.ControlWord = 0x27F; + pContext->FltSave.MxCsr = 0x1F80; + pContext->FltSave.MxCsr_Mask = 0x1FFF; + pContext->MxCsr = 0x1F80; +#else // UNIX_AMD64_ABI + pContext->ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER; +#endif + + // Copy integer callee-saved registers from TransitionBlock + pContext->Rbx = pTransitionBlock->m_calleeSavedRegisters.Rbx; + pContext->Rbp = pTransitionBlock->m_calleeSavedRegisters.Rbp; + pContext->R12 = pTransitionBlock->m_calleeSavedRegisters.R12; + pContext->R13 = pTransitionBlock->m_calleeSavedRegisters.R13; + pContext->R14 = pTransitionBlock->m_calleeSavedRegisters.R14; + pContext->R15 = pTransitionBlock->m_calleeSavedRegisters.R15; +#if defined(TARGET_WINDOWS) + pContext->Rdi = pTransitionBlock->m_calleeSavedRegisters.Rdi; + pContext->Rsi = pTransitionBlock->m_calleeSavedRegisters.Rsi; +#endif + + // SP points just past the 
TransitionBlock (after return address) + // Adjust for call simulation: OSR method expects SP as if a call just happened + *pCurrentSP = (UINT_PTR)(pTransitionBlock + 1); + _ASSERTE(*pCurrentSP % 16 == 0); + *pCurrentSP -= 8; // Simulate the call pushing return address + *pCurrentFP = pTransitionBlock->m_calleeSavedRegisters.Rbp; + +#elif defined(TARGET_ARM64) + pContext->ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT; + + // Copy callee-saved registers from TransitionBlock + pContext->X19 = pTransitionBlock->m_calleeSavedRegisters.x19; + pContext->X20 = pTransitionBlock->m_calleeSavedRegisters.x20; + pContext->X21 = pTransitionBlock->m_calleeSavedRegisters.x21; + pContext->X22 = pTransitionBlock->m_calleeSavedRegisters.x22; + pContext->X23 = pTransitionBlock->m_calleeSavedRegisters.x23; + pContext->X24 = pTransitionBlock->m_calleeSavedRegisters.x24; + pContext->X25 = pTransitionBlock->m_calleeSavedRegisters.x25; + pContext->X26 = pTransitionBlock->m_calleeSavedRegisters.x26; + pContext->X27 = pTransitionBlock->m_calleeSavedRegisters.x27; + pContext->X28 = pTransitionBlock->m_calleeSavedRegisters.x28; + pContext->Fp = pTransitionBlock->m_calleeSavedRegisters.x29; // frame pointer + pContext->Lr = pTransitionBlock->m_calleeSavedRegisters.x30; // link register + + // Read FP callee-saved registers (d8-d15) from the stack + // TransitionBlock is at sp+192, FP callee-saved at sp+0, so offset is -192 + double *pFpCalleeSaved = (double*)((BYTE*)pTransitionBlock - 192); + pContext->V[8] = *(unsigned __int64*)&pFpCalleeSaved[0]; + pContext->V[9] = *(unsigned __int64*)&pFpCalleeSaved[1]; + pContext->V[10] = *(unsigned __int64*)&pFpCalleeSaved[2]; + pContext->V[11] = *(unsigned __int64*)&pFpCalleeSaved[3]; + pContext->V[12] = *(unsigned __int64*)&pFpCalleeSaved[4]; + pContext->V[13] = *(unsigned __int64*)&pFpCalleeSaved[5]; + pContext->V[14] = *(unsigned __int64*)&pFpCalleeSaved[6]; + pContext->V[15] = *(unsigned __int64*)&pFpCalleeSaved[7]; + + // 
SP points just past the TransitionBlock + *pCurrentSP = (UINT_PTR)(pTransitionBlock + 1); + *pCurrentFP = pTransitionBlock->m_calleeSavedRegisters.x29; + +#elif defined(TARGET_LOONGARCH64) + pContext->ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT; + + // Copy callee-saved registers from TransitionBlock + pContext->Fp = pTransitionBlock->m_calleeSavedRegisters.fp; + pContext->Ra = pTransitionBlock->m_calleeSavedRegisters.ra; + pContext->S0 = pTransitionBlock->m_calleeSavedRegisters.s0; + pContext->S1 = pTransitionBlock->m_calleeSavedRegisters.s1; + pContext->S2 = pTransitionBlock->m_calleeSavedRegisters.s2; + pContext->S3 = pTransitionBlock->m_calleeSavedRegisters.s3; + pContext->S4 = pTransitionBlock->m_calleeSavedRegisters.s4; + pContext->S5 = pTransitionBlock->m_calleeSavedRegisters.s5; + pContext->S6 = pTransitionBlock->m_calleeSavedRegisters.s6; + pContext->S7 = pTransitionBlock->m_calleeSavedRegisters.s7; + pContext->S8 = pTransitionBlock->m_calleeSavedRegisters.s8; + + // Read FP callee-saved registers (f24-f31) from the stack + // TransitionBlock is at sp+128, FP callee-saved at sp+0, so offset is -128 + double *pFpCalleeSaved = (double*)((BYTE*)pTransitionBlock - 128); + pContext->F[24] = *(UINT64*)&pFpCalleeSaved[0]; + pContext->F[25] = *(UINT64*)&pFpCalleeSaved[1]; + pContext->F[26] = *(UINT64*)&pFpCalleeSaved[2]; + pContext->F[27] = *(UINT64*)&pFpCalleeSaved[3]; + pContext->F[28] = *(UINT64*)&pFpCalleeSaved[4]; + pContext->F[29] = *(UINT64*)&pFpCalleeSaved[5]; + pContext->F[30] = *(UINT64*)&pFpCalleeSaved[6]; + pContext->F[31] = *(UINT64*)&pFpCalleeSaved[7]; + + *pCurrentSP = (UINT_PTR)(pTransitionBlock + 1); + *pCurrentFP = pTransitionBlock->m_calleeSavedRegisters.fp; + +#elif defined(TARGET_RISCV64) + pContext->ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT; + + // Copy callee-saved registers from TransitionBlock + pContext->Fp = pTransitionBlock->m_calleeSavedRegisters.fp; + pContext->Ra = 
pTransitionBlock->m_calleeSavedRegisters.ra; + pContext->S1 = pTransitionBlock->m_calleeSavedRegisters.s1; + pContext->S2 = pTransitionBlock->m_calleeSavedRegisters.s2; + pContext->S3 = pTransitionBlock->m_calleeSavedRegisters.s3; + pContext->S4 = pTransitionBlock->m_calleeSavedRegisters.s4; + pContext->S5 = pTransitionBlock->m_calleeSavedRegisters.s5; + pContext->S6 = pTransitionBlock->m_calleeSavedRegisters.s6; + pContext->S7 = pTransitionBlock->m_calleeSavedRegisters.s7; + pContext->S8 = pTransitionBlock->m_calleeSavedRegisters.s8; + pContext->S9 = pTransitionBlock->m_calleeSavedRegisters.s9; + pContext->S10 = pTransitionBlock->m_calleeSavedRegisters.s10; + pContext->S11 = pTransitionBlock->m_calleeSavedRegisters.s11; + pContext->Tp = pTransitionBlock->m_calleeSavedRegisters.tp; + pContext->Gp = pTransitionBlock->m_calleeSavedRegisters.gp; + + // Read FP callee-saved registers (fs0-fs11) from the stack + // TransitionBlock is at sp+160, FP callee-saved at sp+0, so offset is -160 + double *pFpCalleeSaved = (double*)((BYTE*)pTransitionBlock - 160); + // RISC-V FP callee-saved: fs0=f8, fs1=f9, fs2-fs11=f18-f27 + pContext->F[8] = *(UINT64*)&pFpCalleeSaved[0]; // fs0 + pContext->F[9] = *(UINT64*)&pFpCalleeSaved[1]; // fs1 + pContext->F[18] = *(UINT64*)&pFpCalleeSaved[2]; // fs2 + pContext->F[19] = *(UINT64*)&pFpCalleeSaved[3]; // fs3 + pContext->F[20] = *(UINT64*)&pFpCalleeSaved[4]; // fs4 + pContext->F[21] = *(UINT64*)&pFpCalleeSaved[5]; // fs5 + pContext->F[22] = *(UINT64*)&pFpCalleeSaved[6]; // fs6 + pContext->F[23] = *(UINT64*)&pFpCalleeSaved[7]; // fs7 + pContext->F[24] = *(UINT64*)&pFpCalleeSaved[8]; // fs8 + pContext->F[25] = *(UINT64*)&pFpCalleeSaved[9]; // fs9 + pContext->F[26] = *(UINT64*)&pFpCalleeSaved[10]; // fs10 + pContext->F[27] = *(UINT64*)&pFpCalleeSaved[11]; // fs11 + + *pCurrentSP = (UINT_PTR)(pTransitionBlock + 1); + *pCurrentFP = pTransitionBlock->m_calleeSavedRegisters.fp; + +#else +#error "Unsupported platform for OSR TransitionBlock-based 
context capture" +#endif +} + #endif // DACCESS_COMPILE diff --git a/src/coreclr/vm/frames.h b/src/coreclr/vm/frames.h index 8ec28c47132c6e..30e53602bc162f 100644 --- a/src/coreclr/vm/frames.h +++ b/src/coreclr/vm/frames.h @@ -1033,6 +1033,11 @@ class SoftwareExceptionFrame : public Frame } void UpdateContextFromTransitionBlock(TransitionBlock *pTransitionBlock); + + // Static helper to populate a CONTEXT from a TransitionBlock for OSR transitions. + // Returns the adjusted SP and FP values that the OSR method should use. + static void UpdateContextForOSRTransition(TransitionBlock* pTransitionBlock, CONTEXT* pContext, + UINT_PTR* pCurrentSP, UINT_PTR* pCurrentFP); #endif TADDR GetReturnAddressPtr_Impl() diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 0520a652ff4ac1..2339cc5c1eddee 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -1624,95 +1624,62 @@ extern "C" void JIT_PatchpointWorkerWorkerWithPolicy(TransitionBlock * pTransiti pThread->UnhijackThread(); #endif - // Find context for the original method CONTEXT *pFrameContext = NULL; -#if defined(TARGET_WINDOWS) && defined(TARGET_AMD64) - DWORD contextSize = 0; - ULONG64 xStateCompactionMask = 0; - DWORD contextFlags = CONTEXT_FULL; - if (Thread::AreShadowStacksEnabled()) - { - xStateCompactionMask = XSTATE_MASK_CET_U; - contextFlags |= CONTEXT_XSTATE; - } - - // The initialize call should fail but return contextSize - BOOL success = g_pfnInitializeContext2 ? - g_pfnInitializeContext2(NULL, contextFlags, NULL, &contextSize, xStateCompactionMask) : - InitializeContext(NULL, contextFlags, NULL, &contextSize); - - _ASSERTE(!success && (GetLastError() == ERROR_INSUFFICIENT_BUFFER)); + UINT_PTR currentSP; + UINT_PTR currentFP; - PVOID pBuffer = _alloca(contextSize); - success = g_pfnInitializeContext2 ? 
- g_pfnInitializeContext2(pBuffer, contextFlags, &pFrameContext, &contextSize, xStateCompactionMask) : - InitializeContext(pBuffer, contextFlags, &pFrameContext, &contextSize); - _ASSERTE(success); -#else // TARGET_WINDOWS && TARGET_AMD64 + // Build CONTEXT directly from TransitionBlock - this completely bypasses + // RtlCaptureContext, VirtualUnwindToFirstManagedCallFrame, and RtlVirtualUnwind. CONTEXT frameContext; - frameContext.ContextFlags = CONTEXT_FULL; + memset(&frameContext, 0, sizeof(frameContext)); pFrameContext = &frameContext; -#endif // TARGET_WINDOWS && TARGET_AMD64 - - // Find context for the original method - RtlCaptureContext(pFrameContext); #if defined(TARGET_WINDOWS) && defined(TARGET_AMD64) + // Handle extended context for shadow stacks if enabled + CONTEXT* pAllocatedContext = NULL; if (Thread::AreShadowStacksEnabled()) { - pFrameContext->ContextFlags |= CONTEXT_XSTATE; - SetXStateFeaturesMask(pFrameContext, xStateCompactionMask); - SetSSP(pFrameContext, _rdsspq()); - } -#endif // TARGET_WINDOWS && TARGET_AMD64 + DWORD contextSize = 0; + ULONG64 xStateCompactionMask = XSTATE_MASK_CET_U; + DWORD contextFlags = CONTEXT_FULL | CONTEXT_XSTATE; - // Walk back to the original method frame - pThread->VirtualUnwindToFirstManagedCallFrame(pFrameContext); + // The initialize call should fail but return contextSize + BOOL success = g_pfnInitializeContext2 ? + g_pfnInitializeContext2(NULL, contextFlags, NULL, &contextSize, xStateCompactionMask) : + InitializeContext(NULL, contextFlags, NULL, &contextSize); - // Remember original method FP and SP because new method will inherit them. 
- UINT_PTR currentSP = GetSP(pFrameContext); - UINT_PTR currentFP = GetFP(pFrameContext); + _ASSERTE(!success && (GetLastError() == ERROR_INSUFFICIENT_BUFFER)); - // We expect to be back at the right IP - if ((UINT_PTR)ip != GetIP(pFrameContext)) - { - // Should be fatal - STRESS_LOG2(LF_TIEREDCOMPILATION, LL_FATALERROR, "Jit_Patchpoint: patchpoint (0x%p) TRANSITION" - " unexpected context IP 0x%p\n", ip, GetIP(pFrameContext)); - EEPOLICY_HANDLE_FATAL_ERROR(COR_E_EXECUTIONENGINE); + PVOID pBuffer = _alloca(contextSize); + success = g_pfnInitializeContext2 ? + g_pfnInitializeContext2(pBuffer, contextFlags, &pAllocatedContext, &contextSize, xStateCompactionMask) : + InitializeContext(pBuffer, contextFlags, &pAllocatedContext, &contextSize); + _ASSERTE(success); + + pFrameContext = pAllocatedContext; } +#endif - // Now unwind back to the original method caller frame. - EECodeInfo callerCodeInfo(GetIP(pFrameContext)); - ULONG_PTR establisherFrame = 0; - PVOID handlerData = NULL; - RtlVirtualUnwind(UNW_FLAG_NHANDLER, callerCodeInfo.GetModuleBase(), GetIP(pFrameContext), callerCodeInfo.GetFunctionEntry(), - pFrameContext, &handlerData, &establisherFrame, NULL); + // Use the shared helper to populate the CONTEXT from TransitionBlock + SoftwareExceptionFrame::UpdateContextForOSRTransition(pTransitionBlock, pFrameContext, &currentSP, &currentFP); - // Now, set FP and SP back to the values they had just before this helper was called, - // since the new method must have access to the original method frame. - // - // TODO: if we access the patchpointInfo here, we can read out the FP-SP delta from there and - // use that to adjust the stack, likely saving some stack space. - -#if defined(TARGET_AMD64) - // If calls push the return address, we need to simulate that here, so the OSR - // method sees the "expected" SP misalgnment on entry. 
- _ASSERTE(currentSP % 16 == 0); - currentSP -= 8; - -#if defined(TARGET_WINDOWS) - DWORD64 ssp = GetSSP(pFrameContext); - if (ssp != 0) +#if defined(TARGET_WINDOWS) && defined(TARGET_AMD64) + if (Thread::AreShadowStacksEnabled()) { - SetSSP(pFrameContext, ssp - 8); - } -#endif // TARGET_WINDOWS + pFrameContext->ContextFlags |= CONTEXT_XSTATE; + SetXStateFeaturesMask(pFrameContext, XSTATE_MASK_CET_U); + SetSSP(pFrameContext, _rdsspq()); - pFrameContext->Rbp = currentFP; -#endif // TARGET_AMD64 + DWORD64 ssp = GetSSP(pFrameContext); + if (ssp != 0) + { + SetSSP(pFrameContext, ssp - 8); // Simulate call pushing shadow stack + } + } +#endif SetSP(pFrameContext, currentSP); + SetFP(pFrameContext, currentFP); // Note we can get here w/o triggering, if there is an existing OSR method and // we hit the patchpoint. diff --git a/src/coreclr/vm/loongarch64/asmhelpers.S b/src/coreclr/vm/loongarch64/asmhelpers.S index 9f424c39dd30f5..a3dd48971e3cec 100644 --- a/src/coreclr/vm/loongarch64/asmhelpers.S +++ b/src/coreclr/vm/loongarch64/asmhelpers.S @@ -954,12 +954,12 @@ NESTED_END OnCallCountThresholdReachedStub, _TEXT #ifdef FEATURE_ON_STACK_REPLACEMENT NESTED_ENTRY JIT_Patchpoint, _TEXT, NoHandler - PROLOG_WITH_TRANSITION_BLOCK + PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS $a0 - addi.d $a0, $sp, __PWTB_TransitionBlock // TransitionBlock * + // $a0 already contains TransitionBlock pointer from macro bl C_FUNC(JIT_PatchpointWorkerWorkerWithPolicy) - EPILOG_WITH_TRANSITION_BLOCK_RETURN + POP_COOP_PINVOKE_FRAME_WITH_FLOATS_RETURN NESTED_END JIT_Patchpoint, _TEXT // first arg register holds iloffset, which needs to be moved to the second register, and the first register filled with NULL diff --git a/src/coreclr/vm/riscv64/asmhelpers.S b/src/coreclr/vm/riscv64/asmhelpers.S index 0d26ef514d4f58..71947103c88f61 100644 --- a/src/coreclr/vm/riscv64/asmhelpers.S +++ b/src/coreclr/vm/riscv64/asmhelpers.S @@ -827,12 +827,12 @@ NESTED_END OnCallCountThresholdReachedStub, _TEXT #ifdef 
FEATURE_ON_STACK_REPLACEMENT NESTED_ENTRY JIT_Patchpoint, _TEXT, NoHandler - PROLOG_WITH_TRANSITION_BLOCK + PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS a0 - addi a0, sp, __PWTB_TransitionBlock // TransitionBlock * + // a0 already contains TransitionBlock pointer from macro call C_FUNC(JIT_PatchpointWorkerWorkerWithPolicy) - EPILOG_WITH_TRANSITION_BLOCK_RETURN + POP_COOP_PINVOKE_FRAME_WITH_FLOATS_RETURN NESTED_END JIT_Patchpoint, _TEXT // first arg register holds iloffset, which needs to be moved to the second register, and the first register filled with NULL From 6ec9ed6047cb2d7c7011e1c8408d9a95faadec7d Mon Sep 17 00:00:00 2001 From: Adeel Mujahid <3840695+am11@users.noreply.github.com> Date: Fri, 16 Jan 2026 22:50:53 +0000 Subject: [PATCH 02/14] fixes for win-x64 --- src/coreclr/vm/amd64/AsmMacros.inc | 21 +++++++++++---------- src/coreclr/vm/excep.cpp | 12 ++++++------ 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/coreclr/vm/amd64/AsmMacros.inc b/src/coreclr/vm/amd64/AsmMacros.inc index 4484995a9724bd..3baf14dcfaef4a 100644 --- a/src/coreclr/vm/amd64/AsmMacros.inc +++ b/src/coreclr/vm/amd64/AsmMacros.inc @@ -503,13 +503,14 @@ PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS macro target PUSH_CALLEE_SAVED_REGISTERS - ; Allocate space for: shadow for call (32) + FP callee-saved (160) + float args (64) + arg regs (32) + padding (8) = 296 bytes - ; Shadow space at offset 0 is reserved for the call to IL_Throw_Impl etc. 
- ; This makes RSP 16-byte aligned (8 + 64 + 296 = 368, and original RSP - 368 is 16-byte aligned) - alloc_stack 296 + ; Allocate space for: shadow (32) + FP callee-saved (160) + float args (64) + padding (8) = 264 bytes + ; This makes RSP 16-byte aligned (8 + 64 + 264 = 336, and original RSP - 336 is 16-byte aligned) + alloc_stack 264 - ; Save argument registers at offset 256 (32 + 160 + 64) - SAVE_ARGUMENT_REGISTERS 256 + ; Save argument registers into caller's shadow space + ; TransitionBlock is at rsp + 264, sizeof(TransitionBlock) = 72 + ; So argument registers go at rsp + 264 + 72 = rsp + 336 + SAVE_ARGUMENT_REGISTERS 336 ; Save float argument registers at offset 192 (32 + 160) SAVE_FLOAT_ARGUMENT_REGISTERS 192 @@ -530,8 +531,8 @@ PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS macro target END_PROLOGUE - ; TransitionBlock pointer points to CalleeSavedRegisters at rsp + 296 - lea target, [rsp + 296] + ; TransitionBlock pointer points to CalleeSavedRegisters at rsp + 264 + lea target, [rsp + 264] endm @@ -550,8 +551,8 @@ POP_COOP_PINVOKE_FRAME_WITH_FLOATS_RETURN macro movdqa xmm14, [rsp + 0A0h] movdqa xmm15, [rsp + 0B0h] - ; Deallocate stack space (296 bytes) - add rsp, 296 + ; Deallocate stack space (264 bytes) + add rsp, 264 POP_CALLEE_SAVED_REGISTERS ret diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index ae237a120e1816..9c751676c1138b 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -10878,9 +10878,9 @@ void SoftwareExceptionFrame::UpdateContextFromTransitionBlock(TransitionBlock *p // Read FP callee-saved registers (xmm6-xmm15) from the stack // They are stored at negative offsets from TransitionBlock: - // Layout: [shadow (32)] [xmm6-xmm15 (160)] [xmm0-xmm3 (64)] [arg regs (32)] [padding (8)] [CalleeSavedRegs] [RetAddr] - // xmm6 is at sp+32, TransitionBlock is at sp+296, so xmm6 is at TransitionBlock - 264 - M128A *pFpCalleeSaved = (M128A*)((BYTE*)pTransitionBlock - 264); + // Layout: [shadow (32)] [xmm6-xmm15 (160)] 
[xmm0-xmm3 (64)] [padding (8)] [CalleeSavedRegs] [RetAddr] [ArgRegs] + // xmm6 is at sp+32, TransitionBlock is at sp+264, so xmm6 is at TransitionBlock - 232 + M128A *pFpCalleeSaved = (M128A*)((BYTE*)pTransitionBlock - 232); m_Context.Xmm6 = pFpCalleeSaved[0]; m_Context.Xmm7 = pFpCalleeSaved[1]; @@ -11263,9 +11263,9 @@ void SoftwareExceptionFrame::UpdateContextForOSRTransition(TransitionBlock* pTra pContext->ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT; // Read FP callee-saved registers (xmm6-xmm15) from the stack - // Layout: [shadow (32)] [xmm6-xmm15 (160)] [xmm0-xmm3 (64)] [arg regs (32)] [padding (8)] [CalleeSavedRegs] [RetAddr] - // xmm6 is at sp+32, TransitionBlock is at sp+296, so xmm6 is at TransitionBlock - 264 - M128A *pFpCalleeSaved = (M128A*)((BYTE*)pTransitionBlock - 264); + // Layout: [shadow (32)] [xmm6-xmm15 (160)] [xmm0-xmm3 (64)] [padding (8)] [CalleeSavedRegs] [RetAddr] [ArgRegs] + // xmm6 is at sp+32, TransitionBlock is at sp+264, so xmm6 is at TransitionBlock - 232 + M128A *pFpCalleeSaved = (M128A*)((BYTE*)pTransitionBlock - 232); pContext->Xmm6 = pFpCalleeSaved[0]; pContext->Xmm7 = pFpCalleeSaved[1]; From fa3594f0d04b15848890700f19581c78344f5ec6 Mon Sep 17 00:00:00 2001 From: Adeel <3840695+am11@users.noreply.github.com> Date: Sat, 17 Jan 2026 15:52:57 +0200 Subject: [PATCH 03/14] osx-arm64 --- src/coreclr/pal/inc/unixasmmacrosarm64.inc | 3 +- src/coreclr/vm/arm64/asmconstants.h | 15 +++ src/coreclr/vm/arm64/asmhelpers.S | 47 ++++++- src/coreclr/vm/arm64/asmhelpers.asm | 43 +++++- src/coreclr/vm/excep.cpp | 146 ++++----------------- src/coreclr/vm/exceptmacros.h | 4 - src/coreclr/vm/jithelpers.cpp | 1 - src/coreclr/vm/loongarch64/asmhelpers.S | 7 +- src/coreclr/vm/riscv64/asmhelpers.S | 7 +- src/coreclr/vm/threads.cpp | 4 +- 10 files changed, 134 insertions(+), 143 deletions(-) diff --git a/src/coreclr/pal/inc/unixasmmacrosarm64.inc b/src/coreclr/pal/inc/unixasmmacrosarm64.inc index 3669595086d4e9..c0203dbe3c66e0 
100644 --- a/src/coreclr/pal/inc/unixasmmacrosarm64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosarm64.inc @@ -491,7 +491,8 @@ C_FUNC(\Name\()_End): EPILOG_RESTORE_REG_PAIR x21, x22, 32 EPILOG_RESTORE_REG_PAIR x19, x20, 16 - EPILOG_RESTORE_REG_PAIR_RET fp, lr, 176 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 176 + ret .endm // ------------------------------------------------------------------ diff --git a/src/coreclr/vm/arm64/asmconstants.h b/src/coreclr/vm/arm64/asmconstants.h index 9f78786c3da059..2163b2c0924c46 100644 --- a/src/coreclr/vm/arm64/asmconstants.h +++ b/src/coreclr/vm/arm64/asmconstants.h @@ -101,12 +101,27 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__Frame == sizeof(Frame)); #endif ASMCONSTANTS_C_ASSERT(SIZEOF__CONTEXT == sizeof(T_CONTEXT)); +#define OFFSETOF__CONTEXT__ContextFlags 0x0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__ContextFlags == offsetof(T_CONTEXT, ContextFlags)); + +// CONTEXT_INTEGER_BIT is bit 1 in ContextFlags (from pal.h CONTEXT_INTEGER definition) +#define CONTEXT_INTEGER_BIT 1 + #define OFFSETOF__CONTEXT__X19 0xA0 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__X19 == offsetof(T_CONTEXT, X19)); #define OFFSETOF__CONTEXT__Fp 0xF0 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Fp == offsetof(T_CONTEXT, Fp)); +#define OFFSETOF__CONTEXT__Lr 0xF8 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Lr == offsetof(T_CONTEXT, Lr)); + +#define OFFSETOF__CONTEXT__Sp 0x100 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Sp == offsetof(T_CONTEXT, Sp)); + +#define OFFSETOF__CONTEXT__Pc 0x108 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Pc == offsetof(T_CONTEXT, Pc)); + #define OFFSETOF__DynamicHelperStubArgs__Constant1 0x0 ASMCONSTANTS_C_ASSERT(OFFSETOF__DynamicHelperStubArgs__Constant1 == offsetof(DynamicHelperStubArgs, Constant1)); diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 181687cddf0489..21e6fe4ebdf469 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -584,15 +584,14 @@ NESTED_END 
OnCallCountThresholdReachedStub, _TEXT #ifdef FEATURE_ON_STACK_REPLACEMENT NESTED_ENTRY JIT_Patchpoint, _TEXT, NoHandler - // Use PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS to save all registers including FP callee-saved - // This allows us to build a complete CONTEXT from TransitionBlock without RtlCaptureContext - PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS x0 + PROLOG_WITH_TRANSITION_BLOCK - // x0 contains pointer to TransitionBlock + // x0 = pointer to TransitionBlock + add x0, sp, #__PWTB_TransitionBlock bl C_FUNC(JIT_PatchpointWorkerWorkerWithPolicy) // If we return, restore all registers and return to caller - POP_COOP_PINVOKE_FRAME_WITH_FLOATS_RETURN + EPILOG_WITH_TRANSITION_BLOCK_RETURN NESTED_END JIT_Patchpoint, _TEXT // first arg register holds iloffset, which needs to be moved to the second register, and the first register filled with NULL @@ -2808,3 +2807,41 @@ NESTED_ENTRY IL_Rethrow, _TEXT, NoHandler brk #0 NESTED_END IL_Rethrow, _TEXT +// ------------------------------------------------------------------ +// ClrRestoreNonvolatileContextWorker +// +// Restores non-volatile (callee-saved) registers based on ContextFlags and jumps to the target IP. +// This is the ARM64 equivalent of the AMD64 version used for OSR transitions. 
+// +// Arguments: +// x0 - pointer to CONTEXT structure +// +// Does not return - jumps directly to the Pc stored in the CONTEXT +// ------------------------------------------------------------------ +LEAF_ENTRY ClrRestoreNonvolatileContextWorker, _TEXT + + // Check ContextFlags to see if we should restore integer registers + ldr w16, [x0, #OFFSETOF__CONTEXT__ContextFlags] + tbz w16, #CONTEXT_INTEGER_BIT, LOCAL_LABEL(SkipIntegerRestore) + + // Restore callee-saved registers x19-x28 + ldp x19, x20, [x0, #OFFSETOF__CONTEXT__X19] + ldp x21, x22, [x0, #(OFFSETOF__CONTEXT__X19 + 16)] + ldp x23, x24, [x0, #(OFFSETOF__CONTEXT__X19 + 32)] + ldp x25, x26, [x0, #(OFFSETOF__CONTEXT__X19 + 48)] + ldp x27, x28, [x0, #(OFFSETOF__CONTEXT__X19 + 64)] + +LOCAL_LABEL(SkipIntegerRestore): + // Restore fp (x29) and lr (x30) + ldp fp, lr, [x0, #OFFSETOF__CONTEXT__Fp] + + // Load Sp and Pc into temporaries + // We use x16 and x17 as they are IP0/IP1 (intra-procedure call scratch registers) + ldr x16, [x0, #OFFSETOF__CONTEXT__Sp] + ldr x17, [x0, #OFFSETOF__CONTEXT__Pc] + + // Set sp and jump to target + mov sp, x16 + br x17 + +LEAF_END ClrRestoreNonvolatileContextWorker, _TEXT \ No newline at end of file diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index 874bacbc51f4e8..d8e9f5fa9a292e 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -1011,15 +1011,14 @@ __HelperNakedFuncName SETS "$helper":CC:"Naked" IMPORT JIT_PatchpointWorkerWorkerWithPolicy NESTED_ENTRY JIT_Patchpoint - ; Use PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS to save all registers including FP callee-saved - ; This allows us to build a complete CONTEXT from TransitionBlock without RtlCaptureContext - PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS x0 + PROLOG_WITH_TRANSITION_BLOCK - ; x0 contains pointer to TransitionBlock + ; x0 = pointer to TransitionBlock + add x0, sp, #__PWTB_TransitionBlock bl JIT_PatchpointWorkerWorkerWithPolicy ; If we return, restore 
all registers and return to caller - POP_COOP_PINVOKE_FRAME_WITH_FLOATS_RETURN + EPILOG_WITH_TRANSITION_BLOCK_RETURN NESTED_END // first arg register holds iloffset, which needs to be moved to the second register, and the first register filled with NULL @@ -3061,5 +3060,39 @@ CopyLoop brk #0 NESTED_END IL_Rethrow +; ------------------------------------------------------------------ +; ClrRestoreNonvolatileContextWorker +; +; Restores non-volatile registers based on ContextFlags and jumps to PC. +; x0 - pointer to CONTEXT structure +; x1 - unused (SSP, not used on ARM64) +; ------------------------------------------------------------------ + LEAF_ENTRY ClrRestoreNonvolatileContextWorker + + ; Check if CONTEXT_INTEGER bit is set + ldr w16, [x0, #OFFSETOF__CONTEXT__ContextFlags] + tbz w16, #1, SkipIntegerRestore ; CONTEXT_INTEGER_BIT = 1 + + ; Restore callee-saved registers x19-x28 + ldp x19, x20, [x0, #OFFSETOF__CONTEXT__X19] + ldp x21, x22, [x0, #(OFFSETOF__CONTEXT__X19 + 16)] + ldp x23, x24, [x0, #(OFFSETOF__CONTEXT__X19 + 32)] + ldp x25, x26, [x0, #(OFFSETOF__CONTEXT__X19 + 48)] + ldp x27, x28, [x0, #(OFFSETOF__CONTEXT__X19 + 64)] + +SkipIntegerRestore + ; Restore fp and lr + ldp fp, lr, [x0, #OFFSETOF__CONTEXT__Fp] + + ; Load Sp and Pc + ldr x16, [x0, #OFFSETOF__CONTEXT__Sp] + ldr x17, [x0, #OFFSETOF__CONTEXT__Pc] + + ; Set sp and jump + mov sp, x16 + br x17 + + LEAF_END ClrRestoreNonvolatileContextWorker + ; Must be at very end of file END diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index 9c751676c1138b..d665570aab7651 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6906,39 +6906,6 @@ void ThrowResumeAfterCatchException(TADDR resumeSP, TADDR resumeIP) { throw ResumeAfterCatchException(resumeSP, resumeIP); } - -VOID DECLSPEC_NORETURN UnwindAndContinueResumeAfterCatch(TADDR resumeSP, TADDR resumeIP) -{ - STATIC_CONTRACT_NOTHROW; - STATIC_CONTRACT_GC_TRIGGERS; - STATIC_CONTRACT_MODE_ANY; - - CONTEXT context; - 
ClrCaptureContext(&context); - - // Unwind to the caller of the Ex.RhThrowEx / Ex.RhThrowHwEx - Thread::VirtualUnwindToFirstManagedCallFrame(&context); - -#if defined(HOST_AMD64) && defined(HOST_WINDOWS) - size_t targetSSP = GetSSPForFrameOnCurrentStack(GetIP(&context)); -#else - size_t targetSSP = 0; -#endif - - // Skip all managed frames upto a native frame - while (ExecutionManager::IsManagedCode(GetIP(&context))) - { - Thread::VirtualUnwindCallFrame(&context); -#if defined(HOST_AMD64) && defined(HOST_WINDOWS) - if (targetSSP != 0) - { - targetSSP += sizeof(size_t); - } -#endif - } - - ExecuteFunctionBelowContext((PCODE)ThrowResumeAfterCatchException, &context, targetSSP, resumeSP, resumeIP); -} #endif // FEATURE_INTERPRETER thread_local DWORD t_dwCurrentExceptionCode; @@ -11307,108 +11274,47 @@ void SoftwareExceptionFrame::UpdateContextForOSRTransition(TransitionBlock* pTra *pCurrentFP = pTransitionBlock->m_calleeSavedRegisters.Rbp; #elif defined(TARGET_ARM64) - pContext->ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT; + // Only restore control registers - callee-saved regs already have correct CPU values + pContext->ContextFlags = CONTEXT_CONTROL; - // Copy callee-saved registers from TransitionBlock - pContext->X19 = pTransitionBlock->m_calleeSavedRegisters.x19; - pContext->X20 = pTransitionBlock->m_calleeSavedRegisters.x20; - pContext->X21 = pTransitionBlock->m_calleeSavedRegisters.x21; - pContext->X22 = pTransitionBlock->m_calleeSavedRegisters.x22; - pContext->X23 = pTransitionBlock->m_calleeSavedRegisters.x23; - pContext->X24 = pTransitionBlock->m_calleeSavedRegisters.x24; - pContext->X25 = pTransitionBlock->m_calleeSavedRegisters.x25; - pContext->X26 = pTransitionBlock->m_calleeSavedRegisters.x26; - pContext->X27 = pTransitionBlock->m_calleeSavedRegisters.x27; - pContext->X28 = pTransitionBlock->m_calleeSavedRegisters.x28; - pContext->Fp = pTransitionBlock->m_calleeSavedRegisters.x29; // frame pointer - pContext->Lr = 
pTransitionBlock->m_calleeSavedRegisters.x30; // link register + UINT_PTR managedFrameFP = pTransitionBlock->m_calleeSavedRegisters.x29; + TADDR callerFP = *((TADDR*)managedFrameFP); + TADDR callerLR = *((TADDR*)(managedFrameFP + 8)); + + // Use caller's FP so F-OSR returns with correct FP for caller + pContext->Fp = callerFP; + pContext->Lr = callerLR; - // Read FP callee-saved registers (d8-d15) from the stack - // TransitionBlock is at sp+192, FP callee-saved at sp+0, so offset is -192 - double *pFpCalleeSaved = (double*)((BYTE*)pTransitionBlock - 192); - pContext->V[8] = *(unsigned __int64*)&pFpCalleeSaved[0]; - pContext->V[9] = *(unsigned __int64*)&pFpCalleeSaved[1]; - pContext->V[10] = *(unsigned __int64*)&pFpCalleeSaved[2]; - pContext->V[11] = *(unsigned __int64*)&pFpCalleeSaved[3]; - pContext->V[12] = *(unsigned __int64*)&pFpCalleeSaved[4]; - pContext->V[13] = *(unsigned __int64*)&pFpCalleeSaved[5]; - pContext->V[14] = *(unsigned __int64*)&pFpCalleeSaved[6]; - pContext->V[15] = *(unsigned __int64*)&pFpCalleeSaved[7]; - - // SP points just past the TransitionBlock *pCurrentSP = (UINT_PTR)(pTransitionBlock + 1); - *pCurrentFP = pTransitionBlock->m_calleeSavedRegisters.x29; + *pCurrentFP = callerFP; #elif defined(TARGET_LOONGARCH64) - pContext->ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT; + // Only restore control registers - callee-saved regs already have correct CPU values + pContext->ContextFlags = CONTEXT_CONTROL; - // Copy callee-saved registers from TransitionBlock - pContext->Fp = pTransitionBlock->m_calleeSavedRegisters.fp; - pContext->Ra = pTransitionBlock->m_calleeSavedRegisters.ra; - pContext->S0 = pTransitionBlock->m_calleeSavedRegisters.s0; - pContext->S1 = pTransitionBlock->m_calleeSavedRegisters.s1; - pContext->S2 = pTransitionBlock->m_calleeSavedRegisters.s2; - pContext->S3 = pTransitionBlock->m_calleeSavedRegisters.s3; - pContext->S4 = pTransitionBlock->m_calleeSavedRegisters.s4; - pContext->S5 = 
pTransitionBlock->m_calleeSavedRegisters.s5; - pContext->S6 = pTransitionBlock->m_calleeSavedRegisters.s6; - pContext->S7 = pTransitionBlock->m_calleeSavedRegisters.s7; - pContext->S8 = pTransitionBlock->m_calleeSavedRegisters.s8; + UINT_PTR managedFrameFP = pTransitionBlock->m_calleeSavedRegisters.fp; + TADDR callerFP = *((TADDR*)managedFrameFP); + TADDR callerRA = *((TADDR*)(managedFrameFP + 8)); - // Read FP callee-saved registers (f24-f31) from the stack - // TransitionBlock is at sp+128, FP callee-saved at sp+0, so offset is -128 - double *pFpCalleeSaved = (double*)((BYTE*)pTransitionBlock - 128); - pContext->F[24] = *(UINT64*)&pFpCalleeSaved[0]; - pContext->F[25] = *(UINT64*)&pFpCalleeSaved[1]; - pContext->F[26] = *(UINT64*)&pFpCalleeSaved[2]; - pContext->F[27] = *(UINT64*)&pFpCalleeSaved[3]; - pContext->F[28] = *(UINT64*)&pFpCalleeSaved[4]; - pContext->F[29] = *(UINT64*)&pFpCalleeSaved[5]; - pContext->F[30] = *(UINT64*)&pFpCalleeSaved[6]; - pContext->F[31] = *(UINT64*)&pFpCalleeSaved[7]; + pContext->Fp = callerFP; + pContext->Ra = callerRA; *pCurrentSP = (UINT_PTR)(pTransitionBlock + 1); - *pCurrentFP = pTransitionBlock->m_calleeSavedRegisters.fp; + *pCurrentFP = callerFP; #elif defined(TARGET_RISCV64) - pContext->ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT; + // Only restore control registers - callee-saved regs already have correct CPU values + pContext->ContextFlags = CONTEXT_CONTROL; - // Copy callee-saved registers from TransitionBlock - pContext->Fp = pTransitionBlock->m_calleeSavedRegisters.fp; - pContext->Ra = pTransitionBlock->m_calleeSavedRegisters.ra; - pContext->S1 = pTransitionBlock->m_calleeSavedRegisters.s1; - pContext->S2 = pTransitionBlock->m_calleeSavedRegisters.s2; - pContext->S3 = pTransitionBlock->m_calleeSavedRegisters.s3; - pContext->S4 = pTransitionBlock->m_calleeSavedRegisters.s4; - pContext->S5 = pTransitionBlock->m_calleeSavedRegisters.s5; - pContext->S6 = pTransitionBlock->m_calleeSavedRegisters.s6; - 
pContext->S7 = pTransitionBlock->m_calleeSavedRegisters.s7; - pContext->S8 = pTransitionBlock->m_calleeSavedRegisters.s8; - pContext->S9 = pTransitionBlock->m_calleeSavedRegisters.s9; - pContext->S10 = pTransitionBlock->m_calleeSavedRegisters.s10; - pContext->S11 = pTransitionBlock->m_calleeSavedRegisters.s11; - pContext->Tp = pTransitionBlock->m_calleeSavedRegisters.tp; - pContext->Gp = pTransitionBlock->m_calleeSavedRegisters.gp; + UINT_PTR managedFrameFP = pTransitionBlock->m_calleeSavedRegisters.fp; + TADDR callerFP = *((TADDR*)managedFrameFP); + TADDR callerRA = *((TADDR*)(managedFrameFP + 8)); - // Read FP callee-saved registers (fs0-fs11) from the stack - // TransitionBlock is at sp+160, FP callee-saved at sp+0, so offset is -160 - double *pFpCalleeSaved = (double*)((BYTE*)pTransitionBlock - 160); - // RISC-V FP callee-saved: fs0=f8, fs1=f9, fs2-fs11=f18-f27 - pContext->F[8] = *(UINT64*)&pFpCalleeSaved[0]; // fs0 - pContext->F[9] = *(UINT64*)&pFpCalleeSaved[1]; // fs1 - pContext->F[18] = *(UINT64*)&pFpCalleeSaved[2]; // fs2 - pContext->F[19] = *(UINT64*)&pFpCalleeSaved[3]; // fs3 - pContext->F[20] = *(UINT64*)&pFpCalleeSaved[4]; // fs4 - pContext->F[21] = *(UINT64*)&pFpCalleeSaved[5]; // fs5 - pContext->F[22] = *(UINT64*)&pFpCalleeSaved[6]; // fs6 - pContext->F[23] = *(UINT64*)&pFpCalleeSaved[7]; // fs7 - pContext->F[24] = *(UINT64*)&pFpCalleeSaved[8]; // fs8 - pContext->F[25] = *(UINT64*)&pFpCalleeSaved[9]; // fs9 - pContext->F[26] = *(UINT64*)&pFpCalleeSaved[10]; // fs10 - pContext->F[27] = *(UINT64*)&pFpCalleeSaved[11]; // fs11 + pContext->Fp = callerFP; + pContext->Ra = callerRA; *pCurrentSP = (UINT_PTR)(pTransitionBlock + 1); - *pCurrentFP = pTransitionBlock->m_calleeSavedRegisters.fp; + *pCurrentFP = callerFP; #else #error "Unsupported platform for OSR TransitionBlock-based context capture" diff --git a/src/coreclr/vm/exceptmacros.h b/src/coreclr/vm/exceptmacros.h index c72b673c898ad5..7ddccf1d06bc5f 100644 --- a/src/coreclr/vm/exceptmacros.h +++ 
b/src/coreclr/vm/exceptmacros.h @@ -205,10 +205,6 @@ VOID DECLSPEC_NORETURN RaiseTheExceptionInternalOnly(OBJECTREF throwable); void UnwindAndContinueRethrowHelperInsideCatch(Frame* pEntryFrame, Exception* pException); VOID DECLSPEC_NORETURN UnwindAndContinueRethrowHelperAfterCatch(Frame* pEntryFrame, Exception* pException, bool nativeRethrow); -#ifdef FEATURE_INTERPRETER -VOID DECLSPEC_NORETURN UnwindAndContinueResumeAfterCatch(TADDR resumeSP, TADDR resumeIP); -#endif // FEATURE_INTERPRETER - #ifdef TARGET_UNIX VOID DECLSPEC_NORETURN DispatchManagedException(PAL_SEHException& ex, bool isHardwareException); diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 2339cc5c1eddee..4a9db555a12ec6 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -1701,7 +1701,6 @@ extern "C" void JIT_PatchpointWorkerWorkerWithPolicy(TransitionBlock * pTransiti // Transition! ClrRestoreNonvolatileContext(pFrameContext); } - DONE: ::SetLastError(dwLastError); } diff --git a/src/coreclr/vm/loongarch64/asmhelpers.S b/src/coreclr/vm/loongarch64/asmhelpers.S index a3dd48971e3cec..6a2f1f59e46d06 100644 --- a/src/coreclr/vm/loongarch64/asmhelpers.S +++ b/src/coreclr/vm/loongarch64/asmhelpers.S @@ -954,12 +954,13 @@ NESTED_END OnCallCountThresholdReachedStub, _TEXT #ifdef FEATURE_ON_STACK_REPLACEMENT NESTED_ENTRY JIT_Patchpoint, _TEXT, NoHandler - PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS $a0 + PROLOG_WITH_TRANSITION_BLOCK - // $a0 already contains TransitionBlock pointer from macro + // $a0 = pointer to TransitionBlock + addi.d $a0, $sp, __PWTB_TransitionBlock bl C_FUNC(JIT_PatchpointWorkerWorkerWithPolicy) - POP_COOP_PINVOKE_FRAME_WITH_FLOATS_RETURN + EPILOG_WITH_TRANSITION_BLOCK_RETURN NESTED_END JIT_Patchpoint, _TEXT // first arg register holds iloffset, which needs to be moved to the second register, and the first register filled with NULL diff --git a/src/coreclr/vm/riscv64/asmhelpers.S b/src/coreclr/vm/riscv64/asmhelpers.S index 
71947103c88f61..f4a02d30c92830 100644 --- a/src/coreclr/vm/riscv64/asmhelpers.S +++ b/src/coreclr/vm/riscv64/asmhelpers.S @@ -827,12 +827,13 @@ NESTED_END OnCallCountThresholdReachedStub, _TEXT #ifdef FEATURE_ON_STACK_REPLACEMENT NESTED_ENTRY JIT_Patchpoint, _TEXT, NoHandler - PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS a0 + PROLOG_WITH_TRANSITION_BLOCK - // a0 already contains TransitionBlock pointer from macro + // a0 = pointer to TransitionBlock + addi a0, sp, __PWTB_TransitionBlock call C_FUNC(JIT_PatchpointWorkerWorkerWithPolicy) - POP_COOP_PINVOKE_FRAME_WITH_FLOATS_RETURN + EPILOG_WITH_TRANSITION_BLOCK_RETURN NESTED_END JIT_Patchpoint, _TEXT // first arg register holds iloffset, which needs to be moved to the second register, and the first register filled with NULL diff --git a/src/coreclr/vm/threads.cpp b/src/coreclr/vm/threads.cpp index 00f17de38ab91c..9ec749fc94c30b 100644 --- a/src/coreclr/vm/threads.cpp +++ b/src/coreclr/vm/threads.cpp @@ -6634,7 +6634,7 @@ void Thread::InitializeSpecialUserModeApc() #endif // FEATURE_SPECIAL_USER_MODE_APC -#if defined(TARGET_AMD64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) EXTERN_C void STDCALL ClrRestoreNonvolatileContextWorker(PCONTEXT ContextRecord, DWORD64 ssp); #endif @@ -6647,6 +6647,8 @@ void ClrRestoreNonvolatileContext(PCONTEXT ContextRecord, size_t targetSSP) targetSSP = GetSSP(ContextRecord); } ClrRestoreNonvolatileContextWorker(ContextRecord, targetSSP); +#elif defined(TARGET_ARM64) + ClrRestoreNonvolatileContextWorker(ContextRecord, 0); #elif defined(TARGET_X86) && defined(TARGET_WINDOWS) // need to pop the SEH records before write over the stack LPVOID oldSP = (LPVOID)GetSP(ContextRecord); From 8856b461c2b1f5f868836e243e759be3fad128be Mon Sep 17 00:00:00 2001 From: Adeel <3840695+am11@users.noreply.github.com> Date: Sat, 17 Jan 2026 16:33:52 +0200 Subject: [PATCH 04/14] riscv64 & loongarch64 --- src/coreclr/vm/excep.cpp | 2 ++ src/coreclr/vm/loongarch64/asmconstants.h | 18 ++++++++-- 
src/coreclr/vm/loongarch64/asmhelpers.S | 41 +++++++++++++++++++++ src/coreclr/vm/riscv64/asmconstants.h | 14 ++++++++ src/coreclr/vm/riscv64/asmhelpers.S | 44 +++++++++++++++++++++++ src/coreclr/vm/threads.cpp | 4 +-- 6 files changed, 119 insertions(+), 4 deletions(-) diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index d665570aab7651..8abd4dbc0428b0 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -11220,6 +11220,7 @@ void SoftwareExceptionFrame::InitAndLink(Thread *pThread) // platform-specific adjustments needed for OSR (like simulating the call stack alignment). // // Returns the adjusted SP and FP values that the OSR method should use. +#ifdef FEATURE_ON_STACK_REPLACEMENT void SoftwareExceptionFrame::UpdateContextForOSRTransition(TransitionBlock* pTransitionBlock, CONTEXT* pContext, UINT_PTR* pCurrentSP, UINT_PTR* pCurrentFP) { @@ -11320,5 +11321,6 @@ void SoftwareExceptionFrame::UpdateContextForOSRTransition(TransitionBlock* pTra #error "Unsupported platform for OSR TransitionBlock-based context capture" #endif } +#endif // FEATURE_ON_STACK_REPLACEMENT #endif // DACCESS_COMPILE diff --git a/src/coreclr/vm/loongarch64/asmconstants.h b/src/coreclr/vm/loongarch64/asmconstants.h index 77404df969160f..73191d6b699f2e 100644 --- a/src/coreclr/vm/loongarch64/asmconstants.h +++ b/src/coreclr/vm/loongarch64/asmconstants.h @@ -99,12 +99,26 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__Frame == sizeof(Frame)); #define SIZEOF__CONTEXT 0x520 ASMCONSTANTS_C_ASSERT(SIZEOF__CONTEXT == sizeof(T_CONTEXT)); -#define OFFSETOF__CONTEXT__S0 0xC0 -ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__S0 == offsetof(T_CONTEXT, S0)); +#define OFFSETOF__CONTEXT__ContextFlags 0x0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__ContextFlags == offsetof(T_CONTEXT, ContextFlags)); + +#define OFFSETOF__CONTEXT__Ra 0x08 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Ra == offsetof(T_CONTEXT, Ra)); + +#define OFFSETOF__CONTEXT__Sp 0x18 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Sp == 
offsetof(T_CONTEXT, Sp)); #define OFFSETOF__CONTEXT__Fp 0xB8 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Fp == offsetof(T_CONTEXT, Fp)); +#define OFFSETOF__CONTEXT__S0 0xC0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__S0 == offsetof(T_CONTEXT, S0)); + +#define OFFSETOF__CONTEXT__Pc 0xF8 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Pc == offsetof(T_CONTEXT, Pc)); + +#define CONTEXT_INTEGER_BIT 1 + //========================================= #define OFFSETOF__MethodTable__m_dwFlags 0x0 ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_dwFlags == offsetof(MethodTable, m_dwFlags)); diff --git a/src/coreclr/vm/loongarch64/asmhelpers.S b/src/coreclr/vm/loongarch64/asmhelpers.S index 6a2f1f59e46d06..999dbee8584211 100644 --- a/src/coreclr/vm/loongarch64/asmhelpers.S +++ b/src/coreclr/vm/loongarch64/asmhelpers.S @@ -1066,3 +1066,44 @@ NESTED_ENTRY IL_Rethrow, _TEXT, NoHandler break 0 NESTED_END IL_Rethrow, _TEXT +// ------------------------------------------------------------------ +// ClrRestoreNonvolatileContextWorker +// +// Restores non-volatile registers based on ContextFlags and jumps to target PC. 
+// +// Arguments: +// $a0 - pointer to CONTEXT structure +// $a1 - unused (SSP, not supported on LoongArch64) +// ------------------------------------------------------------------ +LEAF_ENTRY ClrRestoreNonvolatileContextWorker, _TEXT + + // Check if CONTEXT_INTEGER is set + ld.w $t0, $a0, OFFSETOF__CONTEXT__ContextFlags + andi $t1, $t0, (1 << CONTEXT_INTEGER_BIT) + beqz $t1, LOCAL_LABEL(SkipIntegerRestore) + + // Restore callee-saved registers s0-s8 + ld.d $s0, $a0, OFFSETOF__CONTEXT__S0 + ld.d $s1, $a0, (OFFSETOF__CONTEXT__S0 + 8) + ld.d $s2, $a0, (OFFSETOF__CONTEXT__S0 + 16) + ld.d $s3, $a0, (OFFSETOF__CONTEXT__S0 + 24) + ld.d $s4, $a0, (OFFSETOF__CONTEXT__S0 + 32) + ld.d $s5, $a0, (OFFSETOF__CONTEXT__S0 + 40) + ld.d $s6, $a0, (OFFSETOF__CONTEXT__S0 + 48) + ld.d $s7, $a0, (OFFSETOF__CONTEXT__S0 + 56) + ld.d $s8, $a0, (OFFSETOF__CONTEXT__S0 + 64) + +LOCAL_LABEL(SkipIntegerRestore): + // Restore fp, ra + ld.d $fp, $a0, OFFSETOF__CONTEXT__Fp + ld.d $ra, $a0, OFFSETOF__CONTEXT__Ra + + // Load sp and pc + ld.d $t0, $a0, OFFSETOF__CONTEXT__Sp + ld.d $t1, $a0, OFFSETOF__CONTEXT__Pc + + // Set sp and jump + move $sp, $t0 + jr $t1 + +LEAF_END ClrRestoreNonvolatileContextWorker, _TEXT diff --git a/src/coreclr/vm/riscv64/asmconstants.h b/src/coreclr/vm/riscv64/asmconstants.h index 12efe007f07c4f..50ba8149173101 100644 --- a/src/coreclr/vm/riscv64/asmconstants.h +++ b/src/coreclr/vm/riscv64/asmconstants.h @@ -94,6 +94,15 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__Frame == sizeof(Frame)); #define SIZEOF__CONTEXT 0x220 ASMCONSTANTS_C_ASSERT(SIZEOF__CONTEXT == sizeof(T_CONTEXT)); +#define OFFSETOF__CONTEXT__ContextFlags 0x0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__ContextFlags == offsetof(T_CONTEXT, ContextFlags)); + +#define OFFSETOF__CONTEXT__Ra 0x08 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Ra == offsetof(T_CONTEXT, Ra)); + +#define OFFSETOF__CONTEXT__Sp 0x10 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Sp == offsetof(T_CONTEXT, Sp)); + #define OFFSETOF__CONTEXT__Gp 0x20 
ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Gp == offsetof(T_CONTEXT, Gp)); @@ -109,6 +118,11 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__S1 == offsetof(T_CONTEXT, S1)); #define OFFSETOF__CONTEXT__S2 0x98 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__S2 == offsetof(T_CONTEXT, S2)); +#define OFFSETOF__CONTEXT__Pc 0xF0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Pc == offsetof(T_CONTEXT, Pc)); + +#define CONTEXT_INTEGER_BIT 1 + //========================================= #define OFFSETOF__MethodTable__m_dwFlags 0x0 ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_dwFlags == offsetof(MethodTable, m_dwFlags)); diff --git a/src/coreclr/vm/riscv64/asmhelpers.S b/src/coreclr/vm/riscv64/asmhelpers.S index f4a02d30c92830..1fad5fe1021287 100644 --- a/src/coreclr/vm/riscv64/asmhelpers.S +++ b/src/coreclr/vm/riscv64/asmhelpers.S @@ -923,6 +923,50 @@ NESTED_ENTRY IL_Rethrow, _TEXT, NoHandler ebreak NESTED_END IL_Rethrow, _TEXT +// ------------------------------------------------------------------ +// ClrRestoreNonvolatileContextWorker +// +// Restores non-volatile registers based on ContextFlags and jumps to target PC. 
+// +// Arguments: +// a0 - pointer to CONTEXT structure +// a1 - unused (SSP, not supported on RISC-V64) +// ------------------------------------------------------------------ +LEAF_ENTRY ClrRestoreNonvolatileContextWorker, _TEXT + + // Check if CONTEXT_INTEGER is set + lw t0, OFFSETOF__CONTEXT__ContextFlags(a0) + andi t1, t0, (1 << CONTEXT_INTEGER_BIT) + beqz t1, LOCAL_LABEL(SkipIntegerRestore) + + // Restore callee-saved registers s1-s11 + ld s1, OFFSETOF__CONTEXT__S1(a0) + ld s2, OFFSETOF__CONTEXT__S2(a0) + ld s3, (OFFSETOF__CONTEXT__S2 + 8)(a0) + ld s4, (OFFSETOF__CONTEXT__S2 + 16)(a0) + ld s5, (OFFSETOF__CONTEXT__S2 + 24)(a0) + ld s6, (OFFSETOF__CONTEXT__S2 + 32)(a0) + ld s7, (OFFSETOF__CONTEXT__S2 + 40)(a0) + ld s8, (OFFSETOF__CONTEXT__S2 + 48)(a0) + ld s9, (OFFSETOF__CONTEXT__S2 + 56)(a0) + ld s10, (OFFSETOF__CONTEXT__S2 + 64)(a0) + ld s11, (OFFSETOF__CONTEXT__S2 + 72)(a0) + +LOCAL_LABEL(SkipIntegerRestore): + // Restore fp, ra + ld fp, OFFSETOF__CONTEXT__Fp(a0) + ld ra, OFFSETOF__CONTEXT__Ra(a0) + + // Load sp and pc + ld t0, OFFSETOF__CONTEXT__Sp(a0) + ld t1, OFFSETOF__CONTEXT__Pc(a0) + + // Set sp and jump + mv sp, t0 + jr t1 + +LEAF_END ClrRestoreNonvolatileContextWorker, _TEXT + #ifdef FEATURE_INTERPRETER // Align interpreter stack by adjusting it by 8 bytes diff --git a/src/coreclr/vm/threads.cpp b/src/coreclr/vm/threads.cpp index 9ec749fc94c30b..3ddbbb3f08cbcc 100644 --- a/src/coreclr/vm/threads.cpp +++ b/src/coreclr/vm/threads.cpp @@ -6634,7 +6634,7 @@ void Thread::InitializeSpecialUserModeApc() #endif // FEATURE_SPECIAL_USER_MODE_APC -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) EXTERN_C void STDCALL ClrRestoreNonvolatileContextWorker(PCONTEXT ContextRecord, DWORD64 ssp); #endif @@ -6647,7 +6647,7 @@ void ClrRestoreNonvolatileContext(PCONTEXT ContextRecord, size_t targetSSP) targetSSP = GetSSP(ContextRecord); } 
ClrRestoreNonvolatileContextWorker(ContextRecord, targetSSP); -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) ClrRestoreNonvolatileContextWorker(ContextRecord, 0); #elif defined(TARGET_X86) && defined(TARGET_WINDOWS) // need to pop the SEH records before write over the stack From d27fb81cd25d537b132e12539fa45bc28552458e Mon Sep 17 00:00:00 2001 From: Adeel <3840695+am11@users.noreply.github.com> Date: Sat, 17 Jan 2026 17:15:53 +0200 Subject: [PATCH 05/14] wasm build fixes --- src/coreclr/vm/excep.cpp | 3 +++ src/coreclr/vm/loongarch64/asmconstants.h | 10 +++++----- src/coreclr/vm/riscv64/asmconstants.h | 14 +++++++++----- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index 8abd4dbc0428b0..c6f9a0a8079cbd 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -5607,6 +5607,9 @@ static void UnwindWriteBarrierToCaller(CONTEXT* pContext) #elif defined(TARGET_RISCV64) // On RISC-V64, return address is in RA SetIP(pContext, pContext->Ra); +#elif defined(TARGET_WASM) + // WASM uses interpreter, write barriers don't fault + UNREACHABLE(); #else #error "UnwindWriteBarrierToCaller not implemented for this architecture" #endif diff --git a/src/coreclr/vm/loongarch64/asmconstants.h b/src/coreclr/vm/loongarch64/asmconstants.h index 73191d6b699f2e..8727493e0ec103 100644 --- a/src/coreclr/vm/loongarch64/asmconstants.h +++ b/src/coreclr/vm/loongarch64/asmconstants.h @@ -102,19 +102,19 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__CONTEXT == sizeof(T_CONTEXT)); #define OFFSETOF__CONTEXT__ContextFlags 0x0 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__ContextFlags == offsetof(T_CONTEXT, ContextFlags)); -#define OFFSETOF__CONTEXT__Ra 0x08 +#define OFFSETOF__CONTEXT__Ra 0x10 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Ra == offsetof(T_CONTEXT, Ra)); -#define OFFSETOF__CONTEXT__Sp 0x18 +#define OFFSETOF__CONTEXT__Sp 0x20 
ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Sp == offsetof(T_CONTEXT, Sp)); -#define OFFSETOF__CONTEXT__Fp 0xB8 +#define OFFSETOF__CONTEXT__Fp 0xB0 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Fp == offsetof(T_CONTEXT, Fp)); -#define OFFSETOF__CONTEXT__S0 0xC0 +#define OFFSETOF__CONTEXT__S0 0xB8 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__S0 == offsetof(T_CONTEXT, S0)); -#define OFFSETOF__CONTEXT__Pc 0xF8 +#define OFFSETOF__CONTEXT__Pc 0x100 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Pc == offsetof(T_CONTEXT, Pc)); #define CONTEXT_INTEGER_BIT 1 diff --git a/src/coreclr/vm/riscv64/asmconstants.h b/src/coreclr/vm/riscv64/asmconstants.h index 50ba8149173101..d3a72ca51a28ef 100644 --- a/src/coreclr/vm/riscv64/asmconstants.h +++ b/src/coreclr/vm/riscv64/asmconstants.h @@ -97,16 +97,20 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__CONTEXT == sizeof(T_CONTEXT)); #define OFFSETOF__CONTEXT__ContextFlags 0x0 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__ContextFlags == offsetof(T_CONTEXT, ContextFlags)); -#define OFFSETOF__CONTEXT__Ra 0x08 +#define OFFSETOF__CONTEXT__Ra 0x10 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Ra == offsetof(T_CONTEXT, Ra)); -#define OFFSETOF__CONTEXT__Sp 0x10 +#define OFFSETOF__CONTEXT__Sp 0x18 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Sp == offsetof(T_CONTEXT, Sp)); -#define OFFSETOF__CONTEXT__Gp 0x20 -ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Gp == offsetof(T_CONTEXT, Gp)); +#define OFFSETOF__CONTEXT__Fp 0x48 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Fp == offsetof(T_CONTEXT, Fp)); +#define OFFSETOF__CONTEXT__S1 0x50 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__S1 == offsetof(T_CONTEXT, S1)); -#define OFFSETOF__CONTEXT__Tp 0x28 +#define OFFSETOF__CONTEXT__S2 0x98 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__S2 == offsetof(T_CONTEXT, S2)); +#define OFFSETOF__CONTEXT__Pc 0x108 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Tp == offsetof(T_CONTEXT, Tp)); #define OFFSETOF__CONTEXT__Fp 0x48 From d15f9f4ba99e7cfb3176879fc592b9fc111e96f5 Mon Sep 17 00:00:00 2001 From: Adeel 
<3840695+am11@users.noreply.github.com> Date: Sat, 17 Jan 2026 17:44:06 +0200 Subject: [PATCH 06/14] update offsets --- src/coreclr/vm/loongarch64/asmconstants.h | 6 +++--- src/coreclr/vm/riscv64/asmconstants.h | 12 +----------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/src/coreclr/vm/loongarch64/asmconstants.h b/src/coreclr/vm/loongarch64/asmconstants.h index 8727493e0ec103..fc0bd870283d7b 100644 --- a/src/coreclr/vm/loongarch64/asmconstants.h +++ b/src/coreclr/vm/loongarch64/asmconstants.h @@ -108,13 +108,13 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Ra == offsetof(T_CONTEXT, Ra)); #define OFFSETOF__CONTEXT__Sp 0x20 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Sp == offsetof(T_CONTEXT, Sp)); -#define OFFSETOF__CONTEXT__Fp 0xB0 +#define OFFSETOF__CONTEXT__Fp 0xB8 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Fp == offsetof(T_CONTEXT, Fp)); -#define OFFSETOF__CONTEXT__S0 0xB8 +#define OFFSETOF__CONTEXT__S0 0xC0 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__S0 == offsetof(T_CONTEXT, S0)); -#define OFFSETOF__CONTEXT__Pc 0x100 +#define OFFSETOF__CONTEXT__Pc 0x108 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Pc == offsetof(T_CONTEXT, Pc)); #define CONTEXT_INTEGER_BIT 1 diff --git a/src/coreclr/vm/riscv64/asmconstants.h b/src/coreclr/vm/riscv64/asmconstants.h index d3a72ca51a28ef..d3d95764ef04db 100644 --- a/src/coreclr/vm/riscv64/asmconstants.h +++ b/src/coreclr/vm/riscv64/asmconstants.h @@ -103,16 +103,6 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Ra == offsetof(T_CONTEXT, Ra)); #define OFFSETOF__CONTEXT__Sp 0x18 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Sp == offsetof(T_CONTEXT, Sp)); -#define OFFSETOF__CONTEXT__Fp 0x48 -ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Fp == offsetof(T_CONTEXT, Fp)); -#define OFFSETOF__CONTEXT__S1 0x50 -ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__S1 == offsetof(T_CONTEXT, S1)); - -#define OFFSETOF__CONTEXT__S2 0x98 -ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__S2 == offsetof(T_CONTEXT, S2)); -#define OFFSETOF__CONTEXT__Pc 0x108 
-ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Tp == offsetof(T_CONTEXT, Tp)); - #define OFFSETOF__CONTEXT__Fp 0x48 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Fp == offsetof(T_CONTEXT, Fp)); @@ -122,7 +112,7 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__S1 == offsetof(T_CONTEXT, S1)); #define OFFSETOF__CONTEXT__S2 0x98 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__S2 == offsetof(T_CONTEXT, S2)); -#define OFFSETOF__CONTEXT__Pc 0xF0 +#define OFFSETOF__CONTEXT__Pc 0x108 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Pc == offsetof(T_CONTEXT, Pc)); #define CONTEXT_INTEGER_BIT 1 From 4c4053bced78d0a87116f00e586dd5132c5b678d Mon Sep 17 00:00:00 2001 From: Adeel Mujahid <3840695+am11@users.noreply.github.com> Date: Sat, 17 Jan 2026 19:06:54 +0200 Subject: [PATCH 07/14] missing offsets defs --- src/coreclr/vm/riscv64/asmconstants.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/coreclr/vm/riscv64/asmconstants.h b/src/coreclr/vm/riscv64/asmconstants.h index d3d95764ef04db..c29837e076ef10 100644 --- a/src/coreclr/vm/riscv64/asmconstants.h +++ b/src/coreclr/vm/riscv64/asmconstants.h @@ -103,6 +103,12 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Ra == offsetof(T_CONTEXT, Ra)); #define OFFSETOF__CONTEXT__Sp 0x18 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Sp == offsetof(T_CONTEXT, Sp)); +#define OFFSETOF__CONTEXT__Gp 0x20 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Gp == offsetof(T_CONTEXT, Gp)); + +#define OFFSETOF__CONTEXT__Tp 0x28 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Tp == offsetof(T_CONTEXT, Tp)); + #define OFFSETOF__CONTEXT__Fp 0x48 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Fp == offsetof(T_CONTEXT, Fp)); From e1b4e33011d371c49e9c98a1adf00104c69c29dc Mon Sep 17 00:00:00 2001 From: Adeel <3840695+am11@users.noreply.github.com> Date: Sun, 18 Jan 2026 00:09:33 +0200 Subject: [PATCH 08/14] volatile arg regs --- src/coreclr/vm/arm64/asmconstants.h | 3 ++ src/coreclr/vm/arm64/asmhelpers.S | 35 +++++++++------ src/coreclr/vm/arm64/asmhelpers.asm | 32 +++++++++----- 
src/coreclr/vm/loongarch64/asmconstants.h | 3 ++ src/coreclr/vm/loongarch64/asmhelpers.S | 48 +++++++++++++-------- src/coreclr/vm/riscv64/asmconstants.h | 3 ++ src/coreclr/vm/riscv64/asmhelpers.S | 52 ++++++++++++++--------- 7 files changed, 116 insertions(+), 60 deletions(-) diff --git a/src/coreclr/vm/arm64/asmconstants.h b/src/coreclr/vm/arm64/asmconstants.h index 2163b2c0924c46..0a39bf2658dfc4 100644 --- a/src/coreclr/vm/arm64/asmconstants.h +++ b/src/coreclr/vm/arm64/asmconstants.h @@ -107,6 +107,9 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__ContextFlags == offsetof(T_CONTEXT, Con // CONTEXT_INTEGER_BIT is bit 1 in ContextFlags (from pal.h CONTEXT_INTEGER definition) #define CONTEXT_INTEGER_BIT 1 +#define OFFSETOF__CONTEXT__X0 0x08 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__X0 == offsetof(T_CONTEXT, X0)); + #define OFFSETOF__CONTEXT__X19 0xA0 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__X19 == offsetof(T_CONTEXT, X19)); diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 21e6fe4ebdf469..2eb8d59af0a35d 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -2811,7 +2811,8 @@ NESTED_END IL_Rethrow, _TEXT // ClrRestoreNonvolatileContextWorker // // Restores non-volatile (callee-saved) registers based on ContextFlags and jumps to the target IP. -// This is the ARM64 equivalent of the AMD64 version used for OSR transitions. +// Also restores volatile argument registers (x0-x7) which are used to pass arguments to the target function. +// This is the ARM64 equivalent of the AMD64 version used for OSR transitions and exception handling. 
// // Arguments: // x0 - pointer to CONTEXT structure @@ -2820,25 +2821,33 @@ NESTED_END IL_Rethrow, _TEXT // ------------------------------------------------------------------ LEAF_ENTRY ClrRestoreNonvolatileContextWorker, _TEXT + // Save CONTEXT pointer in x16 (scratch register) so we can restore x0 later + mov x16, x0 + // Check ContextFlags to see if we should restore integer registers - ldr w16, [x0, #OFFSETOF__CONTEXT__ContextFlags] - tbz w16, #CONTEXT_INTEGER_BIT, LOCAL_LABEL(SkipIntegerRestore) + ldr w17, [x16, #OFFSETOF__CONTEXT__ContextFlags] + tbz w17, #CONTEXT_INTEGER_BIT, LOCAL_LABEL(SkipIntegerRestore) // Restore callee-saved registers x19-x28 - ldp x19, x20, [x0, #OFFSETOF__CONTEXT__X19] - ldp x21, x22, [x0, #(OFFSETOF__CONTEXT__X19 + 16)] - ldp x23, x24, [x0, #(OFFSETOF__CONTEXT__X19 + 32)] - ldp x25, x26, [x0, #(OFFSETOF__CONTEXT__X19 + 48)] - ldp x27, x28, [x0, #(OFFSETOF__CONTEXT__X19 + 64)] + ldp x19, x20, [x16, #OFFSETOF__CONTEXT__X19] + ldp x21, x22, [x16, #(OFFSETOF__CONTEXT__X19 + 16)] + ldp x23, x24, [x16, #(OFFSETOF__CONTEXT__X19 + 32)] + ldp x25, x26, [x16, #(OFFSETOF__CONTEXT__X19 + 48)] + ldp x27, x28, [x16, #(OFFSETOF__CONTEXT__X19 + 64)] + + // Also restore argument registers x0-x7 (used for passing arguments to target function) + ldp x0, x1, [x16, #OFFSETOF__CONTEXT__X0] + ldp x2, x3, [x16, #(OFFSETOF__CONTEXT__X0 + 16)] + ldp x4, x5, [x16, #(OFFSETOF__CONTEXT__X0 + 32)] + ldp x6, x7, [x16, #(OFFSETOF__CONTEXT__X0 + 48)] LOCAL_LABEL(SkipIntegerRestore): // Restore fp (x29) and lr (x30) - ldp fp, lr, [x0, #OFFSETOF__CONTEXT__Fp] + ldp fp, lr, [x16, #OFFSETOF__CONTEXT__Fp] - // Load Sp and Pc into temporaries - // We use x16 and x17 as they are IP0/IP1 (intra-procedure call scratch registers) - ldr x16, [x0, #OFFSETOF__CONTEXT__Sp] - ldr x17, [x0, #OFFSETOF__CONTEXT__Pc] + // Load Sp and Pc into temporaries (x16 will be overwritten, x17 holds Pc) + ldr x17, [x16, #OFFSETOF__CONTEXT__Pc] + ldr x16, [x16, #OFFSETOF__CONTEXT__Sp] // Set 
sp and jump to target mov sp, x16 diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index d8e9f5fa9a292e..d99149197b522a 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -3064,29 +3064,39 @@ CopyLoop ; ClrRestoreNonvolatileContextWorker ; ; Restores non-volatile registers based on ContextFlags and jumps to PC. +; Also restores volatile argument registers (x0-x7) for passing arguments. ; x0 - pointer to CONTEXT structure ; x1 - unused (SSP, not used on ARM64) ; ------------------------------------------------------------------ LEAF_ENTRY ClrRestoreNonvolatileContextWorker + ; Save CONTEXT pointer in x16 so we can restore x0 later + mov x16, x0 + ; Check if CONTEXT_INTEGER bit is set - ldr w16, [x0, #OFFSETOF__CONTEXT__ContextFlags] - tbz w16, #1, SkipIntegerRestore ; CONTEXT_INTEGER_BIT = 1 + ldr w17, [x16, #OFFSETOF__CONTEXT__ContextFlags] + tbz w17, #1, SkipIntegerRestore ; CONTEXT_INTEGER_BIT = 1 ; Restore callee-saved registers x19-x28 - ldp x19, x20, [x0, #OFFSETOF__CONTEXT__X19] - ldp x21, x22, [x0, #(OFFSETOF__CONTEXT__X19 + 16)] - ldp x23, x24, [x0, #(OFFSETOF__CONTEXT__X19 + 32)] - ldp x25, x26, [x0, #(OFFSETOF__CONTEXT__X19 + 48)] - ldp x27, x28, [x0, #(OFFSETOF__CONTEXT__X19 + 64)] + ldp x19, x20, [x16, #OFFSETOF__CONTEXT__X19] + ldp x21, x22, [x16, #(OFFSETOF__CONTEXT__X19 + 16)] + ldp x23, x24, [x16, #(OFFSETOF__CONTEXT__X19 + 32)] + ldp x25, x26, [x16, #(OFFSETOF__CONTEXT__X19 + 48)] + ldp x27, x28, [x16, #(OFFSETOF__CONTEXT__X19 + 64)] + + ; Also restore argument registers x0-x7 (for passing arguments to target) + ldp x0, x1, [x16, #OFFSETOF__CONTEXT__X0] + ldp x2, x3, [x16, #(OFFSETOF__CONTEXT__X0 + 16)] + ldp x4, x5, [x16, #(OFFSETOF__CONTEXT__X0 + 32)] + ldp x6, x7, [x16, #(OFFSETOF__CONTEXT__X0 + 48)] SkipIntegerRestore ; Restore fp and lr - ldp fp, lr, [x0, #OFFSETOF__CONTEXT__Fp] + ldp fp, lr, [x16, #OFFSETOF__CONTEXT__Fp] - ; Load Sp and Pc - ldr x16, [x0, 
#OFFSETOF__CONTEXT__Sp] - ldr x17, [x0, #OFFSETOF__CONTEXT__Pc] + ; Load Sp and Pc (x16 will be overwritten) + ldr x17, [x16, #OFFSETOF__CONTEXT__Pc] + ldr x16, [x16, #OFFSETOF__CONTEXT__Sp] ; Set sp and jump mov sp, x16 diff --git a/src/coreclr/vm/loongarch64/asmconstants.h b/src/coreclr/vm/loongarch64/asmconstants.h index fc0bd870283d7b..24787cbd35146c 100644 --- a/src/coreclr/vm/loongarch64/asmconstants.h +++ b/src/coreclr/vm/loongarch64/asmconstants.h @@ -108,6 +108,9 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Ra == offsetof(T_CONTEXT, Ra)); #define OFFSETOF__CONTEXT__Sp 0x20 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Sp == offsetof(T_CONTEXT, Sp)); +#define OFFSETOF__CONTEXT__A0 0x28 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__A0 == offsetof(T_CONTEXT, A0)); + #define OFFSETOF__CONTEXT__Fp 0xB8 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Fp == offsetof(T_CONTEXT, Fp)); diff --git a/src/coreclr/vm/loongarch64/asmhelpers.S b/src/coreclr/vm/loongarch64/asmhelpers.S index 999dbee8584211..cf1680ca3fcfec 100644 --- a/src/coreclr/vm/loongarch64/asmhelpers.S +++ b/src/coreclr/vm/loongarch64/asmhelpers.S @@ -1070,6 +1070,7 @@ NESTED_END IL_Rethrow, _TEXT // ClrRestoreNonvolatileContextWorker // // Restores non-volatile registers based on ContextFlags and jumps to target PC. +// Also restores volatile argument registers ($a0-$a7) for passing arguments. 
// // Arguments: // $a0 - pointer to CONTEXT structure @@ -1077,30 +1078,43 @@ NESTED_END IL_Rethrow, _TEXT // ------------------------------------------------------------------ LEAF_ENTRY ClrRestoreNonvolatileContextWorker, _TEXT + // Save CONTEXT pointer in $t0 so we can restore $a0 later + move $t0, $a0 + // Check if CONTEXT_INTEGER is set - ld.w $t0, $a0, OFFSETOF__CONTEXT__ContextFlags - andi $t1, $t0, (1 << CONTEXT_INTEGER_BIT) - beqz $t1, LOCAL_LABEL(SkipIntegerRestore) + ld.w $t1, $t0, OFFSETOF__CONTEXT__ContextFlags + andi $t2, $t1, (1 << CONTEXT_INTEGER_BIT) + beqz $t2, LOCAL_LABEL(SkipIntegerRestore) // Restore callee-saved registers s0-s8 - ld.d $s0, $a0, OFFSETOF__CONTEXT__S0 - ld.d $s1, $a0, (OFFSETOF__CONTEXT__S0 + 8) - ld.d $s2, $a0, (OFFSETOF__CONTEXT__S0 + 16) - ld.d $s3, $a0, (OFFSETOF__CONTEXT__S0 + 24) - ld.d $s4, $a0, (OFFSETOF__CONTEXT__S0 + 32) - ld.d $s5, $a0, (OFFSETOF__CONTEXT__S0 + 40) - ld.d $s6, $a0, (OFFSETOF__CONTEXT__S0 + 48) - ld.d $s7, $a0, (OFFSETOF__CONTEXT__S0 + 56) - ld.d $s8, $a0, (OFFSETOF__CONTEXT__S0 + 64) + ld.d $s0, $t0, OFFSETOF__CONTEXT__S0 + ld.d $s1, $t0, (OFFSETOF__CONTEXT__S0 + 8) + ld.d $s2, $t0, (OFFSETOF__CONTEXT__S0 + 16) + ld.d $s3, $t0, (OFFSETOF__CONTEXT__S0 + 24) + ld.d $s4, $t0, (OFFSETOF__CONTEXT__S0 + 32) + ld.d $s5, $t0, (OFFSETOF__CONTEXT__S0 + 40) + ld.d $s6, $t0, (OFFSETOF__CONTEXT__S0 + 48) + ld.d $s7, $t0, (OFFSETOF__CONTEXT__S0 + 56) + ld.d $s8, $t0, (OFFSETOF__CONTEXT__S0 + 64) + + // Also restore argument registers $a0-$a7 (for passing arguments to target) + ld.d $a0, $t0, OFFSETOF__CONTEXT__A0 + ld.d $a1, $t0, (OFFSETOF__CONTEXT__A0 + 8) + ld.d $a2, $t0, (OFFSETOF__CONTEXT__A0 + 16) + ld.d $a3, $t0, (OFFSETOF__CONTEXT__A0 + 24) + ld.d $a4, $t0, (OFFSETOF__CONTEXT__A0 + 32) + ld.d $a5, $t0, (OFFSETOF__CONTEXT__A0 + 40) + ld.d $a6, $t0, (OFFSETOF__CONTEXT__A0 + 48) + ld.d $a7, $t0, (OFFSETOF__CONTEXT__A0 + 56) LOCAL_LABEL(SkipIntegerRestore): // Restore fp, ra - ld.d $fp, $a0, 
OFFSETOF__CONTEXT__Fp - ld.d $ra, $a0, OFFSETOF__CONTEXT__Ra + ld.d $fp, $t0, OFFSETOF__CONTEXT__Fp + ld.d $ra, $t0, OFFSETOF__CONTEXT__Ra - // Load sp and pc - ld.d $t0, $a0, OFFSETOF__CONTEXT__Sp - ld.d $t1, $a0, OFFSETOF__CONTEXT__Pc + // Load sp and pc ($t0 will be overwritten) + ld.d $t1, $t0, OFFSETOF__CONTEXT__Pc + ld.d $t0, $t0, OFFSETOF__CONTEXT__Sp // Set sp and jump move $sp, $t0 diff --git a/src/coreclr/vm/riscv64/asmconstants.h b/src/coreclr/vm/riscv64/asmconstants.h index c29837e076ef10..6bf0a6ae5f155f 100644 --- a/src/coreclr/vm/riscv64/asmconstants.h +++ b/src/coreclr/vm/riscv64/asmconstants.h @@ -115,6 +115,9 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Fp == offsetof(T_CONTEXT, Fp)); #define OFFSETOF__CONTEXT__S1 0x50 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__S1 == offsetof(T_CONTEXT, S1)); +#define OFFSETOF__CONTEXT__A0 0x58 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__A0 == offsetof(T_CONTEXT, A0)); + #define OFFSETOF__CONTEXT__S2 0x98 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__S2 == offsetof(T_CONTEXT, S2)); diff --git a/src/coreclr/vm/riscv64/asmhelpers.S b/src/coreclr/vm/riscv64/asmhelpers.S index 1fad5fe1021287..a3c5cac4135e68 100644 --- a/src/coreclr/vm/riscv64/asmhelpers.S +++ b/src/coreclr/vm/riscv64/asmhelpers.S @@ -927,6 +927,7 @@ NESTED_END IL_Rethrow, _TEXT // ClrRestoreNonvolatileContextWorker // // Restores non-volatile registers based on ContextFlags and jumps to target PC. +// Also restores volatile argument registers (a0-a7) for passing arguments. 
// // Arguments: // a0 - pointer to CONTEXT structure @@ -934,32 +935,45 @@ NESTED_END IL_Rethrow, _TEXT // ------------------------------------------------------------------ LEAF_ENTRY ClrRestoreNonvolatileContextWorker, _TEXT + // Save CONTEXT pointer in t0 so we can restore a0 later + mv t0, a0 + // Check if CONTEXT_INTEGER is set - lw t0, OFFSETOF__CONTEXT__ContextFlags(a0) - andi t1, t0, (1 << CONTEXT_INTEGER_BIT) - beqz t1, LOCAL_LABEL(SkipIntegerRestore) + lw t1, OFFSETOF__CONTEXT__ContextFlags(t0) + andi t2, t1, (1 << CONTEXT_INTEGER_BIT) + beqz t2, LOCAL_LABEL(SkipIntegerRestore) // Restore callee-saved registers s1-s11 - ld s1, OFFSETOF__CONTEXT__S1(a0) - ld s2, OFFSETOF__CONTEXT__S2(a0) - ld s3, (OFFSETOF__CONTEXT__S2 + 8)(a0) - ld s4, (OFFSETOF__CONTEXT__S2 + 16)(a0) - ld s5, (OFFSETOF__CONTEXT__S2 + 24)(a0) - ld s6, (OFFSETOF__CONTEXT__S2 + 32)(a0) - ld s7, (OFFSETOF__CONTEXT__S2 + 40)(a0) - ld s8, (OFFSETOF__CONTEXT__S2 + 48)(a0) - ld s9, (OFFSETOF__CONTEXT__S2 + 56)(a0) - ld s10, (OFFSETOF__CONTEXT__S2 + 64)(a0) - ld s11, (OFFSETOF__CONTEXT__S2 + 72)(a0) + ld s1, OFFSETOF__CONTEXT__S1(t0) + ld s2, OFFSETOF__CONTEXT__S2(t0) + ld s3, (OFFSETOF__CONTEXT__S2 + 8)(t0) + ld s4, (OFFSETOF__CONTEXT__S2 + 16)(t0) + ld s5, (OFFSETOF__CONTEXT__S2 + 24)(t0) + ld s6, (OFFSETOF__CONTEXT__S2 + 32)(t0) + ld s7, (OFFSETOF__CONTEXT__S2 + 40)(t0) + ld s8, (OFFSETOF__CONTEXT__S2 + 48)(t0) + ld s9, (OFFSETOF__CONTEXT__S2 + 56)(t0) + ld s10, (OFFSETOF__CONTEXT__S2 + 64)(t0) + ld s11, (OFFSETOF__CONTEXT__S2 + 72)(t0) + + // Also restore argument registers a0-a7 (for passing arguments to target) + ld a0, OFFSETOF__CONTEXT__A0(t0) + ld a1, (OFFSETOF__CONTEXT__A0 + 8)(t0) + ld a2, (OFFSETOF__CONTEXT__A0 + 16)(t0) + ld a3, (OFFSETOF__CONTEXT__A0 + 24)(t0) + ld a4, (OFFSETOF__CONTEXT__A0 + 32)(t0) + ld a5, (OFFSETOF__CONTEXT__A0 + 40)(t0) + ld a6, (OFFSETOF__CONTEXT__A0 + 48)(t0) + ld a7, (OFFSETOF__CONTEXT__A0 + 56)(t0) LOCAL_LABEL(SkipIntegerRestore): // Restore fp, ra - ld 
fp, OFFSETOF__CONTEXT__Fp(a0) - ld ra, OFFSETOF__CONTEXT__Ra(a0) + ld fp, OFFSETOF__CONTEXT__Fp(t0) + ld ra, OFFSETOF__CONTEXT__Ra(t0) - // Load sp and pc - ld t0, OFFSETOF__CONTEXT__Sp(a0) - ld t1, OFFSETOF__CONTEXT__Pc(a0) + // Load sp and pc (t0 will be overwritten) + ld t1, OFFSETOF__CONTEXT__Pc(t0) + ld t0, OFFSETOF__CONTEXT__Sp(t0) // Set sp and jump mv sp, t0 From 1ef509357963e8b94d8cad0c976e92701c5938a9 Mon Sep 17 00:00:00 2001 From: Adeel <3840695+am11@users.noreply.github.com> Date: Sun, 18 Jan 2026 07:05:28 +0200 Subject: [PATCH 09/14] arm64 tweaks --- src/coreclr/vm/arm64/asmhelpers.S | 39 ++++++++------- src/coreclr/vm/arm64/asmhelpers.asm | 27 +++++++---- src/coreclr/vm/excep.cpp | 64 +++++++++++++++++++++---- src/coreclr/vm/loongarch64/asmhelpers.S | 28 ++++++----- src/coreclr/vm/riscv64/asmhelpers.S | 28 ++++++----- 5 files changed, 125 insertions(+), 61 deletions(-) diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 2eb8d59af0a35d..696a8513a70bdf 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -2810,9 +2810,9 @@ NESTED_END IL_Rethrow, _TEXT // ------------------------------------------------------------------ // ClrRestoreNonvolatileContextWorker // -// Restores non-volatile (callee-saved) registers based on ContextFlags and jumps to the target IP. -// Also restores volatile argument registers (x0-x7) which are used to pass arguments to the target function. -// This is the ARM64 equivalent of the AMD64 version used for OSR transitions and exception handling. +// Restores registers based on ContextFlags and jumps to the target IP. +// When CONTEXT_INTEGER is set, restores ALL integer registers (x0-x28) +// because exception handling needs x0 (exception object). 
// // Arguments: // x0 - pointer to CONTEXT structure @@ -2821,36 +2821,41 @@ NESTED_END IL_Rethrow, _TEXT // ------------------------------------------------------------------ LEAF_ENTRY ClrRestoreNonvolatileContextWorker, _TEXT - // Save CONTEXT pointer in x16 (scratch register) so we can restore x0 later + // Save context pointer in x16 since we'll overwrite x0 mov x16, x0 // Check ContextFlags to see if we should restore integer registers ldr w17, [x16, #OFFSETOF__CONTEXT__ContextFlags] tbz w17, #CONTEXT_INTEGER_BIT, LOCAL_LABEL(SkipIntegerRestore) - // Restore callee-saved registers x19-x28 + // Restore ALL integer registers x0-x28 + // Exception handling needs x0 (exception object) + ldp x0, x1, [x16, #OFFSETOF__CONTEXT__X0] + ldp x2, x3, [x16, #(OFFSETOF__CONTEXT__X0 + 16)] + ldp x4, x5, [x16, #(OFFSETOF__CONTEXT__X0 + 32)] + ldp x6, x7, [x16, #(OFFSETOF__CONTEXT__X0 + 48)] + ldp x8, x9, [x16, #(OFFSETOF__CONTEXT__X0 + 64)] + ldp x10, x11, [x16, #(OFFSETOF__CONTEXT__X0 + 80)] + ldp x12, x13, [x16, #(OFFSETOF__CONTEXT__X0 + 96)] + ldp x14, x15, [x16, #(OFFSETOF__CONTEXT__X0 + 112)] + // Skip x16, x17 - they're scratch registers we're using + // x18 is platform reserved ldp x19, x20, [x16, #OFFSETOF__CONTEXT__X19] ldp x21, x22, [x16, #(OFFSETOF__CONTEXT__X19 + 16)] ldp x23, x24, [x16, #(OFFSETOF__CONTEXT__X19 + 32)] ldp x25, x26, [x16, #(OFFSETOF__CONTEXT__X19 + 48)] ldp x27, x28, [x16, #(OFFSETOF__CONTEXT__X19 + 64)] - // Also restore argument registers x0-x7 (used for passing arguments to target function) - ldp x0, x1, [x16, #OFFSETOF__CONTEXT__X0] - ldp x2, x3, [x16, #(OFFSETOF__CONTEXT__X0 + 16)] - ldp x4, x5, [x16, #(OFFSETOF__CONTEXT__X0 + 32)] - ldp x6, x7, [x16, #(OFFSETOF__CONTEXT__X0 + 48)] - LOCAL_LABEL(SkipIntegerRestore): // Restore fp (x29) and lr (x30) ldp fp, lr, [x16, #OFFSETOF__CONTEXT__Fp] - // Load Sp and Pc into temporaries (x16 will be overwritten, x17 holds Pc) - ldr x17, [x16, #OFFSETOF__CONTEXT__Pc] - ldr x16, [x16, 
#OFFSETOF__CONTEXT__Sp] + // Load Sp and Pc into scratch registers (after all other loads) + ldr x17, [x16, #OFFSETOF__CONTEXT__Sp] + ldr x16, [x16, #OFFSETOF__CONTEXT__Pc] // Set sp and jump to target - mov sp, x16 - br x17 + mov sp, x17 + br x16 -LEAF_END ClrRestoreNonvolatileContextWorker, _TEXT \ No newline at end of file +LEAF_END ClrRestoreNonvolatileContextWorker, _TEXT diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index d99149197b522a..b498d5d8d71e93 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -3063,33 +3063,40 @@ CopyLoop ; ------------------------------------------------------------------ ; ClrRestoreNonvolatileContextWorker ; -; Restores non-volatile registers based on ContextFlags and jumps to PC. -; Also restores volatile argument registers (x0-x7) for passing arguments. +; Restores registers based on ContextFlags and jumps to PC. +; When CONTEXT_INTEGER is set, restores ALL integer registers (x0-x28) +; because exception handling needs x0 (exception object). 
+; ; x0 - pointer to CONTEXT structure ; x1 - unused (SSP, not used on ARM64) ; ------------------------------------------------------------------ LEAF_ENTRY ClrRestoreNonvolatileContextWorker - ; Save CONTEXT pointer in x16 so we can restore x0 later + ; Save CONTEXT pointer in x16 before we potentially clobber x0 mov x16, x0 ; Check if CONTEXT_INTEGER bit is set ldr w17, [x16, #OFFSETOF__CONTEXT__ContextFlags] tbz w17, #1, SkipIntegerRestore ; CONTEXT_INTEGER_BIT = 1 - ; Restore callee-saved registers x19-x28 + ; Restore ALL integer registers x0-x28 + ; Exception handling needs x0 (exception object) + ldp x0, x1, [x16, #OFFSETOF__CONTEXT__X0] + ldp x2, x3, [x16, #(OFFSETOF__CONTEXT__X0 + 16)] + ldp x4, x5, [x16, #(OFFSETOF__CONTEXT__X0 + 32)] + ldp x6, x7, [x16, #(OFFSETOF__CONTEXT__X0 + 48)] + ldp x8, x9, [x16, #(OFFSETOF__CONTEXT__X0 + 64)] + ldp x10, x11, [x16, #(OFFSETOF__CONTEXT__X0 + 80)] + ldp x12, x13, [x16, #(OFFSETOF__CONTEXT__X0 + 96)] + ldp x14, x15, [x16, #(OFFSETOF__CONTEXT__X0 + 112)] + ; Skip x16, x17 - they're scratch registers we're using + ; x18 is platform reserved ldp x19, x20, [x16, #OFFSETOF__CONTEXT__X19] ldp x21, x22, [x16, #(OFFSETOF__CONTEXT__X19 + 16)] ldp x23, x24, [x16, #(OFFSETOF__CONTEXT__X19 + 32)] ldp x25, x26, [x16, #(OFFSETOF__CONTEXT__X19 + 48)] ldp x27, x28, [x16, #(OFFSETOF__CONTEXT__X19 + 64)] - ; Also restore argument registers x0-x7 (for passing arguments to target) - ldp x0, x1, [x16, #OFFSETOF__CONTEXT__X0] - ldp x2, x3, [x16, #(OFFSETOF__CONTEXT__X0 + 16)] - ldp x4, x5, [x16, #(OFFSETOF__CONTEXT__X0 + 32)] - ldp x6, x7, [x16, #(OFFSETOF__CONTEXT__X0 + 48)] - SkipIntegerRestore ; Restore fp and lr ldp fp, lr, [x16, #OFFSETOF__CONTEXT__Fp] diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index c6f9a0a8079cbd..ff4c2972b4396c 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -11278,23 +11278,52 @@ void SoftwareExceptionFrame::UpdateContextForOSRTransition(TransitionBlock* pTra 
*pCurrentFP = pTransitionBlock->m_calleeSavedRegisters.Rbp; #elif defined(TARGET_ARM64) - // Only restore control registers - callee-saved regs already have correct CPU values - pContext->ContextFlags = CONTEXT_CONTROL; + // Restore control and integer registers, matching the x64 approach + pContext->ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER; + // Copy callee-saved registers x19-x28 from TransitionBlock + // These are the values F() had when it called JIT_Patchpoint + pContext->X19 = pTransitionBlock->m_calleeSavedRegisters.x19; + pContext->X20 = pTransitionBlock->m_calleeSavedRegisters.x20; + pContext->X21 = pTransitionBlock->m_calleeSavedRegisters.x21; + pContext->X22 = pTransitionBlock->m_calleeSavedRegisters.x22; + pContext->X23 = pTransitionBlock->m_calleeSavedRegisters.x23; + pContext->X24 = pTransitionBlock->m_calleeSavedRegisters.x24; + pContext->X25 = pTransitionBlock->m_calleeSavedRegisters.x25; + pContext->X26 = pTransitionBlock->m_calleeSavedRegisters.x26; + pContext->X27 = pTransitionBlock->m_calleeSavedRegisters.x27; + pContext->X28 = pTransitionBlock->m_calleeSavedRegisters.x28; + + // F()'s FP points to where F() saved [caller_fp, caller_lr] UINT_PTR managedFrameFP = pTransitionBlock->m_calleeSavedRegisters.x29; - TADDR callerFP = *((TADDR*)managedFrameFP); - TADDR callerLR = *((TADDR*)(managedFrameFP + 8)); - - // Use caller's FP so F-OSR returns with correct FP for caller + // Read Test()'s FP and LR from F()'s stack frame + TADDR callerFP = *((TADDR*)managedFrameFP); // Test()'s FP at [F's FP + 0] + TADDR callerLR = *((TADDR*)(managedFrameFP + 8)); // LR to Test() at [F's FP + 8] + + // CRITICAL: Use Test()'s FP (callerFP), not F()'s FP (managedFrameFP)! 
+ // This matches what VirtualUnwind produces - the CALLER's frame pointer pContext->Fp = callerFP; pContext->Lr = callerLR; + // SP = F()'s SP when it called JIT_Patchpoint *pCurrentSP = (UINT_PTR)(pTransitionBlock + 1); + // FP output should also be caller's FP to match VirtualUnwind behavior *pCurrentFP = callerFP; #elif defined(TARGET_LOONGARCH64) - // Only restore control registers - callee-saved regs already have correct CPU values - pContext->ContextFlags = CONTEXT_CONTROL; + // Restore control and integer registers, matching the ARM64 approach + pContext->ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER; + + // Copy callee-saved registers s0-s8 from TransitionBlock + pContext->S0 = pTransitionBlock->m_calleeSavedRegisters.s0; + pContext->S1 = pTransitionBlock->m_calleeSavedRegisters.s1; + pContext->S2 = pTransitionBlock->m_calleeSavedRegisters.s2; + pContext->S3 = pTransitionBlock->m_calleeSavedRegisters.s3; + pContext->S4 = pTransitionBlock->m_calleeSavedRegisters.s4; + pContext->S5 = pTransitionBlock->m_calleeSavedRegisters.s5; + pContext->S6 = pTransitionBlock->m_calleeSavedRegisters.s6; + pContext->S7 = pTransitionBlock->m_calleeSavedRegisters.s7; + pContext->S8 = pTransitionBlock->m_calleeSavedRegisters.s8; UINT_PTR managedFrameFP = pTransitionBlock->m_calleeSavedRegisters.fp; TADDR callerFP = *((TADDR*)managedFrameFP); @@ -11307,8 +11336,23 @@ void SoftwareExceptionFrame::UpdateContextForOSRTransition(TransitionBlock* pTra *pCurrentFP = callerFP; #elif defined(TARGET_RISCV64) - // Only restore control registers - callee-saved regs already have correct CPU values - pContext->ContextFlags = CONTEXT_CONTROL; + // Restore control and integer registers, matching the ARM64 approach + pContext->ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER; + + // Copy callee-saved registers s1-s11 from TransitionBlock + pContext->S1 = pTransitionBlock->m_calleeSavedRegisters.s1; + pContext->S2 = pTransitionBlock->m_calleeSavedRegisters.s2; + pContext->S3 = 
pTransitionBlock->m_calleeSavedRegisters.s3; + pContext->S4 = pTransitionBlock->m_calleeSavedRegisters.s4; + pContext->S5 = pTransitionBlock->m_calleeSavedRegisters.s5; + pContext->S6 = pTransitionBlock->m_calleeSavedRegisters.s6; + pContext->S7 = pTransitionBlock->m_calleeSavedRegisters.s7; + pContext->S8 = pTransitionBlock->m_calleeSavedRegisters.s8; + pContext->S9 = pTransitionBlock->m_calleeSavedRegisters.s9; + pContext->S10 = pTransitionBlock->m_calleeSavedRegisters.s10; + pContext->S11 = pTransitionBlock->m_calleeSavedRegisters.s11; + pContext->Tp = pTransitionBlock->m_calleeSavedRegisters.tp; + pContext->Gp = pTransitionBlock->m_calleeSavedRegisters.gp; UINT_PTR managedFrameFP = pTransitionBlock->m_calleeSavedRegisters.fp; TADDR callerFP = *((TADDR*)managedFrameFP); diff --git a/src/coreclr/vm/loongarch64/asmhelpers.S b/src/coreclr/vm/loongarch64/asmhelpers.S index cf1680ca3fcfec..4dc6fd11d4e618 100644 --- a/src/coreclr/vm/loongarch64/asmhelpers.S +++ b/src/coreclr/vm/loongarch64/asmhelpers.S @@ -1069,8 +1069,9 @@ NESTED_END IL_Rethrow, _TEXT // ------------------------------------------------------------------ // ClrRestoreNonvolatileContextWorker // -// Restores non-volatile registers based on ContextFlags and jumps to target PC. -// Also restores volatile argument registers ($a0-$a7) for passing arguments. +// Restores registers based on ContextFlags and jumps to target PC. +// When CONTEXT_INTEGER is set, restores ALL integer registers +// because exception handling needs $a0 (exception object). 
// // Arguments: // $a0 - pointer to CONTEXT structure @@ -1078,7 +1079,7 @@ NESTED_END IL_Rethrow, _TEXT // ------------------------------------------------------------------ LEAF_ENTRY ClrRestoreNonvolatileContextWorker, _TEXT - // Save CONTEXT pointer in $t0 so we can restore $a0 later + // Save CONTEXT pointer in $t0 before we potentially clobber $a0 move $t0, $a0 // Check if CONTEXT_INTEGER is set @@ -1086,7 +1087,17 @@ LEAF_ENTRY ClrRestoreNonvolatileContextWorker, _TEXT andi $t2, $t1, (1 << CONTEXT_INTEGER_BIT) beqz $t2, LOCAL_LABEL(SkipIntegerRestore) - // Restore callee-saved registers s0-s8 + // Restore ALL integer registers (needed by exception handling) + // Argument registers a1-a7 (a0 last since we use it) + ld.d $a1, $t0, (OFFSETOF__CONTEXT__A0 + 8) + ld.d $a2, $t0, (OFFSETOF__CONTEXT__A0 + 16) + ld.d $a3, $t0, (OFFSETOF__CONTEXT__A0 + 24) + ld.d $a4, $t0, (OFFSETOF__CONTEXT__A0 + 32) + ld.d $a5, $t0, (OFFSETOF__CONTEXT__A0 + 40) + ld.d $a6, $t0, (OFFSETOF__CONTEXT__A0 + 48) + ld.d $a7, $t0, (OFFSETOF__CONTEXT__A0 + 56) + + // Callee-saved registers s0-s8 ld.d $s0, $t0, OFFSETOF__CONTEXT__S0 ld.d $s1, $t0, (OFFSETOF__CONTEXT__S0 + 8) ld.d $s2, $t0, (OFFSETOF__CONTEXT__S0 + 16) @@ -1097,15 +1108,8 @@ LEAF_ENTRY ClrRestoreNonvolatileContextWorker, _TEXT ld.d $s7, $t0, (OFFSETOF__CONTEXT__S0 + 56) ld.d $s8, $t0, (OFFSETOF__CONTEXT__S0 + 64) - // Also restore argument registers $a0-$a7 (for passing arguments to target) + // Restore $a0 last (exception object for exception handling) ld.d $a0, $t0, OFFSETOF__CONTEXT__A0 - ld.d $a1, $t0, (OFFSETOF__CONTEXT__A0 + 8) - ld.d $a2, $t0, (OFFSETOF__CONTEXT__A0 + 16) - ld.d $a3, $t0, (OFFSETOF__CONTEXT__A0 + 24) - ld.d $a4, $t0, (OFFSETOF__CONTEXT__A0 + 32) - ld.d $a5, $t0, (OFFSETOF__CONTEXT__A0 + 40) - ld.d $a6, $t0, (OFFSETOF__CONTEXT__A0 + 48) - ld.d $a7, $t0, (OFFSETOF__CONTEXT__A0 + 56) LOCAL_LABEL(SkipIntegerRestore): // Restore fp, ra diff --git a/src/coreclr/vm/riscv64/asmhelpers.S 
b/src/coreclr/vm/riscv64/asmhelpers.S index a3c5cac4135e68..9d00c6f4ab39f3 100644 --- a/src/coreclr/vm/riscv64/asmhelpers.S +++ b/src/coreclr/vm/riscv64/asmhelpers.S @@ -926,8 +926,9 @@ NESTED_END IL_Rethrow, _TEXT // ------------------------------------------------------------------ // ClrRestoreNonvolatileContextWorker // -// Restores non-volatile registers based on ContextFlags and jumps to target PC. -// Also restores volatile argument registers (a0-a7) for passing arguments. +// Restores registers based on ContextFlags and jumps to target PC. +// When CONTEXT_INTEGER is set, restores ALL integer registers +// because exception handling needs a0 (exception object). // // Arguments: // a0 - pointer to CONTEXT structure @@ -935,7 +936,7 @@ NESTED_END IL_Rethrow, _TEXT // ------------------------------------------------------------------ LEAF_ENTRY ClrRestoreNonvolatileContextWorker, _TEXT - // Save CONTEXT pointer in t0 so we can restore a0 later + // Save CONTEXT pointer in t0 before we potentially clobber a0 mv t0, a0 // Check if CONTEXT_INTEGER is set @@ -943,7 +944,17 @@ LEAF_ENTRY ClrRestoreNonvolatileContextWorker, _TEXT andi t2, t1, (1 << CONTEXT_INTEGER_BIT) beqz t2, LOCAL_LABEL(SkipIntegerRestore) - // Restore callee-saved registers s1-s11 + // Restore ALL integer registers (needed by exception handling) + // Argument registers a1-a7 (a0 last since we use it) + ld a1, (OFFSETOF__CONTEXT__A0 + 8)(t0) + ld a2, (OFFSETOF__CONTEXT__A0 + 16)(t0) + ld a3, (OFFSETOF__CONTEXT__A0 + 24)(t0) + ld a4, (OFFSETOF__CONTEXT__A0 + 32)(t0) + ld a5, (OFFSETOF__CONTEXT__A0 + 40)(t0) + ld a6, (OFFSETOF__CONTEXT__A0 + 48)(t0) + ld a7, (OFFSETOF__CONTEXT__A0 + 56)(t0) + + // Callee-saved registers s1-s11 ld s1, OFFSETOF__CONTEXT__S1(t0) ld s2, OFFSETOF__CONTEXT__S2(t0) ld s3, (OFFSETOF__CONTEXT__S2 + 8)(t0) @@ -956,15 +967,8 @@ LEAF_ENTRY ClrRestoreNonvolatileContextWorker, _TEXT ld s10, (OFFSETOF__CONTEXT__S2 + 64)(t0) ld s11, (OFFSETOF__CONTEXT__S2 + 72)(t0) - // Also 
restore argument registers a0-a7 (for passing arguments to target) + // Restore a0 last (exception object for exception handling) ld a0, OFFSETOF__CONTEXT__A0(t0) - ld a1, (OFFSETOF__CONTEXT__A0 + 8)(t0) - ld a2, (OFFSETOF__CONTEXT__A0 + 16)(t0) - ld a3, (OFFSETOF__CONTEXT__A0 + 24)(t0) - ld a4, (OFFSETOF__CONTEXT__A0 + 32)(t0) - ld a5, (OFFSETOF__CONTEXT__A0 + 40)(t0) - ld a6, (OFFSETOF__CONTEXT__A0 + 48)(t0) - ld a7, (OFFSETOF__CONTEXT__A0 + 56)(t0) LOCAL_LABEL(SkipIntegerRestore): // Restore fp, ra From a787e2fc55ccdea582fd7a3463bf7f4796fd5513 Mon Sep 17 00:00:00 2001 From: Adeel <3840695+am11@users.noreply.github.com> Date: Sun, 18 Jan 2026 14:17:15 +0200 Subject: [PATCH 10/14] more tweaks --- src/coreclr/vm/arm64/asmconstants.h | 8 ++++++++ src/coreclr/vm/arm64/asmhelpers.S | 14 +++++++++++++- src/coreclr/vm/arm64/asmhelpers.asm | 14 +++++++++++++- src/coreclr/vm/loongarch64/asmconstants.h | 9 +++++++++ src/coreclr/vm/loongarch64/asmhelpers.S | 19 ++++++++++++++++++- src/coreclr/vm/riscv64/asmconstants.h | 9 +++++++++ src/coreclr/vm/riscv64/asmhelpers.S | 23 ++++++++++++++++++++++- 7 files changed, 92 insertions(+), 4 deletions(-) diff --git a/src/coreclr/vm/arm64/asmconstants.h b/src/coreclr/vm/arm64/asmconstants.h index 0a39bf2658dfc4..1575d54928342d 100644 --- a/src/coreclr/vm/arm64/asmconstants.h +++ b/src/coreclr/vm/arm64/asmconstants.h @@ -125,6 +125,14 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Sp == offsetof(T_CONTEXT, Sp)); #define OFFSETOF__CONTEXT__Pc 0x108 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Pc == offsetof(T_CONTEXT, Pc)); +// Floating point registers V[0..31] start at 0x110, each is 16 bytes (NEON128) +// Non-volatile FP registers are V8-V15 (d8-d15) +#define OFFSETOF__CONTEXT__V0 0x110 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__V0 == offsetof(T_CONTEXT, V)); + +// CONTEXT_FLOATING_POINT_BIT is bit 2 in ContextFlags +#define CONTEXT_FLOATING_POINT_BIT 2 + #define OFFSETOF__DynamicHelperStubArgs__Constant1 0x0 
ASMCONSTANTS_C_ASSERT(OFFSETOF__DynamicHelperStubArgs__Constant1 == offsetof(DynamicHelperStubArgs, Constant1)); diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 696a8513a70bdf..c0334f7ef9c128 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -2813,6 +2813,7 @@ NESTED_END IL_Rethrow, _TEXT // Restores registers based on ContextFlags and jumps to the target IP. // When CONTEXT_INTEGER is set, restores ALL integer registers (x0-x28) // because exception handling needs x0 (exception object). +// When CONTEXT_FLOATING_POINT is set, restores non-volatile FP regs (d8-d15). // // Arguments: // x0 - pointer to CONTEXT structure @@ -2824,8 +2825,19 @@ LEAF_ENTRY ClrRestoreNonvolatileContextWorker, _TEXT // Save context pointer in x16 since we'll overwrite x0 mov x16, x0 - // Check ContextFlags to see if we should restore integer registers + // Check ContextFlags to see if we should restore floating point registers ldr w17, [x16, #OFFSETOF__CONTEXT__ContextFlags] + tbz w17, #CONTEXT_FLOATING_POINT_BIT, LOCAL_LABEL(SkipFloatingPointRestore) + + // Restore non-volatile FP registers d8-d15 (loads the full 128-bit q8-q15) + // V8 is at OFFSETOF__CONTEXT__V0 + 8*16 = 0x110 + 0x80 = 0x190 + ldp q8, q9, [x16, #(OFFSETOF__CONTEXT__V0 + 8*16)] + ldp q10, q11, [x16, #(OFFSETOF__CONTEXT__V0 + 10*16)] + ldp q12, q13, [x16, #(OFFSETOF__CONTEXT__V0 + 12*16)] + ldp q14, q15, [x16, #(OFFSETOF__CONTEXT__V0 + 14*16)] + +LOCAL_LABEL(SkipFloatingPointRestore): + // Check ContextFlags to see if we should restore integer registers tbz w17, #CONTEXT_INTEGER_BIT, LOCAL_LABEL(SkipIntegerRestore) // Restore ALL integer registers x0-x28 diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index b498d5d8d71e93..724c67d26b1a57 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -3066,6 +3066,7 @@ CopyLoop ; Restores registers based on ContextFlags and
jumps to PC. ; When CONTEXT_INTEGER is set, restores ALL integer registers (x0-x28) ; because exception handling needs x0 (exception object). +; When CONTEXT_FLOATING_POINT is set, restores non-volatile FP regs (d8-d15). ; ; x0 - pointer to CONTEXT structure ; x1 - unused (SSP, not used on ARM64) @@ -3075,8 +3076,19 @@ CopyLoop ; Save CONTEXT pointer in x16 before we potentially clobber x0 mov x16, x0 - ; Check if CONTEXT_INTEGER bit is set + ; Check if CONTEXT_FLOATING_POINT bit is set (bit 2) ldr w17, [x16, #OFFSETOF__CONTEXT__ContextFlags] + tbz w17, #2, SkipFloatingPointRestore + + ; Restore non-volatile FP registers d8-d15 (full q8-q15) + ; V8 is at OFFSETOF__CONTEXT__V0 + 8*16 = 0x110 + 0x80 = 0x190 + ldp q8, q9, [x16, #(OFFSETOF__CONTEXT__V0 + 128)] + ldp q10, q11, [x16, #(OFFSETOF__CONTEXT__V0 + 160)] + ldp q12, q13, [x16, #(OFFSETOF__CONTEXT__V0 + 192)] + ldp q14, q15, [x16, #(OFFSETOF__CONTEXT__V0 + 224)] + +SkipFloatingPointRestore + ; Check if CONTEXT_INTEGER bit is set tbz w17, #1, SkipIntegerRestore ; CONTEXT_INTEGER_BIT = 1 ; Restore ALL integer registers x0-x28 diff --git a/src/coreclr/vm/loongarch64/asmconstants.h b/src/coreclr/vm/loongarch64/asmconstants.h index 24787cbd35146c..67e45a5cceb247 100644 --- a/src/coreclr/vm/loongarch64/asmconstants.h +++ b/src/coreclr/vm/loongarch64/asmconstants.h @@ -120,6 +120,15 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__S0 == offsetof(T_CONTEXT, S0)); #define OFFSETOF__CONTEXT__Pc 0x108 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Pc == offsetof(T_CONTEXT, Pc)); +// Floating point register array F[0..127] starts right after Pc +// Non-volatile FP registers are f24-f31 +// F holds 4*32 ULONGLONG (8-byte) entries for LASX support - presumably 4 entries (32 bytes) per register; TODO confirm +#define OFFSETOF__CONTEXT__F 0x110 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__F == offsetof(T_CONTEXT, F)); + +// CONTEXT_FLOATING_POINT_BIT is bit 2 in ContextFlags +#define CONTEXT_FLOATING_POINT_BIT 2 + #define CONTEXT_INTEGER_BIT 1 //========================================= diff --git
a/src/coreclr/vm/loongarch64/asmhelpers.S b/src/coreclr/vm/loongarch64/asmhelpers.S index 4dc6fd11d4e618..74893d04fdcf22 100644 --- a/src/coreclr/vm/loongarch64/asmhelpers.S +++ b/src/coreclr/vm/loongarch64/asmhelpers.S @@ -1072,6 +1072,7 @@ NESTED_END IL_Rethrow, _TEXT // Restores registers based on ContextFlags and jumps to target PC. // When CONTEXT_INTEGER is set, restores ALL integer registers // because exception handling needs $a0 (exception object). +// When CONTEXT_FLOATING_POINT is set, restores non-volatile FP regs (f24-f31). // // Arguments: // $a0 - pointer to CONTEXT structure @@ -1082,8 +1083,24 @@ LEAF_ENTRY ClrRestoreNonvolatileContextWorker, _TEXT // Save CONTEXT pointer in $t0 before we potentially clobber $a0 move $t0, $a0 - // Check if CONTEXT_INTEGER is set + // Check if CONTEXT_FLOATING_POINT is set ld.w $t1, $t0, OFFSETOF__CONTEXT__ContextFlags + andi $t2, $t1, (1 << CONTEXT_FLOATING_POINT_BIT) + beqz $t2, LOCAL_LABEL(SkipFloatingPointRestore) + + // Restore non-volatile FP registers f24-f31 + // f24 is read from OFFSETOF__CONTEXT__F + 24*8 = 0x110 + 0xC0 = 0x1D0. NOTE(review): asmconstants.h describes F as 4*32 entries for LASX; if each register occupies 4 entries, the stride here should be 32 bytes (f24 at +24*32), not 8 - TODO confirm against the CONTEXT layout + fld.d $f24, $t0, (OFFSETOF__CONTEXT__F + 24*8) + fld.d $f25, $t0, (OFFSETOF__CONTEXT__F + 25*8) + fld.d $f26, $t0, (OFFSETOF__CONTEXT__F + 26*8) + fld.d $f27, $t0, (OFFSETOF__CONTEXT__F + 27*8) + fld.d $f28, $t0, (OFFSETOF__CONTEXT__F + 28*8) + fld.d $f29, $t0, (OFFSETOF__CONTEXT__F + 29*8) + fld.d $f30, $t0, (OFFSETOF__CONTEXT__F + 30*8) + fld.d $f31, $t0, (OFFSETOF__CONTEXT__F + 31*8) + +LOCAL_LABEL(SkipFloatingPointRestore): + // Check if CONTEXT_INTEGER is set andi $t2, $t1, (1 << CONTEXT_INTEGER_BIT) beqz $t2, LOCAL_LABEL(SkipIntegerRestore) diff --git a/src/coreclr/vm/riscv64/asmconstants.h b/src/coreclr/vm/riscv64/asmconstants.h index 6bf0a6ae5f155f..0ad2c2934fb7da 100644 --- a/src/coreclr/vm/riscv64/asmconstants.h +++ b/src/coreclr/vm/riscv64/asmconstants.h @@ -124,6 +124,15 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__S2 == offsetof(T_CONTEXT, S2)); #define OFFSETOF__CONTEXT__Pc
0x108 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Pc == offsetof(T_CONTEXT, Pc)); +// Floating point registers F[0..31] start after Pc +// Non-volatile FP registers are F8-F9, F18-F27 +// Each F entry is 8 bytes (ULONGLONG) +#define OFFSETOF__CONTEXT__F 0x110 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__F == offsetof(T_CONTEXT, F)); + +// CONTEXT_FLOATING_POINT_BIT is bit 2 in ContextFlags +#define CONTEXT_FLOATING_POINT_BIT 2 + #define CONTEXT_INTEGER_BIT 1 //========================================= diff --git a/src/coreclr/vm/riscv64/asmhelpers.S b/src/coreclr/vm/riscv64/asmhelpers.S index 9d00c6f4ab39f3..fae34645025947 100644 --- a/src/coreclr/vm/riscv64/asmhelpers.S +++ b/src/coreclr/vm/riscv64/asmhelpers.S @@ -929,6 +929,7 @@ NESTED_END IL_Rethrow, _TEXT // Restores registers based on ContextFlags and jumps to target PC. // When CONTEXT_INTEGER is set, restores ALL integer registers // because exception handling needs a0 (exception object). +// When CONTEXT_FLOATING_POINT is set, restores non-volatile FP regs (f8-f9, f18-f27). 
// // Arguments: // a0 - pointer to CONTEXT structure @@ -939,8 +940,28 @@ LEAF_ENTRY ClrRestoreNonvolatileContextWorker, _TEXT // Save CONTEXT pointer in t0 before we potentially clobber a0 mv t0, a0 - // Check if CONTEXT_INTEGER is set + // Check if CONTEXT_FLOATING_POINT is set lw t1, OFFSETOF__CONTEXT__ContextFlags(t0) + andi t2, t1, (1 << CONTEXT_FLOATING_POINT_BIT) + beqz t2, LOCAL_LABEL(SkipFloatingPointRestore) + + // Restore non-volatile FP registers f8-f9, f18-f27 + // Each F entry is 8 bytes + fld f8, (OFFSETOF__CONTEXT__F + 8*8)(t0) + fld f9, (OFFSETOF__CONTEXT__F + 9*8)(t0) + fld f18, (OFFSETOF__CONTEXT__F + 18*8)(t0) + fld f19, (OFFSETOF__CONTEXT__F + 19*8)(t0) + fld f20, (OFFSETOF__CONTEXT__F + 20*8)(t0) + fld f21, (OFFSETOF__CONTEXT__F + 21*8)(t0) + fld f22, (OFFSETOF__CONTEXT__F + 22*8)(t0) + fld f23, (OFFSETOF__CONTEXT__F + 23*8)(t0) + fld f24, (OFFSETOF__CONTEXT__F + 24*8)(t0) + fld f25, (OFFSETOF__CONTEXT__F + 25*8)(t0) + fld f26, (OFFSETOF__CONTEXT__F + 26*8)(t0) + fld f27, (OFFSETOF__CONTEXT__F + 27*8)(t0) + +LOCAL_LABEL(SkipFloatingPointRestore): + // Check if CONTEXT_INTEGER is set andi t2, t1, (1 << CONTEXT_INTEGER_BIT) beqz t2, LOCAL_LABEL(SkipIntegerRestore) From 8e01a3c4337062081f09674ad5d38876aab401ee Mon Sep 17 00:00:00 2001 From: Adeel Mujahid <3840695+am11@users.noreply.github.com> Date: Sun, 18 Jan 2026 16:29:46 +0200 Subject: [PATCH 11/14] Apply suggestions from code review --- src/coreclr/vm/excep.cpp | 3 +-- src/coreclr/vm/jithelpers.cpp | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index ff4c2972b4396c..0989728a1de8d8 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -11301,13 +11301,12 @@ void SoftwareExceptionFrame::UpdateContextForOSRTransition(TransitionBlock* pTra TADDR callerLR = *((TADDR*)(managedFrameFP + 8)); // LR to Test() at [F's FP + 8] // CRITICAL: Use Test()'s FP (callerFP), not F()'s FP (managedFrameFP)! 
- // This matches what VirtualUnwind produces - the CALLER's frame pointer pContext->Fp = callerFP; pContext->Lr = callerLR; // SP = F()'s SP when it called JIT_Patchpoint *pCurrentSP = (UINT_PTR)(pTransitionBlock + 1); - // FP output should also be caller's FP to match VirtualUnwind behavior + // FP output should also be caller's FP *pCurrentFP = callerFP; #elif defined(TARGET_LOONGARCH64) diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 7274d1186b9181..f785f90f2f4b34 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -1628,8 +1628,7 @@ extern "C" void JIT_PatchpointWorkerWorkerWithPolicy(TransitionBlock * pTransiti UINT_PTR currentSP; UINT_PTR currentFP; - // Build CONTEXT directly from TransitionBlock - this completely bypasses - // RtlCaptureContext, VirtualUnwindToFirstManagedCallFrame, and RtlVirtualUnwind. + // Build CONTEXT directly from TransitionBlock CONTEXT frameContext; memset(&frameContext, 0, sizeof(frameContext)); pFrameContext = &frameContext; From 3188a5b8511624110523157cd13a97d6f8e21926 Mon Sep 17 00:00:00 2001 From: Adeel Mujahid <3840695+am11@users.noreply.github.com> Date: Sun, 18 Jan 2026 16:30:48 +0200 Subject: [PATCH 12/14] Update src/coreclr/vm/jithelpers.cpp --- src/coreclr/vm/jithelpers.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index f785f90f2f4b34..6861a44f10f366 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -1700,6 +1700,7 @@ extern "C" void JIT_PatchpointWorkerWorkerWithPolicy(TransitionBlock * pTransiti // Transition! 
ClrRestoreNonvolatileContext(pFrameContext); } + DONE: ::SetLastError(dwLastError); } From 18ae2d4f6e2803a4c2adc5257b2a7a4939e802ea Mon Sep 17 00:00:00 2001 From: Adeel Mujahid <3840695+am11@users.noreply.github.com> Date: Mon, 19 Jan 2026 18:22:37 +0200 Subject: [PATCH 13/14] Address CR fb --- src/coreclr/vm/excep.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index 0989728a1de8d8..a324e4d83041e8 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -5709,9 +5709,7 @@ AdjustContextForJITHelpers( CONTEXT tempContext; CONTEXT* pExceptionContext = pContext; - bool fInWriteBarrier = IsIPInWriteBarrierHelper(f_IP); - bool fInJITStackProbe = IsIPInJITStackProbe(f_IP); - BOOL fExcluded = fInWriteBarrier || fInJITStackProbe; + BOOL fExcluded = IsIPInMarkedJitHelper(f_IP); if (fExcluded) { @@ -5723,7 +5721,7 @@ AdjustContextForJITHelpers( pContext = &tempContext; } - if (fInWriteBarrier) + if (IsIPInWriteBarrierHelper(f_IP)) { // Write barriers are leaf functions that do not set up a frame. // We can unwind them with a simple LR/RA/stack-pop extraction. @@ -5737,7 +5735,7 @@ AdjustContextForJITHelpers( SetIP(pContext, ControlPCPostAdjustment); #endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 } - else if (fInJITStackProbe) + else { // JIT_StackProbe has a known frame layout on each platform. 
UnwindJITStackProbeToCaller(pContext); From 0474984e701160da3a6a4fc72ab86fef261985f3 Mon Sep 17 00:00:00 2001 From: Adeel <3840695+am11@users.noreply.github.com> Date: Thu, 22 Jan 2026 15:45:57 +0200 Subject: [PATCH 14/14] Address CR feedback --- src/coreclr/pal/inc/unixasmmacrosarm64.inc | 12 ++++-------- src/coreclr/vm/arm64/asmhelpers.S | 11 +++-------- src/coreclr/vm/arm64/asmhelpers.asm | 11 +++-------- src/coreclr/vm/arm64/asmmacros.h | 12 ++++-------- 4 files changed, 14 insertions(+), 32 deletions(-) diff --git a/src/coreclr/pal/inc/unixasmmacrosarm64.inc b/src/coreclr/pal/inc/unixasmmacrosarm64.inc index c0203dbe3c66e0..258d3e8c4c28c5 100644 --- a/src/coreclr/pal/inc/unixasmmacrosarm64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosarm64.inc @@ -472,14 +472,10 @@ C_FUNC(\Name\()_End): .macro POP_COOP_PINVOKE_FRAME_WITH_FLOATS_RETURN // Restore FP callee-saved registers (d8-d15) from sp+0 - ldr d8, [sp, #0] - ldr d9, [sp, #8] - ldr d10, [sp, #16] - ldr d11, [sp, #24] - ldr d12, [sp, #32] - ldr d13, [sp, #40] - ldr d14, [sp, #48] - ldr d15, [sp, #56] + ldp d8, d9, [sp, #0] + ldp d10, d11, [sp, #16] + ldp d12, d13, [sp, #32] + ldp d14, d15, [sp, #48] // Deallocate space for FloatArgumentRegisters + FP callee-saved EPILOG_STACK_FREE 192 diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index c0334f7ef9c128..837a793865d4ad 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -2840,18 +2840,13 @@ LOCAL_LABEL(SkipFloatingPointRestore): // Check ContextFlags to see if we should restore integer registers tbz w17, #CONTEXT_INTEGER_BIT, LOCAL_LABEL(SkipIntegerRestore) - // Restore ALL integer registers x0-x28 - // Exception handling needs x0 (exception object) + // Restore argument registers x0-x7 (exception handling needs x0 for exception object) + // and non-volatile registers x19-x28 ldp x0, x1, [x16, #OFFSETOF__CONTEXT__X0] ldp x2, x3, [x16, #(OFFSETOF__CONTEXT__X0 + 16)] ldp x4, x5, [x16, 
#(OFFSETOF__CONTEXT__X0 + 32)] ldp x6, x7, [x16, #(OFFSETOF__CONTEXT__X0 + 48)] - ldp x8, x9, [x16, #(OFFSETOF__CONTEXT__X0 + 64)] - ldp x10, x11, [x16, #(OFFSETOF__CONTEXT__X0 + 80)] - ldp x12, x13, [x16, #(OFFSETOF__CONTEXT__X0 + 96)] - ldp x14, x15, [x16, #(OFFSETOF__CONTEXT__X0 + 112)] - // Skip x16, x17 - they're scratch registers we're using - // x18 is platform reserved + // Skip x8-x18: x8-x15 are scratch, x16-x17 we're using, x18 is platform reserved ldp x19, x20, [x16, #OFFSETOF__CONTEXT__X19] ldp x21, x22, [x16, #(OFFSETOF__CONTEXT__X19 + 16)] ldp x23, x24, [x16, #(OFFSETOF__CONTEXT__X19 + 32)] diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index 724c67d26b1a57..b329dba05fdf60 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -3091,18 +3091,13 @@ SkipFloatingPointRestore ; Check if CONTEXT_INTEGER bit is set tbz w17, #1, SkipIntegerRestore ; CONTEXT_INTEGER_BIT = 1 - ; Restore ALL integer registers x0-x28 - ; Exception handling needs x0 (exception object) + ; Restore argument registers x0-x7 (exception handling needs x0 for exception object) + ; and non-volatile registers x19-x28 ldp x0, x1, [x16, #OFFSETOF__CONTEXT__X0] ldp x2, x3, [x16, #(OFFSETOF__CONTEXT__X0 + 16)] ldp x4, x5, [x16, #(OFFSETOF__CONTEXT__X0 + 32)] ldp x6, x7, [x16, #(OFFSETOF__CONTEXT__X0 + 48)] - ldp x8, x9, [x16, #(OFFSETOF__CONTEXT__X0 + 64)] - ldp x10, x11, [x16, #(OFFSETOF__CONTEXT__X0 + 80)] - ldp x12, x13, [x16, #(OFFSETOF__CONTEXT__X0 + 96)] - ldp x14, x15, [x16, #(OFFSETOF__CONTEXT__X0 + 112)] - ; Skip x16, x17 - they're scratch registers we're using - ; x18 is platform reserved + ; Skip x8-x18: x8-x15 are scratch, x16-x17 we're using, x18 is platform reserved ldp x19, x20, [x16, #OFFSETOF__CONTEXT__X19] ldp x21, x22, [x16, #(OFFSETOF__CONTEXT__X19 + 16)] ldp x23, x24, [x16, #(OFFSETOF__CONTEXT__X19 + 32)] diff --git a/src/coreclr/vm/arm64/asmmacros.h b/src/coreclr/vm/arm64/asmmacros.h index 
7ac61354704073..fc144fb9842f94 100644 --- a/src/coreclr/vm/arm64/asmmacros.h +++ b/src/coreclr/vm/arm64/asmmacros.h @@ -258,14 +258,10 @@ OFFSETOF__ee_alloc_context EQU OFFSETOF__RuntimeThreadLocals__ee_alloc_context POP_COOP_PINVOKE_FRAME_WITH_FLOATS_RETURN ; Restore FP callee-saved registers (d8-d15) from sp+0 - ldr d8, [sp, #0] - ldr d9, [sp, #8] - ldr d10, [sp, #16] - ldr d11, [sp, #24] - ldr d12, [sp, #32] - ldr d13, [sp, #40] - ldr d14, [sp, #48] - ldr d15, [sp, #56] + ldp d8, d9, [sp, #0] + ldp d10, d11, [sp, #16] + ldp d12, d13, [sp, #32] + ldp d14, d15, [sp, #48] ; Deallocate space for FloatArgumentRegisters + FP callee-saved EPILOG_STACK_FREE 192