From b47524d10ee9b5e39515c0210b7794650672446b Mon Sep 17 00:00:00 2001 From: Adeel Mujahid <3840695+am11@users.noreply.github.com> Date: Wed, 11 Mar 2026 14:27:50 +0000 Subject: [PATCH 1/4] Improve runtime-async support on linux-riscv64 --- src/coreclr/jit/codegenriscv64.cpp | 42 ++++-------------- src/coreclr/jit/emitriscv64.cpp | 22 ++++++++-- src/coreclr/jit/lclvars.cpp | 2 +- .../nativeaot/Runtime/riscv64/GcProbe.S | 43 ++++++++++--------- .../Runtime/unix/unixasmmacrosriscv64.inc | 1 + .../Compiler/DependencyAnalysis/Relocation.cs | 3 +- .../superpmi/superpmi-shared/spmiutil.cpp | 3 +- src/coreclr/utilcode/util.cpp | 3 +- src/coreclr/vm/riscv64/asmhelpers.S | 12 ++++-- src/coreclr/vm/riscv64/cgencpu.h | 5 +++ src/coreclr/vm/riscv64/stubs.cpp | 2 + 11 files changed, 72 insertions(+), 66 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 148a42c53c98b1..3e4b91797e4dad 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -544,14 +544,6 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() { delta_PSP -= TARGET_POINTER_SIZE; } - if ((m_compiler->lvaAsyncExecutionContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR()) - { - delta_PSP -= TARGET_POINTER_SIZE; - } - if ((m_compiler->lvaAsyncSynchronizationContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR()) - { - delta_PSP -= TARGET_POINTER_SIZE; - } funcletFrameSize = funcletFrameSize - delta_PSP; funcletFrameSize = roundUp((unsigned)funcletFrameSize, STACK_ALIGN); @@ -2258,7 +2250,7 @@ void CodeGen::genJumpTable(GenTree* treeNode) // Access to inline data is 'abstracted' by a special type of static member // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference // to constant data, not a real static field. 
- GetEmitter()->emitIns_R_C(INS_addi, emitActualTypeSize(TYP_I_IMPL), treeNode->GetRegNum(), REG_NA, + GetEmitter()->emitIns_R_C(INS_addi, EA_PTRSIZE, treeNode->GetRegNum(), REG_NA, m_compiler->eeFindJitDataOffs(jmpTabBase)); genProduceReg(treeNode); } @@ -2271,7 +2263,13 @@ void CodeGen::genJumpTable(GenTree* treeNode) // void CodeGen::genAsyncResumeInfo(GenTreeVal* treeNode) { - GetEmitter()->emitIns_R_C(INS_addi, emitActualTypeSize(TYP_I_IMPL), treeNode->GetRegNum(), REG_NA, + emitAttr attr = EA_PTRSIZE; + if (m_compiler->eeDataWithCodePointersNeedsRelocs()) + { + attr = EA_SET_FLG(EA_PTRSIZE, EA_CNS_RELOC_FLG); + } + + GetEmitter()->emitIns_R_C(INS_addi, attr, treeNode->GetRegNum(), REG_NA, genEmitAsyncResumeInfo((unsigned)treeNode->gtVal1)); genProduceReg(treeNode); } @@ -3365,14 +3363,6 @@ int CodeGenInterface::genSPtoFPdelta() const { delta -= TARGET_POINTER_SIZE; } - if ((m_compiler->lvaAsyncExecutionContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR()) - { - delta -= TARGET_POINTER_SIZE; - } - if ((m_compiler->lvaAsyncSynchronizationContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR()) - { - delta -= TARGET_POINTER_SIZE; - } assert(delta >= 0); return delta; @@ -6431,14 +6421,6 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe { localFrameSize -= TARGET_POINTER_SIZE; } - if ((m_compiler->lvaAsyncExecutionContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR()) - { - localFrameSize -= TARGET_POINTER_SIZE; - } - if ((m_compiler->lvaAsyncSynchronizationContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR()) - { - localFrameSize -= TARGET_POINTER_SIZE; - } #ifdef DEBUG if (m_compiler->opts.disAsm) @@ -6509,14 +6491,6 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) { localFrameSize -= TARGET_POINTER_SIZE; } - if ((m_compiler->lvaAsyncExecutionContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR()) - { - localFrameSize -= TARGET_POINTER_SIZE; - } - if ((m_compiler->lvaAsyncSynchronizationContextVar != 
BAD_VAR_NUM) && !m_compiler->opts.IsOSR()) - { - localFrameSize -= TARGET_POINTER_SIZE; - } JITDUMP("Frame type. #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; " "localloc? %s\n", diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp index 4459866f2445b2..057518229faba1 100644 --- a/src/coreclr/jit/emitriscv64.cpp +++ b/src/coreclr/jit/emitriscv64.cpp @@ -1232,13 +1232,16 @@ void emitter::emitIns_R_R_R_R( /***************************************************************************** * * Add an instruction with a register + static member operands. - * Constant is stored into JIT data which is adjacent to code. + * Usually constants are stored into JIT data adjacent to code, in which case no + * relocation is needed. PC-relative offset will be encoded directly into instruction. * */ void emitter::emitIns_R_C( instruction ins, emitAttr attr, regNumber destReg, regNumber addrReg, CORINFO_FIELD_HANDLE fldHnd) { instrDesc* id = emitNewInstr(attr); + id->idSetRelocFlags(attr); + id->idIns(ins); assert(destReg != REG_R0); // for special. reg Must not be R0. id->idReg1(destReg); @@ -3271,17 +3274,28 @@ BYTE* emitter::emitOutputInstr_OptsRc(BYTE* dst, const instrDesc* id, instructio assert(offset >= 0); assert((UNATIVE_OFFSET)offset < emitDataSize()); + BYTE* const dstBase = dst; *ins = id->idIns(); const regNumber reg1 = id->idReg1(); assert(reg1 != REG_ZERO); assert(id->idCodeSize() == 2 * sizeof(code_t)); - const ssize_t immediate = emitDataOffsetToPtr(offset) - dst; - assert((immediate > 0) && ((immediate & 0x01) == 0)); - assert(isValidSimm32(immediate)); + + const ssize_t immediate = id->idIsReloc() ? 0 : (emitDataOffsetToPtr(offset) - dst); + if (!id->idIsReloc()) + { + assert((immediate > 0) && ((immediate & 0x01) == 0)); + assert(isValidSimm32(immediate)); + } const regNumber tempReg = isFloatReg(reg1) ? 
codeGen->rsGetRsvdReg() : reg1; dst += emitOutput_UTypeInstr(dst, INS_auipc, tempReg, UpperNBitsOfWordSignExtend<20>(immediate)); dst += emitOutput_ITypeInstr(dst, *ins, reg1, tempReg, LowerNBitsOfWord<12>(immediate)); + + if (id->idIsReloc()) + { + emitRecordRelocation(dstBase, emitDataOffsetToPtr(offset), CorInfoReloc::RISCV64_PCREL_I); + } + return dst; } diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index acd4be1edf083e..115dfff1265a93 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -4357,7 +4357,7 @@ void Compiler::lvaFixVirtualFrameOffsets() delta += lvaLclStackHomeSize(lvaMonAcquired); } -#ifndef TARGET_LOONGARCH64 +#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) if ((lvaAsyncExecutionContextVar != BAD_VAR_NUM) && !opts.IsOSR()) { int offset = lvaTable[lvaAsyncExecutionContextVar].GetStackOffset() + (compCalleeRegsPushed << 3); diff --git a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S index 5db276b73476d3..e0113cbc3a56b6 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S @@ -4,11 +4,12 @@ #include #include "AsmOffsets.inc" -#define PROBE_FRAME_SIZE 0xB0 // 4 * 8 for fixed part of PInvokeTransitionFrame (fp, ra, m_pThread, m_Flags) + +#define PROBE_FRAME_SIZE 0xC0 // 4 * 8 for fixed part of PInvokeTransitionFrame (fp, ra, m_pThread, m_Flags) + // 11 * 8 for callee saved registers + // 1 * 8 for caller SP + - // 2 * 8 for int returns + - // 4 * 8 for FP returns + // 3 * 8 for int returns (a0, a1, a2) + + // 4 * 8 for FP returns + + // 1 * 8 for alignment padding // Define the prolog for setting up the PInvokeTransitionFrame .macro PUSH_PROBE_FRAME threadReg, trashReg, BITMASK @@ -32,15 +33,16 @@ # Slot at sp+0x78 is reserved for caller sp - # Save the integer return registers + # Save the integer return registers, a2 might contain an objectref (async continuation) sd a0, 
0x80(sp) sd a1, 0x88(sp) + sd a2, 0x90(sp) # Save the FP return registers - fsd fa0, 0x90(sp) - fsd fa1, 0x98(sp) - fsd fa2, 0xa0(sp) - fsd fa3, 0xa8(sp) + fsd fa0, 0x98(sp) + fsd fa1, 0xa0(sp) + fsd fa2, 0xa8(sp) + fsd fa3, 0xb0(sp) # Perform the rest of the PInvokeTransitionFrame initialization. sd \threadReg, OFFSETOF__PInvokeTransitionFrame__m_pThread(sp) # Thread * (unused by stackwalker) @@ -60,12 +62,13 @@ // Restore the integer return registers ld a0, 0x80(sp) ld a1, 0x88(sp) + ld a2, 0x90(sp) // Restore the FP return registers - fld fa0, 0x90(sp) - fld fa1, 0x98(sp) - fld fa2, 0xa0(sp) - fld fa3, 0xa8(sp) + fld fa0, 0x98(sp) + fld fa1, 0xa0(sp) + fld fa2, 0xa8(sp) + fld fa3, 0xb0(sp) // Restore callee saved registers EPILOG_RESTORE_REG_PAIR s1, s2, 0x20 @@ -82,17 +85,17 @@ // Fix up the hijacked callstack .macro FixupHijackedCallstack - // a2 <- GetThread() + // a4 <- GetThread() mv t1, a0 - INLINE_GETTHREAD a2 + INLINE_GETTHREAD a4 mv a0, t1 // Fix the stack by restoring the original return address - ld ra, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2) + ld ra, OFFSETOF__Thread__m_pvHijackedReturnAddress(a4) // Clear hijack state - sd zero, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation(a2) - sd zero, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2) + sd zero, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation(a4) + sd zero, OFFSETOF__Thread__m_pvHijackedReturnAddress(a4) .endm // @@ -107,7 +110,7 @@ NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler jr ra LOCAL_LABEL(WaitForGC): - li t3, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_A0 + PTFF_SAVE_A1 + PTFF_THREAD_HIJACK) + li t3, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_A0 + PTFF_SAVE_A1 + PTFF_SAVE_A2 + PTFF_THREAD_HIJACK) tail C_FUNC(RhpWaitForGC) NESTED_END RhpGcProbeHijack @@ -115,9 +118,9 @@ NESTED_END RhpGcProbeHijack // Wait for GC function NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler - PUSH_PROBE_FRAME a2, a3, t3 + PUSH_PROBE_FRAME a4, a3, t3 - ld a0, OFFSETOF__Thread__m_pDeferredTransitionFrame(a2) + 
ld a0, OFFSETOF__Thread__m_pDeferredTransitionFrame(a4) call C_FUNC(RhpWaitForGC2) POP_PROBE_FRAME diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc index b1c56060127207..1c13077187046d 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc @@ -190,6 +190,7 @@ C_FUNC(\Name): #define PTFF_SAVE_SP 0x00000800 #define PTFF_SAVE_A0 0x00004000 #define PTFF_SAVE_A1 0x00008000 +#define PTFF_SAVE_A2 0x00010000 #define PTFF_SAVE_ALL_PRESERVED 0x000007FF // NOTE: S1-S11 #define PTFF_THREAD_HIJACK 0x80000000 diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs index d8f541b8f88d03..e677f4104f5e1e 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs +++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs @@ -566,9 +566,10 @@ private static unsafe void PutRiscV64AuipcCombo(uint* pCode, long offset, bool i int hi20 = (int)(offset - lo12); Debug.Assert((long)lo12 + (long)hi20 == offset); - Debug.Assert(GetRiscV64AuipcCombo(pCode, isStype) == 0); + pCode[0] &= 0x00000FFF; // keep bits 11-0 (opcode + rd) pCode[0] |= (uint)hi20; int bottomBitsPos = isStype ? 7 : 20; + pCode[1] &= isStype ? 
0x01FFF07Fu : 0x000FFFFFu; // keep non-immediate bits pCode[1] |= (uint)((lo12 >> 5) << 25); // top 7 bits are in the same spot pCode[1] |= (uint)((lo12 & 0x1F) << bottomBitsPos); Debug.Assert(GetRiscV64AuipcCombo(pCode, isStype) == offset); diff --git a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp index 5c8b3e559215d9..0f2e4f9748fc1e 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp @@ -518,9 +518,10 @@ void PutRiscV64AuipcCombo(UINT32 * pCode, INT64 offset, bool isStype) INT32 hi20 = INT32(offset - lo12); _ASSERTE(INT64(lo12) + INT64(hi20) == offset); - _ASSERTE(GetRiscV64AuipcCombo(pCode, isStype) == 0); + pCode[0] &= 0x00000FFF; // keep bits 11-0 (opcode + rd) pCode[0] |= hi20; int bottomBitsPos = isStype ? 7 : 20; + pCode[1] &= isStype ? (UINT32)0x01FFF07F : (UINT32)0x000FFFFF; // keep non-immediate bits pCode[1] |= (lo12 >> 5) << 25; // top 7 bits are in the same spot pCode[1] |= (lo12 & 0x1F) << bottomBitsPos; _ASSERTE(GetRiscV64AuipcCombo(pCode, isStype) == offset); diff --git a/src/coreclr/utilcode/util.cpp b/src/coreclr/utilcode/util.cpp index b85a788096c56b..717c8c40b33768 100644 --- a/src/coreclr/utilcode/util.cpp +++ b/src/coreclr/utilcode/util.cpp @@ -2291,9 +2291,10 @@ void PutRiscV64AuipcCombo(UINT32 * pCode, INT64 offset, bool isStype) INT32 hi20 = INT32(offset - lo12); _ASSERTE(INT64(lo12) + INT64(hi20) == offset); - _ASSERTE(GetRiscV64AuipcCombo(pCode, isStype) == 0); + pCode[0] &= 0x00000FFF; // keep bits 11-0 (opcode + rd) pCode[0] |= hi20; int bottomBitsPos = isStype ? 7 : 20; + pCode[1] &= isStype ? 
(UINT32)0x01FFF07F : (UINT32)0x000FFFFF; // keep non-immediate bits pCode[1] |= (lo12 >> 5) << 25; // top 7 bits are in the same spot pCode[1] |= (lo12 & 0x1F) << bottomBitsPos; _ASSERTE(GetRiscV64AuipcCombo(pCode, isStype) == offset); diff --git a/src/coreclr/vm/riscv64/asmhelpers.S b/src/coreclr/vm/riscv64/asmhelpers.S index 0d14aab155fc4f..5825dc4bd4a174 100644 --- a/src/coreclr/vm/riscv64/asmhelpers.S +++ b/src/coreclr/vm/riscv64/asmhelpers.S @@ -515,10 +515,12 @@ NESTED_ENTRY OnHijackTripThread, _TEXT, NoHandler // save any integral return value(s) sd a0, 120(sp) sd a1, 128(sp) + // save async continuation return value + sd a2, 136(sp) // save any FP/HFA return value(s) - fsd f0, 136(sp) - fsd f1, 144(sp) + fsd f0, 144(sp) + fsd f1, 152(sp) addi a0, sp, 0 call C_FUNC(OnHijackWorker) @@ -528,10 +530,12 @@ NESTED_ENTRY OnHijackTripThread, _TEXT, NoHandler // restore any integral return value(s) ld a0, 120(sp) ld a1, 128(sp) + // restore async continuation return value + ld a2, 136(sp) // restore any FP/HFA return value(s) - fld f0, 136(sp) - fld f1, 144(sp) + fld f0, 144(sp) + fld f1, 152(sp) EPILOG_RESTORE_REG_PAIR s1, s2, 16 EPILOG_RESTORE_REG_PAIR s3, s4, 32 diff --git a/src/coreclr/vm/riscv64/cgencpu.h b/src/coreclr/vm/riscv64/cgencpu.h index 17a924d228146c..456c3cc764ef1f 100644 --- a/src/coreclr/vm/riscv64/cgencpu.h +++ b/src/coreclr/vm/riscv64/cgencpu.h @@ -415,6 +415,11 @@ struct HijackArgs size_t ReturnValue[2]; }; union + { + DWORD64 A2; + size_t AsyncRet; + }; + union { struct { DWORD64 FA0; diff --git a/src/coreclr/vm/riscv64/stubs.cpp b/src/coreclr/vm/riscv64/stubs.cpp index b05d86c0f15afe..645a45b26040de 100644 --- a/src/coreclr/vm/riscv64/stubs.cpp +++ b/src/coreclr/vm/riscv64/stubs.cpp @@ -467,9 +467,11 @@ void HijackFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats pRD->pCurrentContext->A0 = m_Args->A0; pRD->pCurrentContext->A1 = m_Args->A1; + pRD->pCurrentContext->A2 = m_Args->A2; pRD->volatileCurrContextPointers.A0 = 
&m_Args->A0; pRD->volatileCurrContextPointers.A1 = &m_Args->A1; + pRD->volatileCurrContextPointers.A2 = &m_Args->A2; pRD->pCurrentContext->S1 = m_Args->S1; From 547ef81a8ad6c5b8e3c89fa9d11c23b89ab637de Mon Sep 17 00:00:00 2001 From: Adeel Mujahid <3840695+am11@users.noreply.github.com> Date: Wed, 11 Mar 2026 14:39:55 +0000 Subject: [PATCH 2/4] Address CR fb --- src/coreclr/jit/codegenriscv64.cpp | 3 +++ src/coreclr/jit/lclvars.cpp | 16 ---------------- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 3e4b91797e4dad..543bffbd68959b 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -5938,6 +5938,9 @@ void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, #ifdef FEATURE_REMAP_FUNCTION if (m_compiler->opts.compDbgEnC) { + // TODO: lvaMonAcquired, lvaAsyncExecutionContextVar and lvaAsyncSynchronizationContextVar are special locals + // that must be allocated at the top of the stack frame and included as part of the EnC frame header + // for EnC to work. 
NYI_RISCV64("compDbgEnc in genCreateAndStoreGCInfo-----unimplemented/unused on RISCV64 yet----"); } #endif // FEATURE_REMAP_FUNCTION diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 115dfff1265a93..59bf4815f7f4e9 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -4357,22 +4357,6 @@ void Compiler::lvaFixVirtualFrameOffsets() delta += lvaLclStackHomeSize(lvaMonAcquired); } -#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) - if ((lvaAsyncExecutionContextVar != BAD_VAR_NUM) && !opts.IsOSR()) - { - int offset = lvaTable[lvaAsyncExecutionContextVar].GetStackOffset() + (compCalleeRegsPushed << 3); - lvaTable[lvaAsyncExecutionContextVar].SetStackOffset(offset); - delta += lvaLclStackHomeSize(lvaAsyncExecutionContextVar); - } - - if ((lvaAsyncSynchronizationContextVar != BAD_VAR_NUM) && !opts.IsOSR()) - { - int offset = lvaTable[lvaAsyncSynchronizationContextVar].GetStackOffset() + (compCalleeRegsPushed << 3); - lvaTable[lvaAsyncSynchronizationContextVar].SetStackOffset(offset); - delta += lvaLclStackHomeSize(lvaAsyncSynchronizationContextVar); - } -#endif - JITDUMP("--- delta bump %d for FP frame\n", delta); } #elif defined(TARGET_WASM) From 3c664cf34b8b2287554317a650d88f3ccf7a1b66 Mon Sep 17 00:00:00 2001 From: Adeel Mujahid <3840695+am11@users.noreply.github.com> Date: Wed, 11 Mar 2026 23:51:38 +0000 Subject: [PATCH 3/4] Address CR fb --- src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S | 9 +++------ .../Common/Compiler/DependencyAnalysis/Relocation.cs | 1 + src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp | 1 + src/coreclr/utilcode/util.cpp | 1 + 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S index e0113cbc3a56b6..8522bc01037412 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S @@ -4,11 +4,11 @@ #include #include 
"AsmOffsets.inc" -#define PROBE_FRAME_SIZE 0xC0 // 4 * 8 for fixed part of PInvokeTransitionFrame (fp, ra, m_pThread, m_Flags) + +#define PROBE_FRAME_SIZE 0xB0 // 4 * 8 for fixed part of PInvokeTransitionFrame (fp, ra, m_pThread, m_Flags) + // 11 * 8 for callee saved registers + // 1 * 8 for caller SP + // 3 * 8 for int returns (a0, a1, a2) + - // 4 * 8 for FP returns + + // 2 * 8 for FP returns (fa0, fa1) + // 1 * 8 for alignment padding // Define the prolog for setting up the PInvokeTransitionFrame @@ -41,8 +41,7 @@ # Save the FP return registers fsd fa0, 0x98(sp) fsd fa1, 0xa0(sp) - fsd fa2, 0xa8(sp) - fsd fa3, 0xb0(sp) + # Slot at sp+0xa8 is alignment padding # Perform the rest of the PInvokeTransitionFrame initialization. sd \threadReg, OFFSETOF__PInvokeTransitionFrame__m_pThread(sp) # Thread * (unused by stackwalker) @@ -67,8 +66,6 @@ // Restore the FP return registers fld fa0, 0x98(sp) fld fa1, 0xa0(sp) - fld fa2, 0xa8(sp) - fld fa3, 0xb0(sp) // Restore callee saved registers EPILOG_RESTORE_REG_PAIR s1, s2, 0x20 diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs index e677f4104f5e1e..5a667e49643d06 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs +++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs @@ -566,6 +566,7 @@ private static unsafe void PutRiscV64AuipcCombo(uint* pCode, long offset, bool i int hi20 = (int)(offset - lo12); Debug.Assert((long)lo12 + (long)hi20 == offset); + Debug.Assert(GetRiscV64AuipcCombo(pCode, isStype) == 0); pCode[0] &= 0x00000FFF; // keep bits 11-0 (opcode + rd) pCode[0] |= (uint)hi20; int bottomBitsPos = isStype ? 
7 : 20; diff --git a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp index 0f2e4f9748fc1e..4e92f38a9772a3 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp @@ -518,6 +518,7 @@ void PutRiscV64AuipcCombo(UINT32 * pCode, INT64 offset, bool isStype) INT32 hi20 = INT32(offset - lo12); _ASSERTE(INT64(lo12) + INT64(hi20) == offset); + _ASSERTE(GetRiscV64AuipcCombo(pCode, isStype) == 0); pCode[0] &= 0x00000FFF; // keep bits 11-0 (opcode + rd) pCode[0] |= hi20; int bottomBitsPos = isStype ? 7 : 20; diff --git a/src/coreclr/utilcode/util.cpp b/src/coreclr/utilcode/util.cpp index 717c8c40b33768..83f25519dec111 100644 --- a/src/coreclr/utilcode/util.cpp +++ b/src/coreclr/utilcode/util.cpp @@ -2291,6 +2291,7 @@ void PutRiscV64AuipcCombo(UINT32 * pCode, INT64 offset, bool isStype) INT32 hi20 = INT32(offset - lo12); _ASSERTE(INT64(lo12) + INT64(hi20) == offset); + _ASSERTE(GetRiscV64AuipcCombo(pCode, isStype) == 0); pCode[0] &= 0x00000FFF; // keep bits 11-0 (opcode + rd) pCode[0] |= hi20; int bottomBitsPos = isStype ? 
7 : 20; From 9fda3ab14a6924ef50b9986a1430a946d8e9e175 Mon Sep 17 00:00:00 2001 From: Adeel Mujahid <3840695+am11@users.noreply.github.com> Date: Thu, 12 Mar 2026 00:06:43 +0000 Subject: [PATCH 4/4] Revert defensive changes after adding back assert --- .../tools/Common/Compiler/DependencyAnalysis/Relocation.cs | 2 -- src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp | 2 -- src/coreclr/utilcode/util.cpp | 2 -- 3 files changed, 6 deletions(-) diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs index 5a667e49643d06..d8f541b8f88d03 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs +++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs @@ -567,10 +567,8 @@ private static unsafe void PutRiscV64AuipcCombo(uint* pCode, long offset, bool i Debug.Assert((long)lo12 + (long)hi20 == offset); Debug.Assert(GetRiscV64AuipcCombo(pCode, isStype) == 0); - pCode[0] &= 0x00000FFF; // keep bits 11-0 (opcode + rd) pCode[0] |= (uint)hi20; int bottomBitsPos = isStype ? 7 : 20; - pCode[1] &= isStype ? 0x01FFF07Fu : 0x000FFFFFu; // keep non-immediate bits pCode[1] |= (uint)((lo12 >> 5) << 25); // top 7 bits are in the same spot pCode[1] |= (uint)((lo12 & 0x1F) << bottomBitsPos); Debug.Assert(GetRiscV64AuipcCombo(pCode, isStype) == offset); diff --git a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp index 4e92f38a9772a3..5c8b3e559215d9 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp @@ -519,10 +519,8 @@ void PutRiscV64AuipcCombo(UINT32 * pCode, INT64 offset, bool isStype) _ASSERTE(INT64(lo12) + INT64(hi20) == offset); _ASSERTE(GetRiscV64AuipcCombo(pCode, isStype) == 0); - pCode[0] &= 0x00000FFF; // keep bits 11-0 (opcode + rd) pCode[0] |= hi20; int bottomBitsPos = isStype ? 
7 : 20; - pCode[1] &= isStype ? (UINT32)0x01FFF07F : (UINT32)0x000FFFFF; // keep non-immediate bits pCode[1] |= (lo12 >> 5) << 25; // top 7 bits are in the same spot pCode[1] |= (lo12 & 0x1F) << bottomBitsPos; _ASSERTE(GetRiscV64AuipcCombo(pCode, isStype) == offset); diff --git a/src/coreclr/utilcode/util.cpp b/src/coreclr/utilcode/util.cpp index 83f25519dec111..b85a788096c56b 100644 --- a/src/coreclr/utilcode/util.cpp +++ b/src/coreclr/utilcode/util.cpp @@ -2292,10 +2292,8 @@ void PutRiscV64AuipcCombo(UINT32 * pCode, INT64 offset, bool isStype) _ASSERTE(INT64(lo12) + INT64(hi20) == offset); _ASSERTE(GetRiscV64AuipcCombo(pCode, isStype) == 0); - pCode[0] &= 0x00000FFF; // keep bits 11-0 (opcode + rd) pCode[0] |= hi20; int bottomBitsPos = isStype ? 7 : 20; - pCode[1] &= isStype ? (UINT32)0x01FFF07F : (UINT32)0x000FFFFF; // keep non-immediate bits pCode[1] |= (lo12 >> 5) << 25; // top 7 bits are in the same spot pCode[1] |= (lo12 & 0x1F) << bottomBitsPos; _ASSERTE(GetRiscV64AuipcCombo(pCode, isStype) == offset);