From 8d48da0fb92c75b7a4cf0f806b93eb7043856ec8 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 14 Apr 2026 13:29:21 +0200 Subject: [PATCH 01/32] WIP --- src/coreclr/jit/codegen.h | 19 +++----- src/coreclr/jit/codegenarm64.cpp | 68 +++++++++++++++++++++++------ src/coreclr/jit/codegenarmarch.cpp | 12 +++--- src/coreclr/jit/codegencommon.cpp | 69 ++++++++++++++++++------------ src/coreclr/jit/codegenwasm.cpp | 2 +- src/coreclr/jit/codegenxarch.cpp | 2 +- src/coreclr/jit/lclvars.cpp | 1 + 7 files changed, 113 insertions(+), 60 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 8e7fb1f6b29f5e..2234c2c24b5f90 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -363,6 +363,8 @@ class CodeGen final : public CodeGenInterface void genClearStackVec3ArgUpperBits(); #endif // UNIX_AMD64_ABI && FEATURE_SIMD + + void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed); #if defined(TARGET_ARM64) bool genInstrWithConstant(instruction ins, emitAttr attr, @@ -380,9 +382,10 @@ class CodeGen final : public CodeGenInterface int spDelta, bool useSaveNextPair, regNumber tmpReg, - bool* pTmpRegIsZero); + bool* pTmpRegIsZero, + bool unwindOnly = false); - void genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero); + void genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero, bool unwindOnly); void genEpilogRestoreRegPair(regNumber reg1, regNumber reg2, @@ -422,14 +425,12 @@ class CodeGen final : public CodeGenInterface static int genGetSlotSizeForRegsInMask(regMaskTP regsMask); - void genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset); + void genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset, bool unwindOnly = false); void genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset); - void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta); + void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta, bool unwindOnly = false); void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta); - void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed); - #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) bool genInstrWithConstant(instruction ins, emitAttr attr, @@ -443,16 +444,10 @@ class CodeGen final : public CodeGenInterface void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset); void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset); - void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed); - -#else - void genPushCalleeSavedRegisters(); #endif -#if defined(TARGET_AMD64) void genOSRRecordTier0CalleeSavedRegistersAndFrame(); void genOSRSaveRemainingCalleeSavedRegisters(); -#endif // TARGET_AMD64 void genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn); diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 15ee1ef3768a29..b1c02130f873c2 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -461,6 +461,7 @@ void CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg, bool* // tmpReg - An available temporary register. Needed for the case of large frames. // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. // Otherwise, we don't touch it. +// unwindOnly - If true, we only generate unwind codes, and do not actually emit instructions to save // // Return Value: // None. @@ -471,7 +472,8 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, int spDelta, bool useSaveNextPair, regNumber tmpReg, - bool* pTmpRegIsZero) + bool* pTmpRegIsZero, + bool unwindOnly) { assert(spOffset >= 0); assert(spDelta <= 0); @@ -483,6 +485,8 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, if (spDelta != 0) { assert(!useSaveNextPair); + assert(!unwindOnly); + if ((spOffset == 0) && (spDelta >= -512)) { // We can use pre-indexed addressing. @@ -509,7 +513,10 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, // 64-bit STP offset range: -512 to 504, multiple of 8. assert(spOffset <= 504); assert((spOffset % 8) == 0); - GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); + if (!unwindOnly) + { + GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); + } if (TargetOS::IsUnix && m_compiler->generateCFIUnwindCodes()) { @@ -545,11 +552,12 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, // tmpReg - An available temporary register. Needed for the case of large frames. // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. // Otherwise, we don't touch it. +// unwindOnly - If true, we only generate unwind codes, and do not actually emit instructions to save // // Return Value: // None. -void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) +void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero, bool unwindOnly) { assert(spOffset >= 0); assert(spDelta <= 0); @@ -558,6 +566,7 @@ void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNum bool needToSaveRegs = true; if (spDelta != 0) { + assert(!unwindOnly); if ((spOffset == 0) && (spDelta >= -256)) { // We can use pre-index addressing. @@ -576,9 +585,12 @@ void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNum if (needToSaveRegs) { - // str REG, [SP, #offset] - // 64-bit STR offset range: 0 to 32760, multiple of 8. - GetEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + if (!unwindOnly) + { + // str REG, [SP, #offset] + // 64-bit STR offset range: 0 to 32760, multiple of 8. + GetEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + } m_compiler->unwindSaveReg(reg1, spOffset); } } @@ -839,8 +851,9 @@ int CodeGen::genGetSlotSizeForRegsInMask(regMaskTP regsMask) // regsMask - a mask of registers for prolog generation; // spDelta - if non-zero, the amount to add to SP before the first register save (or together with it); // spOffset - the offset from SP that is the beginning of the callee-saved register area; +// unwindOnly - if true, only generate unwind codes, and do not actually emit instructions to save. // -void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset) +void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset, bool unwindOnly) { const int slotSize = genGetSlotSizeForRegsInMask(regsMask); @@ -855,12 +868,12 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, i // We can use a STP instruction. if (genReverseAndPairCalleeSavedRegisters) { - genPrologSaveRegPair(regPair.reg2, regPair.reg1, spOffset, spDelta, false, REG_IP0, nullptr); + genPrologSaveRegPair(regPair.reg2, regPair.reg1, spOffset, spDelta, false, REG_IP0, nullptr, unwindOnly); } else { genPrologSaveRegPair(regPair.reg1, regPair.reg2, spOffset, spDelta, regPair.useSaveNextPair, REG_IP0, - nullptr); + nullptr, unwindOnly); } spOffset += 2 * slotSize; @@ -868,7 +881,7 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, i else { // No register pair; we use a STR instruction. - genPrologSaveReg(regPair.reg1, spOffset, spDelta, REG_IP0, nullptr); + genPrologSaveReg(regPair.reg1, spOffset, spDelta, REG_IP0, nullptr, unwindOnly); spOffset += slotSize; } @@ -913,7 +926,7 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, i // The save set can contain LR in which case LR is saved along with the other callee-saved registers. // But currently Jit doesn't use frames without frame pointer on arm64. // -void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta) +void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta, bool unwindOnly) { assert(spDelta <= 0); assert(-spDelta <= STACK_PROBE_BOUNDARY_THRESHOLD_BYTES); @@ -921,6 +934,7 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe unsigned regsToSaveCount = genCountBits(regsToSaveMask); if (regsToSaveCount == 0) { + assert(!unwindOnly); if (spDelta != 0) { // Currently this is the case for varargs only @@ -943,21 +957,21 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe if (maskSaveRegsFloat != RBM_NONE) { - genSaveCalleeSavedRegisterGroup(maskSaveRegsFloat, spDelta, lowestCalleeSavedOffset); + genSaveCalleeSavedRegisterGroup(maskSaveRegsFloat, spDelta, lowestCalleeSavedOffset, unwindOnly); spDelta = 0; lowestCalleeSavedOffset += genCountBits(maskSaveRegsFloat) * FPSAVE_REGSIZE_BYTES; } if (maskSaveRegsInt != RBM_NONE) { - genSaveCalleeSavedRegisterGroup(maskSaveRegsInt, spDelta, lowestCalleeSavedOffset); + genSaveCalleeSavedRegisterGroup(maskSaveRegsInt, spDelta, lowestCalleeSavedOffset, unwindOnly); spDelta = 0; lowestCalleeSavedOffset += genCountBits(maskSaveRegsInt) * FPSAVE_REGSIZE_BYTES; } if (maskSaveRegsFrame != RBM_NONE) { - genPrologSaveRegPair(REG_FP, REG_LR, lowestCalleeSavedOffset, spDelta, false, REG_IP0, nullptr); + genPrologSaveRegPair(REG_FP, REG_LR, lowestCalleeSavedOffset, spDelta, false, REG_IP0, nullptr, unwindOnly); // No need to update spDelta, lowestCalleeSavedOffset since they're not used after this. } } @@ -5532,6 +5546,32 @@ void CodeGen::genStoreLclTypeSimd12(GenTreeLclVarCommon* treeNode) #endif // FEATURE_SIMD +void CodeGen::genOSRRecordTier0CalleeSavedRegistersAndFrame() +{ + assert(m_compiler->compGeneratingProlog); + assert(m_compiler->opts.IsOSR()); + assert(m_compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT); + + // Figure out which set of int callee saves was already saved by Tier0. + // Emit appropriate unwind. + // + PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; + regMaskTP const tier0CalleeSaves = (regMaskTP)patchpointInfo->CalleeSaveRegisters(); + assert((tier0CalleeSaves & RBM_ALLINT) == tier0CalleeSaves); + int const tier0CalleeSaveUsedSize = genCountBits(tier0CalleeSaves) * REGSIZE_BYTES; + + JITDUMP("--OSR--- tier0 has already saved "); + JITDUMPEXEC(dspRegMask(tier0CalleeSaves)); + JITDUMP("\n"); + + m_compiler->unwindAllocStack((unsigned)patchpointInfo->TotalFrameSize()); + + assert((tier0CalleeSaves & (RBM_FP | RBM_LR)) == (RBM_FP | RBM_LR)); + + int offset = patchpointInfo->TotalFrameSize() - tier0CalleeSaveUsedSize; + genSaveCalleeSavedRegistersHelp(tier0CalleeSaves, offset, /* spDelta */ 0, /* unwindOnly */ true); +} + #ifdef PROFILING_SUPPORTED //----------------------------------------------------------------------------------- diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index ff1f2ccd7131ee..91fb3a6c8a051b 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -4285,16 +4285,12 @@ void CodeGen::genSIMDSplitReturn(GenTree* src, const ReturnTypeDesc* retTypeDesc //------------------------------------------------------------------------ // genPushCalleeSavedRegisters: Push any callee-saved registers we have used. // -// Arguments (arm64): +// Arguments: // initReg - A scratch register (that gets set to zero on some platforms). // pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'true' if this method sets initReg register to zero, // 'false' if initReg was set to a non-zero value, and left unchanged if initReg was not touched. // -#if defined(TARGET_ARM64) void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed) -#else -void CodeGen::genPushCalleeSavedRegisters() -#endif { assert(m_compiler->compGeneratingProlog); @@ -4344,6 +4340,12 @@ void CodeGen::genPushCalleeSavedRegisters() // rsPushRegs |= RBM_LR; // We must save the return address (in the LR register) + if (m_compiler->opts.IsOSR()) + { + PatchpointInfo* ppi = m_compiler->info.compPatchpointInfo; + rsPushRegs &= ~ppi->CalleeSaveRegisters(); + } + regSet.rsMaskCalleeSaved = rsPushRegs; #ifdef DEBUG diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index d01f10408d9bfe..e73a429b4a5474 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -4967,6 +4967,15 @@ void CodeGen::genFinalizeFrame() #endif // TARGET_LOONGARCH64 || TARGET_RISCV64 +#if defined(TARGET_ARM64) + // We inherit registers saved by tier0. Avoid saving those. + if (m_compiler->opts.IsOSR()) + { + PatchpointInfo* ppi = m_compiler->info.compPatchpointInfo; + maskCalleeRegsPushed &= ~ppi->CalleeSaveRegisters(); + } +#endif + m_compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed); #ifdef DEBUG @@ -5053,11 +5062,11 @@ void CodeGen::genFnProlog() genBeginFnProlog(); -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - // For arm64 OSR, emit a "phantom prolog" to account for the actions taken +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) + // For some targets, emit a "phantom prolog" to account for the actions taken // in the tier0 frame that impact FP and SP on entry to the OSR method. // - // x64 handles this differently; the phantom prolog unwind is emitted in + // x64/arm64 handle this differently; the phantom prolog unwind is emitted in // genOSRRecordTier0CalleeSavedRegistersAndFrame. // if (m_compiler->opts.IsOSR()) @@ -5320,10 +5329,10 @@ void CodeGen::genFnProlog() const bool isRoot = (m_compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT); -#ifdef TARGET_AMD64 - const bool isOSRx64Root = isRoot && m_compiler->opts.IsOSR(); +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) + const bool inheritsCalleeSaves = isRoot && m_compiler->opts.IsOSR(); #else - const bool isOSRx64Root = false; + const bool inheritsCalleeSaves = false; #endif // TARGET_AMD64 regMaskTP tempMask = initRegs & RBM_ALLINT & ~excludeMask & ~regSet.rsMaskResvd; @@ -5352,7 +5361,7 @@ void CodeGen::genFnProlog() // For x64 OSR root frames, we can't use any as of yet unsaved // callee save as initReg, as we defer saving these until later in // the prolog, and we don't have normal arg regs. - if (isOSRx64Root) + if (inheritsCalleeSaves) { initReg = REG_SCRATCH; // REG_EAX } @@ -5360,7 +5369,7 @@ void CodeGen::genFnProlog() // For arm64 OSR root frames, we may need a scratch register for large // offset addresses. Use a register that won't be allocated. // - if (isRoot && m_compiler->opts.IsOSR()) + if (inheritsCalleeSaves) { initReg = REG_IP1; } @@ -5419,23 +5428,24 @@ void CodeGen::genFnProlog() unsigned extraFrameSize = 0; -#ifdef TARGET_XARCH - -#ifdef TARGET_AMD64 - if (isOSRx64Root) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) + if (inheritsCalleeSaves) { // Account for the Tier0 callee saves // genOSRRecordTier0CalleeSavedRegistersAndFrame(); +#ifdef TARGET_AMD64 // We don't actually push any callee saves on the OSR frame, // but we still reserve space, so account for this when // allocating the local frame. // extraFrameSize = m_compiler->compCalleeRegsPushed * REGSIZE_BYTES; +#endif } -#endif // TARGET_AMD64 +#endif +#ifdef TARGET_XARCH if (doubleAlignOrFramePointerUsed()) { // OSR methods handle "saving" FP specially. @@ -5444,7 +5454,7 @@ void CodeGen::genFnProlog() // Tier0 method. The save we do here is just to set up a // proper RBP-based frame chain link. // - if (isOSRx64Root && isFramePointerUsed()) + if (inheritsCalleeSaves && isFramePointerUsed()) { GetEmitter()->emitIns_R_AR(INS_mov, EA_8BYTE, initReg, REG_FPBASE, 0); inst_RV(INS_push, initReg, TYP_REF); @@ -5460,9 +5470,9 @@ void CodeGen::genFnProlog() inst_RV(INS_push, REG_FPBASE, TYP_REF); m_compiler->unwindPush(REG_FPBASE); } -#ifndef TARGET_AMD64 // On AMD64, establish the frame pointer after the "sub rsp" +#ifdef TARGET_X86 // On AMD64, establish the frame pointer after the "sub rsp" genEstablishFramePointer(0, /*reportUnwindData*/ true); -#endif // !TARGET_AMD64 +#endif // TARGET_X86 #if DOUBLE_ALIGN if (m_compiler->genDoubleAlign()) @@ -5476,16 +5486,21 @@ void CodeGen::genFnProlog() } #endif // TARGET_XARCH -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - genPushCalleeSavedRegisters(initReg, &initRegZeroed); - -#else // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64 + bool pushesCalleeSaves = true; +#ifdef TARGET_AMD64 + // For OSR x64 we need canonical epilogs (sequence of pops). Hence we do + // not push any register in the prolog, we rather store them in the area + // allocated by the tier0 method. + // For OSR on other platforms we have no such requirement, instead we + // restore tier0 saved callee saves from its area, but then push the additional + // callee saves in the OSR method prologs as normal. + pushesCalleeSaves = !inheritsCalleeSaves; +#endif - if (!isOSRx64Root) + if (pushesCalleeSaves) { - genPushCalleeSavedRegisters(); + genPushCalleeSavedRegisters(initReg, &initRegZeroed); } -#endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64 #ifdef TARGET_ARM bool needToEstablishFP = false; @@ -5530,14 +5545,14 @@ void CodeGen::genFnProlog() } #endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64 -#ifdef TARGET_AMD64 - // For x64 OSR we have to finish saving int callee saves. + // For x64 OSR we have to finish saving callee saves. // - if (isOSRx64Root) +#ifdef TARGET_AMD64 + if (inheritsCalleeSaves) { genOSRSaveRemainingCalleeSavedRegisters(); } -#endif // TARGET_AMD64 +#endif //------------------------------------------------------------------------- diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index e552f55940cd69..bb6d56c4e01360 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -85,7 +85,7 @@ void CodeGen::genBeginFnProlog() //------------------------------------------------------------------------ // genPushCalleeSavedRegisters: no-op since we don't need to save anything. // -void CodeGen::genPushCalleeSavedRegisters() +void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed) { } diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 4afff4ae217058..50f23d50927fd1 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -10011,7 +10011,7 @@ void CodeGen::genOSRSaveRemainingCalleeSavedRegisters() //------------------------------------------------------------------------ // genPushCalleeSavedRegisters: Push any callee-saved registers we have used. // -void CodeGen::genPushCalleeSavedRegisters() +void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed) { assert(m_compiler->compGeneratingProlog); diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index c53e8a69078cc4..8eabb5baa170cb 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -5487,6 +5487,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() // Default configuration codeGen->SetSaveFpLrWithAllCalleeSavedRegisters((getNeedsGSSecurityCookie() && compLocallocUsed) || opts.compDbgEnC || + doesMethodHavePatchpoints() || compStressCompile(Compiler::STRESS_GENERIC_VARN, 20)); } } From f1d98d17bafa8b8a2d612aac65321d125e25b917 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 14 Apr 2026 14:41:49 +0200 Subject: [PATCH 02/32] WIP --- src/coreclr/jit/codegenarm64.cpp | 94 +++++++++++++++++------------- src/coreclr/jit/codegenarmarch.cpp | 78 ++++++++++++++++++------- src/coreclr/jit/compiler.cpp | 6 +- src/coreclr/vm/jitinterface.cpp | 43 ++++++++++++++ 4 files changed, 158 insertions(+), 63 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index b1c02130f873c2..690cd047f36856 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -45,6 +45,11 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) rsRestoreRegs |= RBM_LR; // We must save/restore the return address (in the LR register) + if (m_compiler->opts.IsOSR()) + { + rsRestoreRegs &= ~m_compiler->info.compPatchpointInfo->CalleeSaveRegisters(); + } + regMaskTP regsToRestoreMask = rsRestoreRegs; const int totalFrameSize = genTotalFrameSize(); @@ -55,6 +60,7 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) const int calleeSaveSpOffset = m_compiler->compFrameInfo.calleeSaveSpOffset; const int calleeSaveSpDelta = m_compiler->compFrameInfo.calleeSaveSpDelta; const int offsetSpToSavedFp = m_compiler->compFrameInfo.offsetSpToSavedFp; + bool restoreFplr = (rsRestoreRegs & RBM_FP) != 0; switch (frameType) { @@ -127,12 +133,19 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, alignmentAdjustment2); m_compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2); - // Generate: - // ldp fp,lr,[sp] - // add sp,sp,#remainingFrameSz + if (restoreFplr) + { + // Generate: + // ldp fp,lr,[sp] + // add sp,sp,#remainingFrameSz - JITDUMP(" alignmentAdjustment2=%d\n", alignmentAdjustment2); - genEpilogRestoreRegPair(REG_FP, REG_LR, alignmentAdjustment2, spAdjustment2, false, REG_IP1, nullptr); + JITDUMP(" alignmentAdjustment2=%d\n", alignmentAdjustment2); + genEpilogRestoreRegPair(REG_FP, REG_LR, alignmentAdjustment2, spAdjustment2, false, REG_IP1, nullptr); + } + else + { + genStackPointerAdjustment(spAdjustment2, REG_IP1, nullptr, /* reportUnwindData */ true); + } } else { @@ -146,15 +159,22 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) m_compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta); } - // Generate: - // ldp fp,lr,[sp,#outsz] - // add sp,sp,#remainingFrameSz ; might need to load this constant in a scratch register if - // ; it's large + if (restoreFplr) + { + // Generate: + // ldp fp,lr,[sp,#outsz] + // add sp,sp,#remainingFrameSz ; might need to load this constant in a scratch register if + // ; it's large - JITDUMP(" remainingFrameSz=%d\n", remainingFrameSz); + JITDUMP(" remainingFrameSz=%d\n", remainingFrameSz); - genEpilogRestoreRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, false, - REG_IP1, nullptr); + genEpilogRestoreRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, false, + REG_IP1, nullptr); + } + else + { + genStackPointerAdjustment(remainingFrameSz, REG_IP1, nullptr, /* reportUnwindData */ true); + } } // Unlike frameType=1 or frameType=2 that restore SP at the end, @@ -213,12 +233,20 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) { case 1: { - // Generate: - // ldp fp,lr,[sp],#framesz + if (restoreFplr) + { + // Generate: + // ldp fp,lr,[sp],#framesz - GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, totalFrameSize, - INS_OPTS_POST_INDEX); - m_compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize); + GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, totalFrameSize, + INS_OPTS_POST_INDEX); + m_compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize); + } + else + { + GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); + m_compiler->unwindAllocStack(totalFrameSize); + } break; } @@ -228,9 +256,12 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) // ldp fp,lr,[sp,#outsz] // add sp,sp,#framesz - GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, - m_compiler->lvaOutgoingArgSpaceSize); - m_compiler->unwindSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize); + if (restoreFplr) + { + GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, + m_compiler->lvaOutgoingArgSpaceSize); + m_compiler->unwindSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize); + } GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); m_compiler->unwindAllocStack(totalFrameSize); @@ -250,30 +281,15 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) } } - // For OSR, we must also adjust the SP to remove the Tier0 frame. + // For OSR we must also restore callee saves from tier0 frame and pop it. // if (m_compiler->opts.IsOSR()) { PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; const int tier0FrameSize = patchpointInfo->TotalFrameSize(); - JITDUMP("Extra SP adjust for OSR to pop off Tier0 frame: %d bytes\n", tier0FrameSize); - // Tier0 size may exceed simple immediate. We're in the epilog so not clear if we can - // use a scratch reg. So just do two subtracts if necessary. - // - int spAdjust = tier0FrameSize; - if (!GetEmitter()->emitIns_valid_imm_for_add(tier0FrameSize, EA_PTRSIZE)) - { - const int lowPart = spAdjust & 0xFFF; - const int highPart = spAdjust - lowPart; - assert(GetEmitter()->emitIns_valid_imm_for_add(highPart, EA_PTRSIZE)); - GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, highPart); - m_compiler->unwindAllocStack(highPart); - spAdjust = lowPart; - } - assert(GetEmitter()->emitIns_valid_imm_for_add(spAdjust, EA_PTRSIZE)); - GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spAdjust); - m_compiler->unwindAllocStack(spAdjust); + regsToRestoreMask = patchpointInfo->CalleeSaveRegisters(); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, tier0FrameSize - genCountBits(regsToRestoreMask) * REGSIZE_BYTES, tier0FrameSize); } } @@ -5557,7 +5573,7 @@ void CodeGen::genOSRRecordTier0CalleeSavedRegistersAndFrame() // PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; regMaskTP const tier0CalleeSaves = (regMaskTP)patchpointInfo->CalleeSaveRegisters(); - assert((tier0CalleeSaves & RBM_ALLINT) == tier0CalleeSaves); + assert((tier0CalleeSaves & RBM_ALLFLOAT) == RBM_NONE); int const tier0CalleeSaveUsedSize = genCountBits(tier0CalleeSaves) * REGSIZE_BYTES; JITDUMP("--OSR--- tier0 has already saved "); diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 91fb3a6c8a051b..899550bbe29aea 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -4484,13 +4484,14 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe // The amount to subtract from SP before starting to store the callee-saved registers. It might be folded into the // first save instruction as a "predecrement" amount, if possible. int calleeSaveSpDelta = 0; + bool saveFplr = (maskSaveRegsInt & RBM_FP) != 0; if (isFramePointerUsed()) { - // We need to save both FP and LR. - - assert((maskSaveRegsInt & RBM_FP) != 0); - assert((maskSaveRegsInt & RBM_LR) != 0); + // Either we need to save both FP and LR or none of them. The latter + // happens only for OSR functions that inherit FP/LR from the tier0 + // frame. + assert(((maskSaveRegsInt & RBM_FP) != 0) == ((maskSaveRegsInt & RBM_LR) != 0)); // If we need to generate a GS cookie, we need to make sure the saved frame pointer and return address // (FP and LR) are protected from buffer overrun by the GS cookie. If FP/LR are at the lowest addresses, @@ -4534,12 +4535,21 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe assert(totalFrameSize <= STACK_PROBE_BOUNDARY_THRESHOLD_BYTES); - GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize, - INS_OPTS_PRE_INDEX); - m_compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize); + if (saveFplr) + { + GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize, + INS_OPTS_PRE_INDEX); + m_compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize); - maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR - offset = (int)m_compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR + maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR + offset = (int)m_compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR + } + else + { + GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); + m_compiler->unwindAllocStack(totalFrameSize); + offset = m_compiler->compLclFrameSize; + } } else if ((totalFrameSize <= 512) && !m_compiler->opts.compDbgEnC) { @@ -4585,14 +4595,21 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); m_compiler->unwindAllocStack(totalFrameSize); - assert(m_compiler->lvaOutgoingArgSpaceSize + 2 * REGSIZE_BYTES <= (unsigned)totalFrameSize); + if (saveFplr) + { + assert(m_compiler->lvaOutgoingArgSpaceSize + 2 * REGSIZE_BYTES <= (unsigned)totalFrameSize); - GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, - m_compiler->lvaOutgoingArgSpaceSize); - m_compiler->unwindSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize); + GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, + m_compiler->lvaOutgoingArgSpaceSize); + m_compiler->unwindSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize); - maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR - offset = (int)m_compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR + maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR + offset = (int)m_compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR + } + else + { + offset = (int)m_compiler->compLclFrameSize; + } } } else @@ -4681,10 +4698,13 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe frameType = 3; - calleeSaveSpDeltaUnaligned -= 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll save later. + if (saveFplr) + { + calleeSaveSpDeltaUnaligned -= 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll save later. - // We'll take care of these later, but callee-saved regs code shouldn't see them. - maskSaveRegsInt &= ~(RBM_FP | RBM_LR); + // We'll take care of these later, but callee-saved regs code shouldn't see them. + maskSaveRegsInt &= ~(RBM_FP | RBM_LR); + } } assert(calleeSaveSpDeltaUnaligned >= 0); @@ -4779,7 +4799,15 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe JITDUMP(" spAdjustment2=%d\n", spAdjustment2); - genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, pInitRegZeroed); + if (saveFplr) + { + genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, pInitRegZeroed); + } + else + { + genStackPointerAdjustment(-spAdjustment2, initReg, pInitRegZeroed, /* reportUnwindData */ true); + } + offset += spAdjustment2; // Now subtract off the #outsz (or the rest of the #outsz if it was unaligned, and the above "sub" @@ -4804,8 +4832,16 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe } else { - genPrologSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false, initReg, - pInitRegZeroed); + if (saveFplr) + { + genPrologSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false, initReg, + pInitRegZeroed); + } + else + { + genStackPointerAdjustment(-remainingFrameSz, initReg, pInitRegZeroed, /* reportUnwindData */ true); + } + offset += remainingFrameSz; offsetSpToSavedFp = m_compiler->lvaOutgoingArgSpaceSize; diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 30c911b0ac18b3..1c9720987f0aa7 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5724,17 +5724,17 @@ void Compiler::generatePatchpointInfo() patchpointInfo->AsyncSynchronizationContextOffset()); } -#if defined(TARGET_AMD64) // Record callee save registers. - // Currently only needed for x64. // regMaskTP rsPushRegs = codeGen->regSet.rsGetModifiedCalleeSavedRegsMask(); rsPushRegs |= RBM_FPBASE; +#ifdef TARGET_ARM64 + rsPushRegs |= RBM_LR; +#endif patchpointInfo->SetCalleeSaveRegisters((uint64_t)rsPushRegs); JITDUMP("--OSR-- Tier0 callee saves: "); JITDUMPEXEC(dspRegMask((regMaskTP)patchpointInfo->CalleeSaveRegisters())); JITDUMP("\n"); -#endif // Register this with the runtime. info.compCompHnd->setPatchpointInfo(patchpointInfo); diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 9dd542b94c03c5..3fc85926a48e27 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -11386,6 +11386,17 @@ void CEECodeGenInfo::reportMetadata( EE_TO_JIT_TRANSITION_LEAF(); } +#if defined(_DEBUG) && defined(ALLOW_SXS_JIT) +struct AltJitPatchpointInfo +{ + AltJitPatchpointInfo* Next; + MethodDesc* Method; + PatchpointInfo* Info; +}; + +static AltJitPatchpointInfo* s_altJitPatchpointInfoList; +#endif + void CEEJitInfo::setPatchpointInfo(PatchpointInfo* patchpointInfo) { CONTRACTL { @@ -11400,6 +11411,23 @@ void CEEJitInfo::setPatchpointInfo(PatchpointInfo* patchpointInfo) // We receive ownership of the array _ASSERTE(m_pPatchpointInfoFromJit == NULL); m_pPatchpointInfoFromJit = patchpointInfo; + +#if defined(_DEBUG) && defined(ALLOW_SXS_JIT) + if (m_jitFlags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_ALT_JIT)) + { + uint32_t ppiSize = patchpointInfo->PatchpointInfoSize(); + PatchpointInfo *newPpi = new (new uint8_t[ppiSize]) PatchpointInfo; + newPpi->Initialize(patchpointInfo->NumberOfLocals(), patchpointInfo->TotalFrameSize()); + newPpi->Copy(patchpointInfo); + + AltJitPatchpointInfo* newInfo = new AltJitPatchpointInfo; + newInfo->Next = s_altJitPatchpointInfoList; + newInfo->Method = m_pMethodBeingCompiled; + newInfo->Info = newPpi; + s_altJitPatchpointInfoList = newInfo; + } +#endif + #else UNREACHABLE(); #endif @@ -11423,6 +11451,21 @@ PatchpointInfo* CEEJitInfo::getOSRInfo(unsigned* ilOffset) #ifdef FEATURE_ON_STACK_REPLACEMENT result = m_pPatchpointInfoFromRuntime; *ilOffset = m_ilOffset; + +#if defined(_DEBUG) && defined(ALLOW_SXS_JIT) + if (m_jitFlags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_ALT_JIT)) + { + for (AltJitPatchpointInfo* altJitPpi = s_altJitPatchpointInfoList; altJitPpi != NULL; altJitPpi = altJitPpi->Next) + { + if (altJitPpi->Method == m_pMethodBeingCompiled) + { + result = altJitPpi->Info; + break; + } + } + } +#endif + #endif EE_TO_JIT_TRANSITION(); From e3d29e75bf5e89dbdabaa31d38ef211a5fb5ebc5 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 14 Apr 2026 14:46:31 +0200 Subject: [PATCH 03/32] WIP --- src/coreclr/jit/compiler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 1c9720987f0aa7..b9dbea6bb64d41 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5731,9 +5731,9 @@ void Compiler::generatePatchpointInfo() #ifdef TARGET_ARM64 rsPushRegs |= RBM_LR; #endif - patchpointInfo->SetCalleeSaveRegisters((uint64_t)rsPushRegs); + patchpointInfo->SetCalleeSaveRegisters((uint64_t)rsPushRegs.GetIntRegSet()); JITDUMP("--OSR-- Tier0 callee saves: "); - JITDUMPEXEC(dspRegMask((regMaskTP)patchpointInfo->CalleeSaveRegisters())); + JITDUMPEXEC(dspRegMask(regMaskTP::FromIntRegSet((SingleTypeRegSet)patchpointInfo->CalleeSaveRegisters()))); JITDUMP("\n"); // Register this with the runtime. From 2179ae75e7c0c300c6d0f05569cc891c214e0a0f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 14 Apr 2026 16:35:59 +0200 Subject: [PATCH 04/32] WIP --- src/coreclr/jit/codegenarm64.cpp | 37 ++++++++++++++++--- src/coreclr/jit/codegenarmarch.cpp | 4 +- .../superpmi-shared/methodcontext.cpp | 8 +++- 3 files changed, 40 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 690cd047f36856..92065952418cfd 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -286,10 +286,27 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) if (m_compiler->opts.IsOSR()) { PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; - const int tier0FrameSize = patchpointInfo->TotalFrameSize(); - regsToRestoreMask = patchpointInfo->CalleeSaveRegisters(); - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, tier0FrameSize - genCountBits(regsToRestoreMask) * REGSIZE_BYTES, tier0FrameSize); + + int frameSize = patchpointInfo->TotalFrameSize(); + int calleeSavesOffset = frameSize - genCountBits(regsToRestoreMask) * REGSIZE_BYTES; + + if (frameSize >= 504) + { + // We cannot represent ldp x0, x1, [sp, #frameSize] because frameSize is too large. + // Do a separate restore up until the callee saved registers first. + int firstAdjustment = calleeSavesOffset; + if ((calleeSavesOffset % 16) != 0) + { + firstAdjustment = calleeSavesOffset - 8; + } + + genStackPointerAdjustment(firstAdjustment, REG_IP1, nullptr, /* reportUnwindData */ true); + calleeSavesOffset -= firstAdjustment; + frameSize -= firstAdjustment; + } + + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSavesOffset, frameSize); } } @@ -5580,12 +5597,20 @@ void CodeGen::genOSRRecordTier0CalleeSavedRegistersAndFrame() JITDUMPEXEC(dspRegMask(tier0CalleeSaves)); JITDUMP("\n"); - m_compiler->unwindAllocStack((unsigned)patchpointInfo->TotalFrameSize()); + // We do this stack allocation in two steps: first the space for the callee saves at the top, + // so that we know we can always report that unwinding information, and then the rest after. + m_compiler->unwindAllocStack(tier0CalleeSaveUsedSize); assert((tier0CalleeSaves & (RBM_FP | RBM_LR)) == (RBM_FP | RBM_LR)); - int offset = patchpointInfo->TotalFrameSize() - tier0CalleeSaveUsedSize; - genSaveCalleeSavedRegistersHelp(tier0CalleeSaves, offset, /* spDelta */ 0, /* unwindOnly */ true); + genSaveCalleeSavedRegistersHelp(tier0CalleeSaves, 0, /* spDelta */ 0, /* unwindOnly */ true); + + int remainingStack = patchpointInfo->TotalFrameSize() - tier0CalleeSaveUsedSize; + assert(remainingStack >= 0); + if (remainingStack > 0) + { + m_compiler->unwindAllocStack((unsigned)remainingStack); + } } #ifdef PROFILING_SUPPORTED diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 899550bbe29aea..ac66821e1df806 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -4340,14 +4340,14 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe // rsPushRegs |= RBM_LR; // We must save the return address (in the LR register) + regSet.rsMaskCalleeSaved = rsPushRegs; + if (m_compiler->opts.IsOSR()) { PatchpointInfo* ppi = m_compiler->info.compPatchpointInfo; rsPushRegs &= ~ppi->CalleeSaveRegisters(); } - regSet.rsMaskCalleeSaved = rsPushRegs; - #ifdef DEBUG if (m_compiler->compCalleeRegsPushed != genCountBits(rsPushRegs)) { diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index 1e9fe7a1de880f..2306c3e7845a43 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -4607,7 +4607,13 @@ PatchpointInfo* MethodContext::repGetOSRInfo(unsigned* ilOffset) DEBUG_REP(dmpGetOSRInfo(key, value)); *ilOffset = value.ilOffset; - return (PatchpointInfo*)GetOSRInfo->GetBuffer(value.index); + PatchpointInfo* ppi = (PatchpointInfo*)GetOSRInfo->GetBuffer(value.index); + if (GetSpmiTargetArchitecture() == SPMI_TARGET_ARCHITECTURE_ARM64) + { + // Set FP | LR since old collections do not have it and that makes things assert. + ppi->SetCalleeSaveRegisters(ppi->CalleeSaveRegisters() | (1UL << 29) | (1UL << 30)); + } + return ppi; } void MethodContext::recGetClassModuleIdForStatics(CORINFO_CLASS_HANDLE cls, From 448d7cdfdd0e7ec6638c88ed35e9ca654a3f3130 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 14 Apr 2026 16:36:32 +0200 Subject: [PATCH 05/32] Run jit-format --- src/coreclr/jit/codegen.h | 9 ++++++--- src/coreclr/jit/codegenarm64.cpp | 30 ++++++++++++++++++------------ src/coreclr/jit/codegenarmarch.cpp | 11 ++++++----- src/coreclr/jit/lclvars.cpp | 3 +-- 4 files changed, 31 insertions(+), 22 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 2234c2c24b5f90..d6d3ff01ebeceb 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -363,7 +363,6 @@ class CodeGen final : public CodeGenInterface void genClearStackVec3ArgUpperBits(); #endif // UNIX_AMD64_ABI && FEATURE_SIMD - void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed); #if defined(TARGET_ARM64) bool genInstrWithConstant(instruction ins, @@ -385,7 +384,8 @@ class CodeGen final : public CodeGenInterface bool* pTmpRegIsZero, bool unwindOnly = false); - void genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero, bool unwindOnly); + void genPrologSaveReg( + regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero, bool unwindOnly); void genEpilogRestoreRegPair(regNumber reg1, regNumber reg2, @@ -428,7 +428,10 @@ class CodeGen final : public CodeGenInterface void genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset, bool unwindOnly = false); void genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset); - void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta, bool unwindOnly = false); + void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, + int lowestCalleeSavedOffset, + int spDelta, + bool unwindOnly = false); void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta); #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 92065952418cfd..3de3f7ddc72c56 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -60,7 +60,7 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) const int calleeSaveSpOffset = m_compiler->compFrameInfo.calleeSaveSpOffset; const int calleeSaveSpDelta = m_compiler->compFrameInfo.calleeSaveSpDelta; const int offsetSpToSavedFp = m_compiler->compFrameInfo.offsetSpToSavedFp; - bool restoreFplr = (rsRestoreRegs & RBM_FP) != 0; + bool restoreFplr = (rsRestoreRegs & RBM_FP) != 0; switch (frameType) { @@ -140,7 +140,8 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) // add sp,sp,#remainingFrameSz JITDUMP(" alignmentAdjustment2=%d\n", alignmentAdjustment2); - genEpilogRestoreRegPair(REG_FP, REG_LR, alignmentAdjustment2, spAdjustment2, false, REG_IP1, nullptr); + genEpilogRestoreRegPair(REG_FP, REG_LR, alignmentAdjustment2, spAdjustment2, false, REG_IP1, + nullptr); } else { @@ -168,8 +169,8 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) JITDUMP(" remainingFrameSz=%d\n", remainingFrameSz); - genEpilogRestoreRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, false, - REG_IP1, nullptr); + genEpilogRestoreRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, + false, REG_IP1, nullptr); } else { @@ -286,9 +287,9 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) if (m_compiler->opts.IsOSR()) { PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; - regsToRestoreMask = patchpointInfo->CalleeSaveRegisters(); + regsToRestoreMask = patchpointInfo->CalleeSaveRegisters(); - int frameSize = patchpointInfo->TotalFrameSize(); + int frameSize = patchpointInfo->TotalFrameSize(); int calleeSavesOffset = frameSize - genCountBits(regsToRestoreMask) * REGSIZE_BYTES; if (frameSize >= 504) @@ -590,7 +591,8 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, // Return Value: // None. -void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero, bool unwindOnly) +void CodeGen::genPrologSaveReg( + regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero, bool unwindOnly) { assert(spOffset >= 0); assert(spDelta <= 0); @@ -901,7 +903,8 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, i // We can use a STP instruction. if (genReverseAndPairCalleeSavedRegisters) { - genPrologSaveRegPair(regPair.reg2, regPair.reg1, spOffset, spDelta, false, REG_IP0, nullptr, unwindOnly); + genPrologSaveRegPair(regPair.reg2, regPair.reg1, spOffset, spDelta, false, REG_IP0, nullptr, + unwindOnly); } else { @@ -959,7 +962,10 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, i // The save set can contain LR in which case LR is saved along with the other callee-saved registers. // But currently Jit doesn't use frames without frame pointer on arm64. // -void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta, bool unwindOnly) +void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, + int lowestCalleeSavedOffset, + int spDelta, + bool unwindOnly) { assert(spDelta <= 0); assert(-spDelta <= STACK_PROBE_BOUNDARY_THRESHOLD_BYTES); @@ -5588,10 +5594,10 @@ void CodeGen::genOSRRecordTier0CalleeSavedRegistersAndFrame() // Figure out which set of int callee saves was already saved by Tier0. // Emit appropriate unwind. // - PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; - regMaskTP const tier0CalleeSaves = (regMaskTP)patchpointInfo->CalleeSaveRegisters(); + PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; + regMaskTP const tier0CalleeSaves = (regMaskTP)patchpointInfo->CalleeSaveRegisters(); assert((tier0CalleeSaves & RBM_ALLFLOAT) == RBM_NONE); - int const tier0CalleeSaveUsedSize = genCountBits(tier0CalleeSaves) * REGSIZE_BYTES; + int const tier0CalleeSaveUsedSize = genCountBits(tier0CalleeSaves) * REGSIZE_BYTES; JITDUMP("--OSR--- tier0 has already saved "); JITDUMPEXEC(dspRegMask(tier0CalleeSaves)); diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index ac66821e1df806..2eb13ac9d6b42b 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -4483,8 +4483,8 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe // The amount to subtract from SP before starting to store the callee-saved registers. It might be folded into the // first save instruction as a "predecrement" amount, if possible. - int calleeSaveSpDelta = 0; - bool saveFplr = (maskSaveRegsInt & RBM_FP) != 0; + int calleeSaveSpDelta = 0; + bool saveFplr = (maskSaveRegsInt & RBM_FP) != 0; if (isFramePointerUsed()) { @@ -4801,7 +4801,8 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe if (saveFplr) { - genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, pInitRegZeroed); + genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, + pInitRegZeroed); } else { @@ -4834,8 +4835,8 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe { if (saveFplr) { - genPrologSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false, initReg, - pInitRegZeroed); + genPrologSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false, + initReg, pInitRegZeroed); } else { diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 8eabb5baa170cb..facf2c66d7d5b1 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -5486,8 +5486,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() { // Default configuration codeGen->SetSaveFpLrWithAllCalleeSavedRegisters((getNeedsGSSecurityCookie() && compLocallocUsed) || - opts.compDbgEnC || - doesMethodHavePatchpoints() || + opts.compDbgEnC || doesMethodHavePatchpoints() || compStressCompile(Compiler::STRESS_GENERIC_VARN, 20)); } } From 10cff172db6d9b206d69e9ec22d8240443715aff Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 14 Apr 2026 18:46:41 +0200 Subject: [PATCH 06/32] Fix --- src/coreclr/jit/codegenarm64.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 3de3f7ddc72c56..8aae688b3fb084 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5136,8 +5136,12 @@ int CodeGenInterface::genSPtoFPdelta() const { // The saved frame pointer is at the top of the frame, just beneath the saved varargs register space and the // saved LR. - delta = genTotalFrameSize() - (m_compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) - - 2 /* FP, LR */ * REGSIZE_BYTES; + delta = genTotalFrameSize() - (m_compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0); + + if (!m_compiler->opts.IsOSR()) + { + delta = delta - 2 /* FP, LR */ * REGSIZE_BYTES; + } } else { From 27db1a7f6e49a8547db818c1a68bf79f639f302e Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 14 Apr 2026 18:53:39 +0200 Subject: [PATCH 07/32] Fix again --- src/coreclr/jit/lclvars.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index facf2c66d7d5b1..d5c06661efca1c 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -4330,7 +4330,7 @@ void Compiler::lvaFixVirtualFrameOffsets() if (!codeGen->IsSaveFpLrWithAllCalleeSavedRegisters()) { // We set FP to be after LR, FP - frameLocalsDelta = 2 * REGSIZE_BYTES; + frameLocalsDelta = opts.IsOSR() ? 0 : 2 * REGSIZE_BYTES; frameBoundary = opts.IsOSR() ? -info.compPatchpointInfo->TotalFrameSize() : 0; if (info.compIsVarArgs) frameBoundary -= MAX_REG_ARG * REGSIZE_BYTES; From 05131c3a6a26c851a63b085cf98869d0b5a9f136 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 14 Apr 2026 19:54:25 +0200 Subject: [PATCH 08/32] More hacking --- src/coreclr/jit/codegenarm64.cpp | 2 +- src/coreclr/jit/codegenarmarch.cpp | 14 ++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 8aae688b3fb084..45d95c7022d7b8 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5140,7 +5140,7 @@ int CodeGenInterface::genSPtoFPdelta() const if (!m_compiler->opts.IsOSR()) { - delta = delta - 2 /* FP, LR */ * REGSIZE_BYTES; + delta -= 2 /* FP, LR */ * REGSIZE_BYTES; } } else diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 2eb13ac9d6b42b..d99ee658d27539 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -4851,15 +4851,21 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe else if (frameType == 4) { assert(genSaveFpLrWithAllCalleeSavedRegisters); - offsetSpToSavedFp = calleeSaveSpDelta - (m_compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) - - 2 * REGSIZE_BYTES; // -2 for FP, LR + offsetSpToSavedFp = calleeSaveSpDelta - (m_compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0); + if (!m_compiler->opts.IsOSR()) + { + offsetSpToSavedFp -= 2 * REGSIZE_BYTES; // -2 for FP, LR + } } else if (frameType == 5) { assert(genSaveFpLrWithAllCalleeSavedRegisters); - offsetSpToSavedFp = calleeSaveSpDelta - (m_compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) - - 2 * REGSIZE_BYTES; // -2 for FP, LR + offsetSpToSavedFp = calleeSaveSpDelta - (m_compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0); + if (!m_compiler->opts.IsOSR())- + { + offsetSpToSavedFp -= 2 * REGSIZE_BYTES; // -2 for FP, LR + } JITDUMP(" offsetSpToSavedFp=%d\n", offsetSpToSavedFp); genEstablishFramePointer(offsetSpToSavedFp, /* reportUnwindData */ true); From 480d01e5cbd7dce251687c357b3efd03f34edeff Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 14 Apr 2026 19:55:46 +0200 Subject: [PATCH 09/32] Build break --- src/coreclr/jit/codegenarmarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index d99ee658d27539..55e13c3efee9bc 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -4862,7 +4862,7 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe assert(genSaveFpLrWithAllCalleeSavedRegisters); offsetSpToSavedFp = calleeSaveSpDelta - (m_compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0); - if (!m_compiler->opts.IsOSR())- + if (!m_compiler->opts.IsOSR()) { offsetSpToSavedFp -= 2 * REGSIZE_BYTES; // -2 for FP, LR } From 771df5f87991e13e34f9f33c20314b2ceec4e75d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 15 Apr 2026 13:05:41 +0200 Subject: [PATCH 10/32] Switch to restoring callee saves instead --- src/coreclr/jit/codegen.h | 16 +- src/coreclr/jit/codegenarm64.cpp | 256 ++++++++++++++--------------- src/coreclr/jit/codegenarmarch.cpp | 87 +++------- src/coreclr/jit/codegencommon.cpp | 6 +- src/coreclr/jit/codegenxarch.cpp | 4 +- src/coreclr/jit/lclvars.cpp | 2 +- 6 files changed, 161 insertions(+), 210 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index d6d3ff01ebeceb..cce518c3e9f916 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -385,17 +385,19 @@ class CodeGen final : public CodeGenInterface bool unwindOnly = false); void genPrologSaveReg( - regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero, bool unwindOnly); + regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero, bool reportUnwindData); - void genEpilogRestoreRegPair(regNumber reg1, + void genRestoreRegPair(regNumber reg1, regNumber reg2, + regNumber baseReg, int spOffset, int spDelta, bool useSaveNextPair, regNumber tmpReg, - bool* pTmpRegIsZero); + bool* pTmpRegIsZero, + bool reportUnwindData); - void genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero); + void genRestoreReg(regNumber reg1, regNumber baseReg, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero, bool reportUnwindData); // A simple struct to keep register pairs for prolog and epilog. struct RegPair @@ -426,13 +428,13 @@ class CodeGen final : public CodeGenInterface static int genGetSlotSizeForRegsInMask(regMaskTP regsMask); void genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset, bool unwindOnly = false); - void genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset); + void genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, regNumber baseReg, int spDelta, int spOffset, bool reportUnwindData); void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta, bool unwindOnly = false); - void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta); + void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, regNumber baseReg, int lowestCalleeSavedOffset, int spDelta, bool reportUnwindData); #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) bool genInstrWithConstant(instruction ins, @@ -449,7 +451,7 @@ class CodeGen final : public CodeGenInterface void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset); #endif - void genOSRRecordTier0CalleeSavedRegistersAndFrame(); + void genOSRHandleTier0CalleeSavedRegistersAndFrame(); void genOSRSaveRemainingCalleeSavedRegisters(); void genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn); diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 45d95c7022d7b8..3d2d23b92b6f68 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -38,6 +38,11 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) regMaskTP rsRestoreRegs = regSet.rsGetModifiedCalleeSavedRegsMask(); + if (m_compiler->opts.IsOSR()) + { + rsRestoreRegs &= ~m_compiler->info.compPatchpointInfo->CalleeSaveRegisters(); + } + if (isFramePointerUsed()) { rsRestoreRegs |= RBM_FPBASE; @@ -45,11 +50,6 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) rsRestoreRegs |= RBM_LR; // We must save/restore the return address (in the LR register) - if (m_compiler->opts.IsOSR()) - { - rsRestoreRegs &= ~m_compiler->info.compPatchpointInfo->CalleeSaveRegisters(); - } - regMaskTP regsToRestoreMask = rsRestoreRegs; const int totalFrameSize = genTotalFrameSize(); @@ -60,7 +60,6 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) const int calleeSaveSpOffset = m_compiler->compFrameInfo.calleeSaveSpOffset; const int calleeSaveSpDelta = m_compiler->compFrameInfo.calleeSaveSpDelta; const int offsetSpToSavedFp = m_compiler->compFrameInfo.offsetSpToSavedFp; - bool restoreFplr = (rsRestoreRegs & RBM_FP) != 0; switch (frameType) { @@ -133,20 +132,13 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, alignmentAdjustment2); m_compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2); - if (restoreFplr) - { - // Generate: - // ldp fp,lr,[sp] - // add sp,sp,#remainingFrameSz + // Generate: + // ldp fp,lr,[sp] + // add sp,sp,#remainingFrameSz - JITDUMP(" alignmentAdjustment2=%d\n", alignmentAdjustment2); - genEpilogRestoreRegPair(REG_FP, REG_LR, alignmentAdjustment2, spAdjustment2, false, REG_IP1, - nullptr); - } - else - { - genStackPointerAdjustment(spAdjustment2, REG_IP1, nullptr, /* reportUnwindData */ true); - } + JITDUMP(" alignmentAdjustment2=%d\n", alignmentAdjustment2); + genRestoreRegPair(REG_FP, REG_LR, REG_SPBASE, alignmentAdjustment2, spAdjustment2, false, REG_IP1, + nullptr, /* reportUnwindData */ true); } else { @@ -160,22 +152,15 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) m_compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta); } - if (restoreFplr) - { - // Generate: - // ldp fp,lr,[sp,#outsz] - // add sp,sp,#remainingFrameSz ; might need to load this constant in a scratch register if - // ; it's large + // Generate: + // ldp fp,lr,[sp,#outsz] + // add sp,sp,#remainingFrameSz ; might need to load this constant in a scratch register if + // ; it's large - JITDUMP(" remainingFrameSz=%d\n", remainingFrameSz); + JITDUMP(" remainingFrameSz=%d\n", remainingFrameSz); - genEpilogRestoreRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, - false, REG_IP1, nullptr); - } - else - { - genStackPointerAdjustment(remainingFrameSz, REG_IP1, nullptr, /* reportUnwindData */ true); - } + genRestoreRegPair(REG_FP, REG_LR, REG_SPBASE, m_compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, + false, REG_IP1, nullptr, /* reportUnwindData */ true); } // Unlike frameType=1 or frameType=2 that restore SP at the end, @@ -228,26 +213,18 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) } JITDUMP(" calleeSaveSpOffset=%d, calleeSaveSpDelta=%d\n", calleeSaveSpOffset, calleeSaveSpDelta); - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSpOffset, calleeSaveSpDelta); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, REG_SPBASE, calleeSaveSpOffset, calleeSaveSpDelta, /* reportUnwindData */ true); switch (frameType) { case 1: { - if (restoreFplr) - { - // Generate: - // ldp fp,lr,[sp],#framesz + // Generate: + // ldp fp,lr,[sp],#framesz - GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, totalFrameSize, - INS_OPTS_POST_INDEX); - m_compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize); - } - else - { - GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); - m_compiler->unwindAllocStack(totalFrameSize); - } + GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, totalFrameSize, + INS_OPTS_POST_INDEX); + m_compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize); break; } @@ -257,12 +234,9 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) // ldp fp,lr,[sp,#outsz] // add sp,sp,#framesz - if (restoreFplr) - { - GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, - m_compiler->lvaOutgoingArgSpaceSize); - m_compiler->unwindSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize); - } + GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, + m_compiler->lvaOutgoingArgSpaceSize); + m_compiler->unwindSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize); GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); m_compiler->unwindAllocStack(totalFrameSize); @@ -282,32 +256,30 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) } } - // For OSR we must also restore callee saves from tier0 frame and pop it. + // For OSR, we must also adjust the SP to remove the Tier0 frame. // if (m_compiler->opts.IsOSR()) { PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; - regsToRestoreMask = patchpointInfo->CalleeSaveRegisters(); + const int tier0FrameSize = patchpointInfo->TotalFrameSize(); + JITDUMP("Extra SP adjust for OSR to pop off Tier0 frame: %d bytes\n", tier0FrameSize); - int frameSize = patchpointInfo->TotalFrameSize(); - int calleeSavesOffset = frameSize - genCountBits(regsToRestoreMask) * REGSIZE_BYTES; - - if (frameSize >= 504) + // Tier0 size may exceed simple immediate. We're in the epilog so not clear if we can + // use a scratch reg. So just do two subtracts if necessary. + // + int spAdjust = tier0FrameSize; + if (!GetEmitter()->emitIns_valid_imm_for_add(tier0FrameSize, EA_PTRSIZE)) { - // We cannot represent ldp x0, x1, [sp, #frameSize] because frameSize is too large. - // Do a separate restore up until the callee saved registers first. - int firstAdjustment = calleeSavesOffset; - if ((calleeSavesOffset % 16) != 0) - { - firstAdjustment = calleeSavesOffset - 8; - } - - genStackPointerAdjustment(firstAdjustment, REG_IP1, nullptr, /* reportUnwindData */ true); - calleeSavesOffset -= firstAdjustment; - frameSize -= firstAdjustment; + const int lowPart = spAdjust & 0xFFF; + const int highPart = spAdjust - lowPart; + assert(GetEmitter()->emitIns_valid_imm_for_add(highPart, EA_PTRSIZE)); + GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, highPart); + m_compiler->unwindAllocStack(highPart); + spAdjust = lowPart; } - - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSavesOffset, frameSize); + assert(GetEmitter()->emitIns_valid_imm_for_add(spAdjust, EA_PTRSIZE)); + GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spAdjust); + m_compiler->unwindAllocStack(spAdjust); } } @@ -631,7 +603,7 @@ void CodeGen::genPrologSaveReg( } //------------------------------------------------------------------------ -// genEpilogRestoreRegPair: This is the opposite of genPrologSaveRegPair(), run in the epilog instead of the prolog. +// genRestoreRegPair: This is the opposite of genPrologSaveRegPair(), run in the epilog instead of the prolog. // The stack pointer adjustment, if requested, is done after the register restore, using post-index addressing. // The caller must ensure that we can use the LDP instruction, and that spOffset will be in the legal range for that // instruction. @@ -651,15 +623,17 @@ void CodeGen::genPrologSaveReg( // Return Value: // None. -void CodeGen::genEpilogRestoreRegPair(regNumber reg1, +void CodeGen::genRestoreRegPair(regNumber reg1, regNumber reg2, + regNumber baseReg, int spOffset, int spDelta, bool useSaveNextPair, regNumber tmpReg, - bool* pTmpRegIsZero) + bool* pTmpRegIsZero, + bool reportUnwindData) { - assert(spOffset >= 0); + assert((spOffset >= -512) && (spOffset <= 504)); assert(spDelta >= 0); assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both @@ -673,42 +647,52 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, // Fold the SP change into this instruction. // ldp reg1, reg2, [SP], #spDelta GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_POST_INDEX); - m_compiler->unwindSaveRegPairPreindexed(reg1, reg2, -spDelta); + + if (reportUnwindData) + { + m_compiler->unwindSaveRegPairPreindexed(reg1, reg2, -spDelta); + } } else // (spOffset != 0) || (spDelta > 504) { // Can't fold in the SP change; need to use a separate ADD instruction. // ldp reg1, reg2, [SP, #offset] - GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); - m_compiler->unwindSaveRegPair(reg1, reg2, spOffset); + GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, baseReg, spOffset); + if (reportUnwindData) + { + m_compiler->unwindSaveRegPair(reg1, reg2, spOffset); + } // generate add SP,SP,imm - genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ reportUnwindData); } } else { - GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); + GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, baseReg, spOffset); - if (TargetOS::IsUnix && m_compiler->generateCFIUnwindCodes()) + if (reportUnwindData) { - useSaveNextPair = false; - } + if (TargetOS::IsUnix && m_compiler->generateCFIUnwindCodes()) + { + useSaveNextPair = false; + } - if (useSaveNextPair) - { - m_compiler->unwindSaveNext(); - } - else - { - m_compiler->unwindSaveRegPair(reg1, reg2, spOffset); + if (useSaveNextPair) + { + m_compiler->unwindSaveNext(); + } + else + { + m_compiler->unwindSaveRegPair(reg1, reg2, spOffset); + } } } } //------------------------------------------------------------------------ -// genEpilogRestoreReg: The opposite of genPrologSaveReg(), run in the epilog instead of the prolog. +// genRestoreReg: The opposite of genPrologSaveReg(), run in the epilog instead of the prolog. // // Arguments: // reg1 - Register to restore. @@ -722,7 +706,7 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, // Return Value: // None. -void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) +void CodeGen::genRestoreReg(regNumber reg1, regNumber baseReg, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero, bool reportUnwindData) { assert(spOffset >= 0); assert(spDelta >= 0); @@ -734,24 +718,36 @@ void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, reg { // We can use post-index addressing. // ldr REG, [SP], #spDelta - GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, REG_SPBASE, spDelta, INS_OPTS_POST_INDEX); - m_compiler->unwindSaveRegPreindexed(reg1, -spDelta); + GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, baseReg, spDelta, INS_OPTS_POST_INDEX); + + if (reportUnwindData) + { + m_compiler->unwindSaveRegPreindexed(reg1, -spDelta); + } } else // (spOffset != 0) || (spDelta > 255) { // ldr reg1, [SP, #offset] - GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); - m_compiler->unwindSaveReg(reg1, spOffset); + GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, baseReg, spOffset); + + if (reportUnwindData) + { + m_compiler->unwindSaveReg(reg1, spOffset); + } // generate add SP,SP,imm - genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, reportUnwindData); } } else { // ldr reg1, [SP, #offset] - GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); - m_compiler->unwindSaveReg(reg1, spOffset); + GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, baseReg, spOffset); + + if (reportUnwindData) + { + m_compiler->unwindSaveReg(reg1, spOffset); + } } } @@ -1023,7 +1019,7 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, // spDelta - if non-zero, the amount to add to SP after the last register restore (or together with it); // spOffset - the offset from SP that is the beginning of the callee-saved register area; // -void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset) +void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, regNumber baseReg, int spDelta, int spOffset, bool reportUnwindData) { const int slotSize = genGetSlotSizeForRegsInMask(regsMask); @@ -1049,18 +1045,18 @@ void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta if (genReverseAndPairCalleeSavedRegisters) { - genEpilogRestoreRegPair(regPair.reg2, regPair.reg1, spOffset, stackDelta, false, REG_IP1, nullptr); + genRestoreRegPair(regPair.reg2, regPair.reg1, baseReg, spOffset, stackDelta, false, REG_IP1, nullptr, reportUnwindData); } else { - genEpilogRestoreRegPair(regPair.reg1, regPair.reg2, spOffset, stackDelta, regPair.useSaveNextPair, - REG_IP1, nullptr); + genRestoreRegPair(regPair.reg1, regPair.reg2, baseReg, spOffset, stackDelta, regPair.useSaveNextPair, + REG_IP1, nullptr, reportUnwindData); } } else { spOffset -= slotSize; - genEpilogRestoreReg(regPair.reg1, spOffset, stackDelta, REG_IP1, nullptr); + genRestoreReg(regPair.reg1, baseReg, spOffset, stackDelta, REG_IP1, nullptr, reportUnwindData); } } } @@ -1096,7 +1092,7 @@ void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta // Return Value: // None. -void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta) +void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, regNumber baseReg, int lowestCalleeSavedOffset, int spDelta, bool reportUnwindData) { assert(spDelta >= 0); unsigned regsToRestoreCount = genCountBits(regsToRestoreMask); @@ -1132,20 +1128,20 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in { int spFrameDelta = (maskRestoreRegsFloat != RBM_NONE || maskRestoreRegsInt != RBM_NONE) ? 0 : spDelta; spOffset -= 2 * REGSIZE_BYTES; - genEpilogRestoreRegPair(REG_FP, REG_LR, spOffset, spFrameDelta, false, REG_IP1, nullptr); + genRestoreRegPair(REG_FP, REG_LR, baseReg, spOffset, spFrameDelta, false, REG_IP1, nullptr, reportUnwindData); } if (maskRestoreRegsInt != RBM_NONE) { int spIntDelta = (maskRestoreRegsFloat != RBM_NONE) ? 0 : spDelta; // should we delay the SP adjustment? - genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, spIntDelta, spOffset); + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, baseReg, spIntDelta, spOffset, reportUnwindData); spOffset -= genCountBits(maskRestoreRegsInt) * REGSIZE_BYTES; } if (maskRestoreRegsFloat != RBM_NONE) { // If there is any spDelta, it must be used here. - genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, spDelta, spOffset); + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, baseReg, spDelta, spOffset, reportUnwindData); // No need to update spOffset since it's not used after this. } } @@ -1596,7 +1592,7 @@ void CodeGen::genFuncletEpilog(BasicBlock* /* block */) regsToRestoreMask &= ~(RBM_LR | RBM_FP); // We restore FP/LR at the end } int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta + genFuncletInfo.fiSpDelta2; - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, lowestCalleeSavedOffset, 0); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, REG_SPBASE, lowestCalleeSavedOffset, 0, /* reportUnwindData */ true); if (genFuncletInfo.fiFrameType == 1) { @@ -5136,12 +5132,8 @@ int CodeGenInterface::genSPtoFPdelta() const { // The saved frame pointer is at the top of the frame, just beneath the saved varargs register space and the // saved LR. - delta = genTotalFrameSize() - (m_compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0); - - if (!m_compiler->opts.IsOSR()) - { - delta -= 2 /* FP, LR */ * REGSIZE_BYTES; - } + delta = genTotalFrameSize() - (m_compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) - + 2 /* FP, LR */ * REGSIZE_BYTES; } else { @@ -5589,7 +5581,7 @@ void CodeGen::genStoreLclTypeSimd12(GenTreeLclVarCommon* treeNode) #endif // FEATURE_SIMD -void CodeGen::genOSRRecordTier0CalleeSavedRegistersAndFrame() +void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() { assert(m_compiler->compGeneratingProlog); assert(m_compiler->opts.IsOSR()); @@ -5600,27 +5592,23 @@ void CodeGen::genOSRRecordTier0CalleeSavedRegistersAndFrame() // PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; regMaskTP const tier0CalleeSaves = (regMaskTP)patchpointInfo->CalleeSaveRegisters(); - assert((tier0CalleeSaves & RBM_ALLFLOAT) == RBM_NONE); int const tier0CalleeSaveUsedSize = genCountBits(tier0CalleeSaves) * REGSIZE_BYTES; JITDUMP("--OSR--- tier0 has already saved "); JITDUMPEXEC(dspRegMask(tier0CalleeSaves)); - JITDUMP("\n"); - - // We do this stack allocation in two steps: first the space for the callee saves at the top, - // so that we know we can always report that unwinding information, and then the rest after. - m_compiler->unwindAllocStack(tier0CalleeSaveUsedSize); - - assert((tier0CalleeSaves & (RBM_FP | RBM_LR)) == (RBM_FP | RBM_LR)); - - genSaveCalleeSavedRegistersHelp(tier0CalleeSaves, 0, /* spDelta */ 0, /* unwindOnly */ true); - - int remainingStack = patchpointInfo->TotalFrameSize() - tier0CalleeSaveUsedSize; - assert(remainingStack >= 0); - if (remainingStack > 0) - { - m_compiler->unwindAllocStack((unsigned)remainingStack); - } + JITDUMP("\nEmitting restores\n"); + + // Note: the restore of LR relies on the tier0 method having been unhijacked when the OSR method prolog runs. + // This happens in the transition helper. If transition helper is not used (e.g. because we directly jump into OSR) + // then hijacking tier0 is not supported -- this is similar to tailcalls so the situation can be recorded via + // SetHasTailCalls. + // FP is pointing to the FP/LR pair. That pair is always saved at the top, + // so add 2*REGSIZE_BYTES to get to the end of the callee saves, then + // subtract total size of callee saves to get to the beginning. + genRestoreCalleeSavedRegistersHelp(tier0CalleeSaves, REG_FPBASE, 2 * REGSIZE_BYTES - tier0CalleeSaveUsedSize, /* spDelta */ 0, /* reportUnwindData */ false); + m_compiler->unwindPadding(); + + m_compiler->unwindAllocStack(patchpointInfo->TotalFrameSize()); } #ifdef PROFILING_SUPPORTED diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 55e13c3efee9bc..f2c14410cb4329 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -4345,7 +4345,7 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe if (m_compiler->opts.IsOSR()) { PatchpointInfo* ppi = m_compiler->info.compPatchpointInfo; - rsPushRegs &= ~ppi->CalleeSaveRegisters(); + rsPushRegs &= (~ppi->CalleeSaveRegisters()) | RBM_FPBASE | RBM_LR; } #ifdef DEBUG @@ -4484,7 +4484,6 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe // The amount to subtract from SP before starting to store the callee-saved registers. It might be folded into the // first save instruction as a "predecrement" amount, if possible. int calleeSaveSpDelta = 0; - bool saveFplr = (maskSaveRegsInt & RBM_FP) != 0; if (isFramePointerUsed()) { @@ -4535,21 +4534,12 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe assert(totalFrameSize <= STACK_PROBE_BOUNDARY_THRESHOLD_BYTES); - if (saveFplr) - { - GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize, - INS_OPTS_PRE_INDEX); - m_compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize); + GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize, + INS_OPTS_PRE_INDEX); + m_compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize); - maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR - offset = (int)m_compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR - } - else - { - GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); - m_compiler->unwindAllocStack(totalFrameSize); - offset = m_compiler->compLclFrameSize; - } + maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR + offset = (int)m_compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR } else if ((totalFrameSize <= 512) && !m_compiler->opts.compDbgEnC) { @@ -4595,21 +4585,14 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); m_compiler->unwindAllocStack(totalFrameSize); - if (saveFplr) - { - assert(m_compiler->lvaOutgoingArgSpaceSize + 2 * REGSIZE_BYTES <= (unsigned)totalFrameSize); + assert(m_compiler->lvaOutgoingArgSpaceSize + 2 * REGSIZE_BYTES <= (unsigned)totalFrameSize); - GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, - m_compiler->lvaOutgoingArgSpaceSize); - m_compiler->unwindSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize); + GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, + m_compiler->lvaOutgoingArgSpaceSize); + m_compiler->unwindSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize); - maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR - offset = (int)m_compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR - } - else - { - offset = (int)m_compiler->compLclFrameSize; - } + maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR + offset = (int)m_compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR } } else @@ -4698,13 +4681,10 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe frameType = 3; - if (saveFplr) - { - calleeSaveSpDeltaUnaligned -= 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll save later. + calleeSaveSpDeltaUnaligned -= 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll save later. - // We'll take care of these later, but callee-saved regs code shouldn't see them. - maskSaveRegsInt &= ~(RBM_FP | RBM_LR); - } + // We'll take care of these later, but callee-saved regs code shouldn't see them. + maskSaveRegsInt &= ~(RBM_FP | RBM_LR); } assert(calleeSaveSpDeltaUnaligned >= 0); @@ -4799,15 +4779,8 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe JITDUMP(" spAdjustment2=%d\n", spAdjustment2); - if (saveFplr) - { - genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, - pInitRegZeroed); - } - else - { - genStackPointerAdjustment(-spAdjustment2, initReg, pInitRegZeroed, /* reportUnwindData */ true); - } + genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, + pInitRegZeroed); offset += spAdjustment2; @@ -4833,15 +4806,8 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe } else { - if (saveFplr) - { - genPrologSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false, - initReg, pInitRegZeroed); - } - else - { - genStackPointerAdjustment(-remainingFrameSz, initReg, pInitRegZeroed, /* reportUnwindData */ true); - } + genPrologSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false, + initReg, pInitRegZeroed); offset += remainingFrameSz; @@ -4851,21 +4817,16 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe else if (frameType == 4) { assert(genSaveFpLrWithAllCalleeSavedRegisters); - offsetSpToSavedFp = calleeSaveSpDelta - (m_compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0); - if (!m_compiler->opts.IsOSR()) - { - offsetSpToSavedFp -= 2 * REGSIZE_BYTES; // -2 for FP, LR - } + offsetSpToSavedFp = calleeSaveSpDelta - (m_compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) - + 2 * REGSIZE_BYTES; // -2 for FP, LR } else if (frameType == 5) { assert(genSaveFpLrWithAllCalleeSavedRegisters); - offsetSpToSavedFp = calleeSaveSpDelta - (m_compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0); - if (!m_compiler->opts.IsOSR()) - { - offsetSpToSavedFp -= 2 * REGSIZE_BYTES; // -2 for FP, LR - } + offsetSpToSavedFp = calleeSaveSpDelta - (m_compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) - + 2 * REGSIZE_BYTES; // -2 for FP, LR + JITDUMP(" offsetSpToSavedFp=%d\n", offsetSpToSavedFp); genEstablishFramePointer(offsetSpToSavedFp, /* reportUnwindData */ true); diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index e73a429b4a5474..bfde4d4feaeb34 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -4968,11 +4968,11 @@ void CodeGen::genFinalizeFrame() #endif // TARGET_LOONGARCH64 || TARGET_RISCV64 #if defined(TARGET_ARM64) - // We inherit registers saved by tier0. Avoid saving those. + // We inherit registers saved by tier0. Avoid saving those, except FP/LR that we always save. if (m_compiler->opts.IsOSR()) { PatchpointInfo* ppi = m_compiler->info.compPatchpointInfo; - maskCalleeRegsPushed &= ~ppi->CalleeSaveRegisters(); + maskCalleeRegsPushed &= (~ppi->CalleeSaveRegisters()) | RBM_FPBASE | RBM_LR; } #endif @@ -5433,7 +5433,7 @@ void CodeGen::genFnProlog() { // Account for the Tier0 callee saves // - genOSRRecordTier0CalleeSavedRegistersAndFrame(); + genOSRHandleTier0CalleeSavedRegistersAndFrame(); #ifdef TARGET_AMD64 // We don't actually push any callee saves on the OSR frame, diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 50f23d50927fd1..b696890a21a7f8 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -9860,11 +9860,11 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) #ifdef TARGET_AMD64 //------------------------------------------------------------------------ -// genOSRRecordTier0CalleeSavedRegistersAndFrame: for OSR methods, record the +// genOSRHandleTier0CalleeSavedRegistersAndFrame: for OSR methods, record the // subset of callee saves already saved by the Tier0 method, and the frame // created by Tier0. // -void CodeGen::genOSRRecordTier0CalleeSavedRegistersAndFrame() +void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() { assert(m_compiler->compGeneratingProlog); assert(m_compiler->opts.IsOSR()); diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index d5c06661efca1c..facf2c66d7d5b1 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -4330,7 +4330,7 @@ void Compiler::lvaFixVirtualFrameOffsets() if (!codeGen->IsSaveFpLrWithAllCalleeSavedRegisters()) { // We set FP to be after LR, FP - frameLocalsDelta = opts.IsOSR() ? 0 : 2 * REGSIZE_BYTES; + frameLocalsDelta = 2 * REGSIZE_BYTES; frameBoundary = opts.IsOSR() ? -info.compPatchpointInfo->TotalFrameSize() : 0; if (info.compIsVarArgs) frameBoundary -= MAX_REG_ARG * REGSIZE_BYTES; From 3474bd1adbe644414553f0a512a609244d29bf87 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 15 Apr 2026 13:12:43 +0200 Subject: [PATCH 11/32] Clean up --- src/coreclr/jit/codegen.h | 38 +++++++----- src/coreclr/jit/codegenarm64.cpp | 92 ++++++++++++++---------------- src/coreclr/jit/codegenarmarch.cpp | 24 +++----- src/coreclr/jit/codegencommon.cpp | 9 --- 4 files changed, 75 insertions(+), 88 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index cce518c3e9f916..81b421b437fc66 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -381,23 +381,28 @@ class CodeGen final : public CodeGenInterface int spDelta, bool useSaveNextPair, regNumber tmpReg, - bool* pTmpRegIsZero, - bool unwindOnly = false); + bool* pTmpRegIsZero); void genPrologSaveReg( regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero, bool reportUnwindData); void genRestoreRegPair(regNumber reg1, - regNumber reg2, - regNumber baseReg, - int spOffset, - int spDelta, - bool useSaveNextPair, - regNumber tmpReg, - bool* pTmpRegIsZero, - bool reportUnwindData); - - void genRestoreReg(regNumber reg1, regNumber baseReg, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero, bool reportUnwindData); + regNumber reg2, + regNumber baseReg, + int spOffset, + int spDelta, + bool useSaveNextPair, + regNumber tmpReg, + bool* pTmpRegIsZero, + bool reportUnwindData); + + void genRestoreReg(regNumber reg1, + regNumber baseReg, + int spOffset, + int spDelta, + regNumber tmpReg, + bool* pTmpRegIsZero, + bool reportUnwindData); // A simple struct to keep register pairs for prolog and epilog. struct RegPair @@ -428,13 +433,18 @@ class CodeGen final : public CodeGenInterface static int genGetSlotSizeForRegsInMask(regMaskTP regsMask); void genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset, bool unwindOnly = false); - void genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, regNumber baseReg, int spDelta, int spOffset, bool reportUnwindData); + void genRestoreCalleeSavedRegisterGroup( + regMaskTP regsMask, regNumber baseReg, int spDelta, int spOffset, bool reportUnwindData); void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta, bool unwindOnly = false); - void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, regNumber baseReg, int lowestCalleeSavedOffset, int spDelta, bool reportUnwindData); + void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, + regNumber baseReg, + int lowestCalleeSavedOffset, + int spDelta, + bool reportUnwindData); #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) bool genInstrWithConstant(instruction ins, diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 3d2d23b92b6f68..10919f2e3783b2 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -38,11 +38,6 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) regMaskTP rsRestoreRegs = regSet.rsGetModifiedCalleeSavedRegsMask(); - if (m_compiler->opts.IsOSR()) - { - rsRestoreRegs &= ~m_compiler->info.compPatchpointInfo->CalleeSaveRegisters(); - } - if (isFramePointerUsed()) { rsRestoreRegs |= RBM_FPBASE; @@ -138,7 +133,7 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) JITDUMP(" alignmentAdjustment2=%d\n", alignmentAdjustment2); genRestoreRegPair(REG_FP, REG_LR, REG_SPBASE, alignmentAdjustment2, spAdjustment2, false, REG_IP1, - nullptr, /* reportUnwindData */ true); + nullptr, /* reportUnwindData */ true); } else { @@ -160,7 +155,7 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) JITDUMP(" remainingFrameSz=%d\n", remainingFrameSz); genRestoreRegPair(REG_FP, REG_LR, REG_SPBASE, m_compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, - false, REG_IP1, nullptr, /* reportUnwindData */ true); + false, REG_IP1, nullptr, /* reportUnwindData */ true); } // Unlike frameType=1 or frameType=2 that restore SP at the end, @@ -213,7 +208,8 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) } JITDUMP(" calleeSaveSpOffset=%d, calleeSaveSpDelta=%d\n", calleeSaveSpOffset, calleeSaveSpDelta); - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, REG_SPBASE, calleeSaveSpOffset, calleeSaveSpDelta, /* reportUnwindData */ true); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, REG_SPBASE, calleeSaveSpOffset, calleeSaveSpDelta, + /* reportUnwindData */ true); switch (frameType) { @@ -467,7 +463,6 @@ void CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg, bool* // tmpReg - An available temporary register. Needed for the case of large frames. // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. // Otherwise, we don't touch it. -// unwindOnly - If true, we only generate unwind codes, and do not actually emit instructions to save // // Return Value: // None. @@ -478,8 +473,7 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, int spDelta, bool useSaveNextPair, regNumber tmpReg, - bool* pTmpRegIsZero, - bool unwindOnly) + bool* pTmpRegIsZero) { assert(spOffset >= 0); assert(spDelta <= 0); @@ -491,7 +485,6 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, if (spDelta != 0) { assert(!useSaveNextPair); - assert(!unwindOnly); if ((spOffset == 0) && (spDelta >= -512)) { @@ -519,10 +512,7 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, // 64-bit STP offset range: -512 to 504, multiple of 8. assert(spOffset <= 504); assert((spOffset % 8) == 0); - if (!unwindOnly) - { - GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); - } + GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); if (TargetOS::IsUnix && m_compiler->generateCFIUnwindCodes()) { @@ -558,13 +548,12 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, // tmpReg - An available temporary register. Needed for the case of large frames. // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. // Otherwise, we don't touch it. -// unwindOnly - If true, we only generate unwind codes, and do not actually emit instructions to save // // Return Value: // None. void CodeGen::genPrologSaveReg( - regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero, bool unwindOnly) + regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) { assert(spOffset >= 0); assert(spDelta <= 0); @@ -573,7 +562,6 @@ void CodeGen::genPrologSaveReg( bool needToSaveRegs = true; if (spDelta != 0) { - assert(!unwindOnly); if ((spOffset == 0) && (spDelta >= -256)) { // We can use pre-index addressing. @@ -592,12 +580,9 @@ void CodeGen::genPrologSaveReg( if (needToSaveRegs) { - if (!unwindOnly) - { - // str REG, [SP, #offset] - // 64-bit STR offset range: 0 to 32760, multiple of 8. - GetEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); - } + // str REG, [SP, #offset] + // 64-bit STR offset range: 0 to 32760, multiple of 8. + GetEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); m_compiler->unwindSaveReg(reg1, spOffset); } } @@ -624,14 +609,14 @@ void CodeGen::genPrologSaveReg( // None. void CodeGen::genRestoreRegPair(regNumber reg1, - regNumber reg2, - regNumber baseReg, - int spOffset, - int spDelta, - bool useSaveNextPair, - regNumber tmpReg, - bool* pTmpRegIsZero, - bool reportUnwindData) + regNumber reg2, + regNumber baseReg, + int spOffset, + int spDelta, + bool useSaveNextPair, + regNumber tmpReg, + bool* pTmpRegIsZero, + bool reportUnwindData) { assert((spOffset >= -512) && (spOffset <= 504)); assert(spDelta >= 0); @@ -706,7 +691,13 @@ void CodeGen::genRestoreRegPair(regNumber reg1, // Return Value: // None. -void CodeGen::genRestoreReg(regNumber reg1, regNumber baseReg, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero, bool reportUnwindData) +void CodeGen::genRestoreReg(regNumber reg1, + regNumber baseReg, + int spOffset, + int spDelta, + regNumber tmpReg, + bool* pTmpRegIsZero, + bool reportUnwindData) { assert(spOffset >= 0); assert(spDelta >= 0); @@ -882,9 +873,8 @@ int CodeGen::genGetSlotSizeForRegsInMask(regMaskTP regsMask) // regsMask - a mask of registers for prolog generation; // spDelta - if non-zero, the amount to add to SP before the first register save (or together with it); // spOffset - the offset from SP that is the beginning of the callee-saved register area; -// unwindOnly - if true, only generate unwind codes, and do not actually emit instructions to save. // -void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset, bool unwindOnly) +void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset) { const int slotSize = genGetSlotSizeForRegsInMask(regsMask); @@ -899,13 +889,12 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, i // We can use a STP instruction. if (genReverseAndPairCalleeSavedRegisters) { - genPrologSaveRegPair(regPair.reg2, regPair.reg1, spOffset, spDelta, false, REG_IP0, nullptr, - unwindOnly); + genPrologSaveRegPair(regPair.reg2, regPair.reg1, spOffset, spDelta, false, REG_IP0, nullptr); } else { genPrologSaveRegPair(regPair.reg1, regPair.reg2, spOffset, spDelta, regPair.useSaveNextPair, REG_IP0, - nullptr, unwindOnly); + nullptr); } spOffset += 2 * slotSize; @@ -913,7 +902,7 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, i else { // No register pair; we use a STR instruction. - genPrologSaveReg(regPair.reg1, spOffset, spDelta, REG_IP0, nullptr, unwindOnly); + genPrologSaveReg(regPair.reg1, spOffset, spDelta, REG_IP0, nullptr); spOffset += slotSize; } @@ -1019,7 +1008,8 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, // spDelta - if non-zero, the amount to add to SP after the last register restore (or together with it); // spOffset - the offset from SP that is the beginning of the callee-saved register area; // -void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, regNumber baseReg, int spDelta, int spOffset, bool reportUnwindData) +void CodeGen::genRestoreCalleeSavedRegisterGroup( + regMaskTP regsMask, regNumber baseReg, int spDelta, int spOffset, bool reportUnwindData) { const int slotSize = genGetSlotSizeForRegsInMask(regsMask); @@ -1045,12 +1035,13 @@ void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, regNumber b if (genReverseAndPairCalleeSavedRegisters) { - genRestoreRegPair(regPair.reg2, regPair.reg1, baseReg, spOffset, stackDelta, false, REG_IP1, nullptr, reportUnwindData); + genRestoreRegPair(regPair.reg2, regPair.reg1, baseReg, spOffset, stackDelta, false, REG_IP1, nullptr, + reportUnwindData); } else { genRestoreRegPair(regPair.reg1, regPair.reg2, baseReg, spOffset, stackDelta, regPair.useSaveNextPair, - REG_IP1, nullptr, reportUnwindData); + REG_IP1, nullptr, reportUnwindData); } } else @@ -1092,7 +1083,8 @@ void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, regNumber b // Return Value: // None. -void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, regNumber baseReg, int lowestCalleeSavedOffset, int spDelta, bool reportUnwindData) +void CodeGen::genRestoreCalleeSavedRegistersHelp( + regMaskTP regsToRestoreMask, regNumber baseReg, int lowestCalleeSavedOffset, int spDelta, bool reportUnwindData) { assert(spDelta >= 0); unsigned regsToRestoreCount = genCountBits(regsToRestoreMask); @@ -1592,7 +1584,8 @@ void CodeGen::genFuncletEpilog(BasicBlock* /* block */) regsToRestoreMask &= ~(RBM_LR | RBM_FP); // We restore FP/LR at the end } int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta + genFuncletInfo.fiSpDelta2; - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, REG_SPBASE, lowestCalleeSavedOffset, 0, /* reportUnwindData */ true); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, REG_SPBASE, lowestCalleeSavedOffset, 0, + /* reportUnwindData */ true); if (genFuncletInfo.fiFrameType == 1) { @@ -5590,9 +5583,9 @@ void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() // Figure out which set of int callee saves was already saved by Tier0. // Emit appropriate unwind. // - PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; - regMaskTP const tier0CalleeSaves = (regMaskTP)patchpointInfo->CalleeSaveRegisters(); - int const tier0CalleeSaveUsedSize = genCountBits(tier0CalleeSaves) * REGSIZE_BYTES; + PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; + regMaskTP const tier0CalleeSaves = (regMaskTP)patchpointInfo->CalleeSaveRegisters(); + int const tier0CalleeSaveUsedSize = genCountBits(tier0CalleeSaves) * REGSIZE_BYTES; JITDUMP("--OSR--- tier0 has already saved "); JITDUMPEXEC(dspRegMask(tier0CalleeSaves)); @@ -5605,7 +5598,8 @@ void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() // FP is pointing to the FP/LR pair. That pair is always saved at the top, // so add 2*REGSIZE_BYTES to get to the end of the callee saves, then // subtract total size of callee saves to get to the beginning. - genRestoreCalleeSavedRegistersHelp(tier0CalleeSaves, REG_FPBASE, 2 * REGSIZE_BYTES - tier0CalleeSaveUsedSize, /* spDelta */ 0, /* reportUnwindData */ false); + genRestoreCalleeSavedRegistersHelp(tier0CalleeSaves, REG_FPBASE, 2 * REGSIZE_BYTES - tier0CalleeSaveUsedSize, + /* spDelta */ 0, /* reportUnwindData */ false); m_compiler->unwindPadding(); m_compiler->unwindAllocStack(patchpointInfo->TotalFrameSize()); diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index f2c14410cb4329..60abe7fa996839 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -4342,12 +4342,6 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe regSet.rsMaskCalleeSaved = rsPushRegs; - if (m_compiler->opts.IsOSR()) - { - PatchpointInfo* ppi = m_compiler->info.compPatchpointInfo; - rsPushRegs &= (~ppi->CalleeSaveRegisters()) | RBM_FPBASE | RBM_LR; - } - #ifdef DEBUG if (m_compiler->compCalleeRegsPushed != genCountBits(rsPushRegs)) { @@ -4483,14 +4477,14 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe // The amount to subtract from SP before starting to store the callee-saved registers. It might be folded into the // first save instruction as a "predecrement" amount, if possible. - int calleeSaveSpDelta = 0; + int calleeSaveSpDelta = 0; if (isFramePointerUsed()) { - // Either we need to save both FP and LR or none of them. The latter - // happens only for OSR functions that inherit FP/LR from the tier0 - // frame. - assert(((maskSaveRegsInt & RBM_FP) != 0) == ((maskSaveRegsInt & RBM_LR) != 0)); + // We need to save both FP and LR. + + assert((maskSaveRegsInt & RBM_FP) != 0); + assert((maskSaveRegsInt & RBM_LR) != 0); // If we need to generate a GS cookie, we need to make sure the saved frame pointer and return address // (FP and LR) are protected from buffer overrun by the GS cookie. If FP/LR are at the lowest addresses, @@ -4779,8 +4773,7 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe JITDUMP(" spAdjustment2=%d\n", spAdjustment2); - genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, - pInitRegZeroed); + genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, pInitRegZeroed); offset += spAdjustment2; @@ -4806,8 +4799,8 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe } else { - genPrologSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false, - initReg, pInitRegZeroed); + genPrologSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false, initReg, + pInitRegZeroed); offset += remainingFrameSz; @@ -4826,7 +4819,6 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe offsetSpToSavedFp = calleeSaveSpDelta - (m_compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) - 2 * REGSIZE_BYTES; // -2 for FP, LR - JITDUMP(" offsetSpToSavedFp=%d\n", offsetSpToSavedFp); genEstablishFramePointer(offsetSpToSavedFp, /* reportUnwindData */ true); diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index bfde4d4feaeb34..8cbeafa56d23ab 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -4967,15 +4967,6 @@ void CodeGen::genFinalizeFrame() #endif // TARGET_LOONGARCH64 || TARGET_RISCV64 -#if defined(TARGET_ARM64) - // We inherit registers saved by tier0. Avoid saving those, except FP/LR that we always save. - if (m_compiler->opts.IsOSR()) - { - PatchpointInfo* ppi = m_compiler->info.compPatchpointInfo; - maskCalleeRegsPushed &= (~ppi->CalleeSaveRegisters()) | RBM_FPBASE | RBM_LR; - } -#endif - m_compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed); #ifdef DEBUG From 60aa1f85abe9f05ad1cf6d6c276bc8b3d14891ff Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 15 Apr 2026 13:13:07 +0200 Subject: [PATCH 12/32] Run jit-format --- src/coreclr/jit/codegenarm64.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 10919f2e3783b2..fa91e65d411ef4 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -552,8 +552,7 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, // Return Value: // None. -void CodeGen::genPrologSaveReg( - regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) +void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) { assert(spOffset >= 0); assert(spDelta <= 0); From ec1a0884c6569beaac431faae75f31f3fe083366 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 15 Apr 2026 13:23:24 +0200 Subject: [PATCH 13/32] Fix --- src/coreclr/jit/codegen.h | 10 +++------- src/coreclr/jit/codegenarm64.cpp | 12 ++++-------- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 81b421b437fc66..6a7eb50ff9e76d 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -383,8 +383,7 @@ class CodeGen final : public CodeGenInterface regNumber tmpReg, bool* pTmpRegIsZero); - void genPrologSaveReg( - regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero, bool reportUnwindData); + void genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero); void genRestoreRegPair(regNumber reg1, regNumber reg2, @@ -432,14 +431,11 @@ class CodeGen final : public CodeGenInterface static int genGetSlotSizeForRegsInMask(regMaskTP regsMask); - void genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset, bool unwindOnly = false); + void genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset); void genRestoreCalleeSavedRegisterGroup( regMaskTP regsMask, regNumber baseReg, int spDelta, int spOffset, bool reportUnwindData); - void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, - int lowestCalleeSavedOffset, - int spDelta, - bool unwindOnly = false); + void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta); void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, regNumber baseReg, int lowestCalleeSavedOffset, diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index fa91e65d411ef4..100cd287190e39 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -946,10 +946,7 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, i // The save set can contain LR in which case LR is saved along with the other callee-saved registers. // But currently Jit doesn't use frames without frame pointer on arm64. // -void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, - int lowestCalleeSavedOffset, - int spDelta, - bool unwindOnly) +void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta) { assert(spDelta <= 0); assert(-spDelta <= STACK_PROBE_BOUNDARY_THRESHOLD_BYTES); @@ -957,7 +954,6 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, unsigned regsToSaveCount = genCountBits(regsToSaveMask); if (regsToSaveCount == 0) { - assert(!unwindOnly); if (spDelta != 0) { // Currently this is the case for varargs only @@ -980,21 +976,21 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, if (maskSaveRegsFloat != RBM_NONE) { - genSaveCalleeSavedRegisterGroup(maskSaveRegsFloat, spDelta, lowestCalleeSavedOffset, unwindOnly); + genSaveCalleeSavedRegisterGroup(maskSaveRegsFloat, spDelta, lowestCalleeSavedOffset); spDelta = 0; lowestCalleeSavedOffset += genCountBits(maskSaveRegsFloat) * FPSAVE_REGSIZE_BYTES; } if (maskSaveRegsInt != RBM_NONE) { - genSaveCalleeSavedRegisterGroup(maskSaveRegsInt, spDelta, lowestCalleeSavedOffset, unwindOnly); + genSaveCalleeSavedRegisterGroup(maskSaveRegsInt, spDelta, lowestCalleeSavedOffset); spDelta = 0; lowestCalleeSavedOffset += genCountBits(maskSaveRegsInt) * FPSAVE_REGSIZE_BYTES; } if (maskSaveRegsFrame != RBM_NONE) { - genPrologSaveRegPair(REG_FP, REG_LR, lowestCalleeSavedOffset, spDelta, false, REG_IP0, nullptr, unwindOnly); + genPrologSaveRegPair(REG_FP, REG_LR, lowestCalleeSavedOffset, spDelta, false, REG_IP0, nullptr); // No need to update spDelta, lowestCalleeSavedOffset since they're not used after this. } } From df8a0123a1420cc1f922127b8846adb24d1b6b85 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 15 Apr 2026 13:34:21 +0200 Subject: [PATCH 14/32] Set FP for arm64 --- src/coreclr/vm/jithelpers.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 952824661b77fa..b4741e4117a32a 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -1723,7 +1723,9 @@ extern "C" void JIT_PatchpointWorkerWorkerWithPolicy(TransitionBlock * pTransiti #endif // TARGET_WINDOWS pFrameContext->Rbp = currentFP; -#endif // TARGET_AMD64 +#elif defined(TARGET_ARM64) // TARGET_AMD64 + pFrameContext->Fp = currentFP; +#endif // TARGET_ARM64 SetSP(pFrameContext, currentSP); From a1e03801a6aab22079c13f3c2709da51dc86cf28 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 15 Apr 2026 13:41:28 +0200 Subject: [PATCH 15/32] Remove assert --- src/coreclr/jit/codegenarm64.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 100cd287190e39..10a720f63eda76 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -698,7 +698,6 @@ void CodeGen::genRestoreReg(regNumber reg1, bool* pTmpRegIsZero, bool reportUnwindData) { - assert(spOffset >= 0); assert(spDelta >= 0); assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned From dbc2d707fe439119f0fae5a55b8cf0828df2d649 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 15 Apr 2026 13:56:44 +0200 Subject: [PATCH 16/32] Fix overriding FP during restore --- src/coreclr/jit/codegen.h | 6 +--- src/coreclr/jit/codegenarm64.cpp | 50 ++++++++++++++++++++++---------- 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 6a7eb50ff9e76d..784c1fc335fa4b 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -436,11 +436,7 @@ class CodeGen final : public CodeGenInterface regMaskTP regsMask, regNumber baseReg, int spDelta, int spOffset, bool reportUnwindData); void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta); - void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, - regNumber baseReg, - int lowestCalleeSavedOffset, - int spDelta, - bool reportUnwindData); + void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta); #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) bool genInstrWithConstant(instruction ins, diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 10a720f63eda76..fae4655663b576 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -208,8 +208,7 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) } JITDUMP(" calleeSaveSpOffset=%d, calleeSaveSpDelta=%d\n", calleeSaveSpOffset, calleeSaveSpDelta); - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, REG_SPBASE, calleeSaveSpOffset, calleeSaveSpDelta, - /* reportUnwindData */ true); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSpOffset, calleeSaveSpDelta); switch (frameType) { @@ -1000,7 +999,7 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe // Arguments: // regsMask - a mask of registers for epilog generation; // spDelta - if non-zero, the amount to add to SP after the last register restore (or together with it); -// spOffset - the offset from SP that is the beginning of the callee-saved register area; +// spOffset - the offset from SP that is the top of the callee-saved register area; // void CodeGen::genRestoreCalleeSavedRegisterGroup( regMaskTP regsMask, regNumber baseReg, int spDelta, int spOffset, bool reportUnwindData) @@ -1077,8 +1076,7 @@ void CodeGen::genRestoreCalleeSavedRegisterGroup( // Return Value: // None. -void CodeGen::genRestoreCalleeSavedRegistersHelp( - regMaskTP regsToRestoreMask, regNumber baseReg, int lowestCalleeSavedOffset, int spDelta, bool reportUnwindData) +void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta) { assert(spDelta >= 0); unsigned regsToRestoreCount = genCountBits(regsToRestoreMask); @@ -1114,20 +1112,23 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp( { int spFrameDelta = (maskRestoreRegsFloat != RBM_NONE || maskRestoreRegsInt != RBM_NONE) ? 0 : spDelta; spOffset -= 2 * REGSIZE_BYTES; - genRestoreRegPair(REG_FP, REG_LR, baseReg, spOffset, spFrameDelta, false, REG_IP1, nullptr, reportUnwindData); + genRestoreRegPair(REG_FP, REG_LR, REG_SPBASE, spOffset, spFrameDelta, false, REG_IP1, nullptr, + /* reportUnwindData */ true); } if (maskRestoreRegsInt != RBM_NONE) { int spIntDelta = (maskRestoreRegsFloat != RBM_NONE) ? 0 : spDelta; // should we delay the SP adjustment? - genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, baseReg, spIntDelta, spOffset, reportUnwindData); + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, REG_SPBASE, spIntDelta, spOffset, + /* reportUnwindData */ true); spOffset -= genCountBits(maskRestoreRegsInt) * REGSIZE_BYTES; } if (maskRestoreRegsFloat != RBM_NONE) { // If there is any spDelta, it must be used here. - genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, baseReg, spDelta, spOffset, reportUnwindData); + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, REG_SPBASE, spDelta, spOffset, + /* reportUnwindData */ true); // No need to update spOffset since it's not used after this. } } @@ -1578,8 +1579,7 @@ void CodeGen::genFuncletEpilog(BasicBlock* /* block */) regsToRestoreMask &= ~(RBM_LR | RBM_FP); // We restore FP/LR at the end } int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta + genFuncletInfo.fiSpDelta2; - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, REG_SPBASE, lowestCalleeSavedOffset, 0, - /* reportUnwindData */ true); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, lowestCalleeSavedOffset, 0); if (genFuncletInfo.fiFrameType == 1) { @@ -5589,11 +5589,31 @@ void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() // This happens in the transition helper. If transition helper is not used (e.g. because we directly jump into OSR) // then hijacking tier0 is not supported -- this is similar to tailcalls so the situation can be recorded via // SetHasTailCalls. - // FP is pointing to the FP/LR pair. That pair is always saved at the top, - // so add 2*REGSIZE_BYTES to get to the end of the callee saves, then - // subtract total size of callee saves to get to the beginning. - genRestoreCalleeSavedRegistersHelp(tier0CalleeSaves, REG_FPBASE, 2 * REGSIZE_BYTES - tier0CalleeSaveUsedSize, - /* spDelta */ 0, /* reportUnwindData */ false); + + regMaskTP restoreRegsFrame = tier0CalleeSaves & (RBM_FP | RBM_LR); + regMaskTP restoreRegsFloat = tier0CalleeSaves & RBM_ALLFLOAT; + regMaskTP restoreRegsInt = tier0CalleeSaves & ~restoreRegsFrame & ~restoreRegsFloat; + + // FP is pointing to the FP/LR pair. That pair is always saved at the top. + int calleeSavesTop = 2 * REGSIZE_BYTES; + int frameRegsBottom = calleeSavesTop - 2 * REGSIZE_BYTES; + int intRegsTop = frameRegsBottom; + int floatRegsTop = intRegsTop - genCountBits(restoreRegsInt) * REGSIZE_BYTES; + + if (restoreRegsInt != RBM_NONE) + { + genRestoreCalleeSavedRegisterGroup(restoreRegsInt, REG_FPBASE, 0, intRegsTop, /* reportUnwindData */ false); + } + + if (restoreRegsFloat != RBM_NONE) + { + genRestoreCalleeSavedRegisterGroup(restoreRegsFloat, REG_FPBASE, 0, floatRegsTop, /* reportUnwindData */ false); + } + + assert(restoreRegsFrame == (RBM_FP | RBM_LR)); + genRestoreRegPair(REG_FP, REG_LR, REG_FPBASE, frameRegsBottom, 0, false, REG_IP1, nullptr, + /* reportUnwindData */ false); + m_compiler->unwindPadding(); m_compiler->unwindAllocStack(patchpointInfo->TotalFrameSize()); From 4410ba3c0c00204d2a2a0c9e544991ccf9bb3e07 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 15 Apr 2026 14:10:21 +0200 Subject: [PATCH 17/32] Clean up --- src/coreclr/jit/codegenarm64.cpp | 8 ++------ src/coreclr/jit/codegencommon.cpp | 3 +-- src/coreclr/jit/compiler.cpp | 4 ++-- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index fae4655663b576..8d0337c32fda45 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5574,12 +5574,8 @@ void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() assert(m_compiler->opts.IsOSR()); assert(m_compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT); - // Figure out which set of int callee saves was already saved by Tier0. - // Emit appropriate unwind. - // - PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; - regMaskTP const tier0CalleeSaves = (regMaskTP)patchpointInfo->CalleeSaveRegisters(); - int const tier0CalleeSaveUsedSize = genCountBits(tier0CalleeSaves) * REGSIZE_BYTES; + PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; + regMaskTP const tier0CalleeSaves(patchpointInfo->CalleeSaveRegisters()); JITDUMP("--OSR--- tier0 has already saved "); JITDUMPEXEC(dspRegMask(tier0CalleeSaves)); diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 8cbeafa56d23ab..f6510ad4dbb29c 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -5058,7 +5058,7 @@ void CodeGen::genFnProlog() // in the tier0 frame that impact FP and SP on entry to the OSR method. // // x64/arm64 handle this differently; the phantom prolog unwind is emitted in - // genOSRRecordTier0CalleeSavedRegistersAndFrame. + // genOSRHandleTier0CalleeSavedRegistersAndFrame. // if (m_compiler->opts.IsOSR()) { @@ -5414,7 +5414,6 @@ void CodeGen::genFnProlog() #else // TARGET_WASM regNumber initReg = REG_NA; bool initRegZeroed = false; - bool isOSRx64Root = false; #endif // TARGET_WASM unsigned extraFrameSize = 0; diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index b9dbea6bb64d41..d76a16895311e9 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5731,9 +5731,9 @@ void Compiler::generatePatchpointInfo() #ifdef TARGET_ARM64 rsPushRegs |= RBM_LR; #endif - patchpointInfo->SetCalleeSaveRegisters((uint64_t)rsPushRegs.GetIntRegSet()); + patchpointInfo->SetCalleeSaveRegisters((uint64_t)rsPushRegs.getLow()); JITDUMP("--OSR-- Tier0 callee saves: "); - JITDUMPEXEC(dspRegMask(regMaskTP::FromIntRegSet((SingleTypeRegSet)patchpointInfo->CalleeSaveRegisters()))); + JITDUMPEXEC(dspRegMask(regMaskTP((regMaskSmall)patchpointInfo->CalleeSaveRegisters()))); JITDUMP("\n"); // Register this with the runtime. From 6dce58b0f7afa53ff7a7e1097ee190161308f338 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 15 Apr 2026 14:29:55 +0200 Subject: [PATCH 18/32] Implement for LA64/RV64 --- src/coreclr/jit/codegen.h | 6 +--- src/coreclr/jit/codegenarm64.cpp | 5 +++ src/coreclr/jit/codegencommon.cpp | 45 +++----------------------- src/coreclr/jit/codegenloongarch64.cpp | 45 +++++++++++++++++++++++--- src/coreclr/jit/codegenriscv64.cpp | 45 +++++++++++++++++++++++--- src/coreclr/jit/codegenwasm.cpp | 2 +- 6 files changed, 92 insertions(+), 56 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 784c1fc335fa4b..ccfe9ba56e85f9 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -348,11 +348,7 @@ class CodeGen final : public CodeGenInterface unsigned lclNum, unsigned offset, unsigned paramLclNum, const ABIPassingSegment& seg, class RegGraph* graph); void genSpillOrAddNonStandardRegisterParam(unsigned lclNum, regNumber sourceReg, class RegGraph* graph); void genEnregisterIncomingStackArgs(); -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) void genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZeroed); -#else - void genEnregisterOSRArgsAndLocals(); -#endif void genHomeStackSegment(unsigned lclNum, const ABIPassingSegment& seg, regNumber initReg, bool* pInitRegZeroed); void genHomeSwiftStructStackParameters(); @@ -450,7 +446,7 @@ class CodeGen final : public CodeGenInterface void genStackPointerAdjustment(ssize_t spAdjustment, regNumber tmpReg, bool* pTmpRegIsZero, bool reportUnwindData); void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset); - void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset); + void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, regNumber baseReg, int lowestCalleeSavedOffset, bool reportUnwindData); #endif void genOSRHandleTier0CalleeSavedRegistersAndFrame(); diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 8d0337c32fda45..48473632724393 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5568,6 +5568,11 @@ void CodeGen::genStoreLclTypeSimd12(GenTreeLclVarCommon* treeNode) #endif // FEATURE_SIMD +//----------------------------------------------------------------------------- +// genOSRHandleTier0CalleeSavedRegistersAndFrame: +// Handle the tier0 callee saves by restoring them from the original tier0 frame. +// Also report phantom unwind data for the allocated stack by the tier0 frame. +// void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() { assert(m_compiler->compGeneratingProlog); diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index f6510ad4dbb29c..521b6acf60ca51 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -4135,11 +4135,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, // initReg -- scratch register to use if needed // pInitRegZeroed -- [IN,OUT] if init reg is zero (on entry/exit) // -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) void CodeGen::genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZeroed) -#else -void CodeGen::genEnregisterOSRArgsAndLocals() -#endif { assert(m_compiler->opts.IsOSR()); PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; @@ -5320,11 +5316,7 @@ void CodeGen::genFnProlog() const bool isRoot = (m_compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT); -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) const bool inheritsCalleeSaves = isRoot && m_compiler->opts.IsOSR(); -#else - const bool inheritsCalleeSaves = false; -#endif // TARGET_AMD64 regMaskTP tempMask = initRegs & RBM_ALLINT & ~excludeMask & ~regSet.rsMaskResvd; @@ -5348,37 +5340,16 @@ void CodeGen::genFnProlog() } } -#if defined(TARGET_AMD64) - // For x64 OSR root frames, we can't use any as of yet unsaved + // For OSR root frames, we can't use any as of yet unsaved // callee save as initReg, as we defer saving these until later in // the prolog, and we don't have normal arg regs. if (inheritsCalleeSaves) - { - initReg = REG_SCRATCH; // REG_EAX - } -#elif defined(TARGET_ARM64) - // For arm64 OSR root frames, we may need a scratch register for large - // offset addresses. Use a register that won't be allocated. - // - if (inheritsCalleeSaves) - { - initReg = REG_IP1; - } -#elif defined(TARGET_LOONGARCH64) - // For LoongArch64 OSR root frames, we may need a scratch register for large - // offset addresses. Use a register that won't be allocated. - if (isRoot && m_compiler->opts.IsOSR()) { initReg = REG_SCRATCH; - } -#elif defined(TARGET_RISCV64) - // For RISC-V64 OSR root frames, we may need a scratch register for large - // offset addresses. Use a register that won't be allocated. - if (isRoot && m_compiler->opts.IsOSR()) - { - initReg = REG_SCRATCH; // REG_T0 - } +#if defined(TARGET_ARM64) + initReg = REG_IP1; #endif + } #if defined(TARGET_AMD64) // If we are a varargs call, in order to set up the arguments correctly this @@ -5414,11 +5385,11 @@ void CodeGen::genFnProlog() #else // TARGET_WASM regNumber initReg = REG_NA; bool initRegZeroed = false; + bool inheritsCalleeSaves = false; #endif // TARGET_WASM unsigned extraFrameSize = 0; -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) if (inheritsCalleeSaves) { // Account for the Tier0 callee saves @@ -5433,7 +5404,6 @@ void CodeGen::genFnProlog() extraFrameSize = m_compiler->compCalleeRegsPushed * REGSIZE_BYTES; #endif } -#endif #ifdef TARGET_XARCH if (doubleAlignOrFramePointerUsed()) @@ -5672,12 +5642,7 @@ void CodeGen::genFnProlog() // we've set the live-in regs with values from the Tier0 frame. // // Otherwise we'll do some of these fetches twice. - -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) genEnregisterOSRArgsAndLocals(initReg, &initRegZeroed); -#else - genEnregisterOSRArgsAndLocals(); -#endif // OSR functions take no parameters in registers. Ensure no mappings // are present. assert((m_compiler->m_paramRegLocalMappings == nullptr) || m_compiler->m_paramRegLocalMappings->Empty()); diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 2b3cc0821a8791..96d0ac39907268 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -257,7 +257,9 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe // // Arguments: // regsToRestoreMask - The mask of callee-saved registers to restore. If empty, this function does nothing. +// baseReg - Base register to use when loading values // lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. +// reportUnwindData - If true, report the change in unwind data. Otherwise, do not report it. // // Here's an example restore sequence: // ld.d s8,sp,#xxx @@ -273,7 +275,7 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe // Return Value: // None. -void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset) +void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, regNumber baseReg, int lowestCalleeSavedOffset, bool reportUnwindData) { // The FP and RA are not in RBM_CALLEE_SAVED. assert(!(regsToRestoreMask & (~RBM_CALLEE_SAVED))); @@ -294,8 +296,12 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in if (maskSaveRegs < 0) { highestCalleeSavedOffset -= REGSIZE_BYTES; - emit->emitIns_R_R_I(INS_fld_d, EA_8BYTE, (regNumber)regNum, REG_SP, highestCalleeSavedOffset); - m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); + emit->emitIns_R_R_I(INS_fld_d, EA_8BYTE, (regNumber)regNum, baseReg, highestCalleeSavedOffset); + + if (reportUnwindData) + { + m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); + } } maskSaveRegs <<= 1; regNum -= 1; @@ -309,8 +315,12 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in if (maskSaveRegs < 0) { highestCalleeSavedOffset -= REGSIZE_BYTES; - emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, (regNumber)regNum, REG_SP, highestCalleeSavedOffset); - m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); + emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, (regNumber)regNum, baseReg, highestCalleeSavedOffset); + + if (reportUnwindData) + { + m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); + } } maskSaveRegs <<= 1; regNum -= 1; @@ -319,6 +329,31 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in assert(highestCalleeSavedOffset >= 16); // the callee-saved regs always above ra/fp. } +//----------------------------------------------------------------------------- +// genOSRHandleTier0CalleeSavedRegistersAndFrame: +// Handle the tier0 callee saves by restoring them from the original tier0 frame. +// Also report phantom unwind data for the allocated stack by the tier0 frame. +// +void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() +{ + assert(m_compiler->compGeneratingProlog); + assert(m_compiler->opts.IsOSR()); + assert(m_compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT); + + PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; + regMaskTP const tier0CalleeSaves(patchpointInfo->CalleeSaveRegisters()); + + JITDUMP("--OSR--- tier0 has already saved "); + JITDUMPEXEC(dspRegMask(tier0CalleeSaves)); + JITDUMP("\nEmitting restores\n"); + + genRestoreCalleeSavedRegistersHelp(tier0CalleeSaves, REG_FPBASE, 16, /* reportUnwindData */ false); + emit->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_FPBASE, 8); + emit->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_FPBASE, 0); + + m_compiler->unwindAllocStack(patchpointInfo->TotalFrameSize()); +} + // clang-format off /***************************************************************************** * diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index e88bee5a01c991..5fbf2c47392f9b 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -245,7 +245,9 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe // // Arguments: // regsToRestoreMask - The mask of callee-saved registers to restore. If empty, this function does nothing. +// baseReg - Base register to use when loading values // lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. +// reportUnwindData - If true, report the change in unwind data. Otherwise, do not report it. // // Here's an example restore sequence: // ld s11, #xxx(sp) @@ -263,7 +265,7 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe // Return Value: // None. -void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset) +void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, regNumber baseReg, int lowestCalleeSavedOffset, bool reportUnwindData) { // The FP and RA are not in RBM_CALLEE_SAVED. assert(!(regsToRestoreMask & (~RBM_CALLEE_SAVED))); @@ -284,8 +286,12 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in if (maskSaveRegs < 0) { highestCalleeSavedOffset -= REGSIZE_BYTES; - emit->emitIns_R_R_I(INS_fld, EA_8BYTE, (regNumber)regNum, REG_SP, highestCalleeSavedOffset); - m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); + emit->emitIns_R_R_I(INS_fld, EA_8BYTE, (regNumber)regNum, baseReg, highestCalleeSavedOffset); + + if (reportUnwindData) + { + m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); + } } maskSaveRegs <<= 1; regNum -= 1; @@ -299,8 +305,12 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in if (maskSaveRegs < 0) { highestCalleeSavedOffset -= REGSIZE_BYTES; - emit->emitIns_R_R_I(INS_ld, EA_8BYTE, (regNumber)regNum, REG_SP, highestCalleeSavedOffset); - m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); + emit->emitIns_R_R_I(INS_ld, EA_8BYTE, (regNumber)regNum, baseReg, highestCalleeSavedOffset); + + if (reportUnwindData) + { + m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); + } } maskSaveRegs <<= 1; regNum -= 1; @@ -309,6 +319,31 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in assert(highestCalleeSavedOffset >= 16); // the callee-saved regs always above ra/fp. } +//----------------------------------------------------------------------------- +// genOSRHandleTier0CalleeSavedRegistersAndFrame: +// Handle the tier0 callee saves by restoring them from the original tier0 frame. +// Also report phantom unwind data for the allocated stack by the tier0 frame. +// +void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() +{ + assert(m_compiler->compGeneratingProlog); + assert(m_compiler->opts.IsOSR()); + assert(m_compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT); + + PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; + regMaskTP const tier0CalleeSaves(patchpointInfo->CalleeSaveRegisters()); + + JITDUMP("--OSR--- tier0 has already saved "); + JITDUMPEXEC(dspRegMask(tier0CalleeSaves)); + JITDUMP("\nEmitting restores\n"); + + genRestoreCalleeSavedRegistersHelp(tier0CalleeSaves, REG_FPBASE, 16, /* reportUnwindData */ false); + emit->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_FPBASE, 8); + emit->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_FPBASE, 0); + + m_compiler->unwindAllocStack(patchpointInfo->TotalFrameSize()); +} + // clang-format off /***************************************************************************** * diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index bb6d56c4e01360..04c6275ff32545 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -137,7 +137,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni //------------------------------------------------------------------------ // genEnregisterOSRArgsAndLocals: enregister OSR args and locals. // -void CodeGen::genEnregisterOSRArgsAndLocals() +void CodeGen::genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZeroed) { unreached(); // OSR not supported on WASM. } From d8bddb7f4866a9ffc90c39ee8d7b98c4e44ec65d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 15 Apr 2026 14:32:54 +0200 Subject: [PATCH 19/32] Clean up --- src/coreclr/vm/jithelpers.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index b4741e4117a32a..db4d323b5ce44a 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -1721,13 +1721,10 @@ extern "C" void JIT_PatchpointWorkerWorkerWithPolicy(TransitionBlock * pTransiti SetSSP(pFrameContext, ssp - 8); } #endif // TARGET_WINDOWS - - pFrameContext->Rbp = currentFP; -#elif defined(TARGET_ARM64) // TARGET_AMD64 - pFrameContext->Fp = currentFP; #endif // TARGET_ARM64 SetSP(pFrameContext, currentSP); + SetFP(pFrameContext, currentFP); // Note we can get here w/o triggering, if there is an existing OSR method and // we hit the patchpoint. From b7d705e1efc6c84da08f2807e1b07bcc1476f957 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 15 Apr 2026 14:48:30 +0200 Subject: [PATCH 20/32] AltJit based fixes --- src/coreclr/jit/codegen.h | 5 ++++- src/coreclr/jit/codegenarm64.cpp | 5 +++-- src/coreclr/jit/codegencommon.cpp | 4 ++-- src/coreclr/jit/codegenloongarch64.cpp | 17 ++++++++++------- src/coreclr/jit/codegenriscv64.cpp | 15 +++++++++------ src/coreclr/jit/compiler.cpp | 6 +++++- 6 files changed, 33 insertions(+), 19 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index ccfe9ba56e85f9..8707b4a940eb74 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -446,7 +446,10 @@ class CodeGen final : public CodeGenInterface void genStackPointerAdjustment(ssize_t spAdjustment, regNumber tmpReg, bool* pTmpRegIsZero, bool reportUnwindData); void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset); - void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, regNumber baseReg, int lowestCalleeSavedOffset, bool reportUnwindData); + void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, + regNumber baseReg, + int lowestCalleeSavedOffset, + bool reportUnwindData); #endif void genOSRHandleTier0CalleeSavedRegistersAndFrame(); diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 48473632724393..3e315ddbf75801 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -594,7 +594,8 @@ void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNum // Arguments: // reg1 - First register of pair to restore. // reg2 - Second register of pair to restore. -// spOffset - The offset from SP to load reg1 (must be positive or zero). +// baseReg - Base register to load values from +// spOffset - The offset from the base register to load reg1 // spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or // zero). // useSaveNextPair - True if the last prolog instruction was to save the previous register pair. This @@ -629,7 +630,7 @@ void CodeGen::genRestoreRegPair(regNumber reg1, { // Fold the SP change into this instruction. // ldp reg1, reg2, [SP], #spDelta - GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_POST_INDEX); + GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, baseReg, spDelta, INS_OPTS_POST_INDEX); if (reportUnwindData) { diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 521b6acf60ca51..bf24f29e226fcc 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -5383,8 +5383,8 @@ void CodeGen::genFnProlog() } #endif // TARGET_ARM #else // TARGET_WASM - regNumber initReg = REG_NA; - bool initRegZeroed = false; + regNumber initReg = REG_NA; + bool initRegZeroed = false; bool inheritsCalleeSaves = false; #endif // TARGET_WASM diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 96d0ac39907268..f075858ec24c21 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -275,7 +275,10 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe // Return Value: // None. -void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, regNumber baseReg, int lowestCalleeSavedOffset, bool reportUnwindData) +void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, + regNumber baseReg, + int lowestCalleeSavedOffset, + bool reportUnwindData) { // The FP and RA are not in RBM_CALLEE_SAVED. assert(!(regsToRestoreMask & (~RBM_CALLEE_SAVED))); @@ -341,15 +344,15 @@ void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() assert(m_compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT); PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; - regMaskTP const tier0CalleeSaves(patchpointInfo->CalleeSaveRegisters()); + regMaskTP tier0CalleeSaves(patchpointInfo->CalleeSaveRegisters()); JITDUMP("--OSR--- tier0 has already saved "); JITDUMPEXEC(dspRegMask(tier0CalleeSaves)); JITDUMP("\nEmitting restores\n"); - genRestoreCalleeSavedRegistersHelp(tier0CalleeSaves, REG_FPBASE, 16, /* reportUnwindData */ false); - emit->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_FPBASE, 8); - emit->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_FPBASE, 0); + genRestoreCalleeSavedRegistersHelp(tier0CalleeSaves & ~(RBM_FP | RBM_RA), REG_FP, 16, /* reportUnwindData */ false); + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_FP, 8); + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_FP, 0); m_compiler->unwindAllocStack(patchpointInfo->TotalFrameSize()); } @@ -539,7 +542,7 @@ void CodeGen::genFuncletEpilog(BasicBlock* /* block */) FP_offset = FP_offset & 0xf; } - genRestoreCalleeSavedRegistersHelp(maskSaveRegs, FP_offset + 16); + genRestoreCalleeSavedRegistersHelp(maskSaveRegs, REG_SPBASE, FP_offset + 16, /* reportUnwindData */ true); GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); m_compiler->unwindSaveReg(REG_RA, FP_offset + 8); @@ -6881,7 +6884,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) } JITDUMP(" calleeSaveSPOffset=%d\n", FP_offset + 16); - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, FP_offset + 16); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, REG_SPBASE, FP_offset + 16, /* reportUnwindData */ true); emit->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); m_compiler->unwindSaveReg(REG_RA, FP_offset + 8); diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 5fbf2c47392f9b..03ea018a131f4a 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -265,7 +265,10 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe // Return Value: // None. -void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, regNumber baseReg, int lowestCalleeSavedOffset, bool reportUnwindData) +void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, + regNumber baseReg, + int lowestCalleeSavedOffset, + bool reportUnwindData) { // The FP and RA are not in RBM_CALLEE_SAVED. assert(!(regsToRestoreMask & (~RBM_CALLEE_SAVED))); @@ -337,9 +340,9 @@ void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() JITDUMPEXEC(dspRegMask(tier0CalleeSaves)); JITDUMP("\nEmitting restores\n"); - genRestoreCalleeSavedRegistersHelp(tier0CalleeSaves, REG_FPBASE, 16, /* reportUnwindData */ false); - emit->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_FPBASE, 8); - emit->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_FPBASE, 0); + genRestoreCalleeSavedRegistersHelp(tier0CalleeSaves & ~(RBM_FP | RBM_RA), REG_FP, 16, /* reportUnwindData */ false); + GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_FP, 8); + GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_FP, REG_FP, 0); m_compiler->unwindAllocStack(patchpointInfo->TotalFrameSize()); } @@ -527,7 +530,7 @@ void CodeGen::genFuncletEpilog(BasicBlock* /* block */) FP_offset = FP_offset & 0xf; } - genRestoreCalleeSavedRegistersHelp(maskSaveRegs, FP_offset + 16); + genRestoreCalleeSavedRegistersHelp(maskSaveRegs, REG_SPBASE, FP_offset + 16, /* reportUnwindData */ true); GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); m_compiler->unwindSaveReg(REG_RA, FP_offset + 8); @@ -6569,7 +6572,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) } JITDUMP(" calleeSaveSPOffset=%d\n", FP_offset + 16); - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, FP_offset + 16); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, REG_SPBASE, FP_offset + 16, /* reportUnwindData */ true); emit->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); m_compiler->unwindSaveReg(REG_RA, FP_offset + 8); diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index d76a16895311e9..026c6a6caf5972 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5728,8 +5728,12 @@ void Compiler::generatePatchpointInfo() // regMaskTP rsPushRegs = codeGen->regSet.rsGetModifiedCalleeSavedRegsMask(); rsPushRegs |= RBM_FPBASE; -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) rsPushRegs |= RBM_LR; +#elif defined(TARGET_LOONGARCH64) + rsPushRegs |= RBM_RA; +#elif defined(TARGET_RISCV64) + rsPushRegs |= RBM_RA; #endif patchpointInfo->SetCalleeSaveRegisters((uint64_t)rsPushRegs.getLow()); JITDUMP("--OSR-- Tier0 callee saves: "); From b18b0fcd53bc7db088c16aae11b6b8ac3c6863a7 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 15 Apr 2026 16:19:30 +0200 Subject: [PATCH 21/32] Fix arm build --- src/coreclr/jit/codegenarm.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index 2d2ab0026bd9b0..28e14d26d0375e 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -1853,6 +1853,14 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) #endif // PROFILING_SUPPORTED +//------------------------------------------------------------------------ +// genOSRHandleTier0CalleeSavedRegistersAndFrame: +// No-op for arm without OSR support. +// +void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() +{ +} + //------------------------------------------------------------------------ // genEstablishFramePointer: Set up the frame pointer by adding an offset to the stack pointer. // From 20d11900303be58627d95e03f6bd301ed5c3b9d5 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 15 Apr 2026 16:21:43 +0200 Subject: [PATCH 22/32] Feedback --- src/coreclr/vm/jithelpers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index db4d323b5ce44a..a6e468ebb134be 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -1721,7 +1721,7 @@ extern "C" void JIT_PatchpointWorkerWorkerWithPolicy(TransitionBlock * pTransiti SetSSP(pFrameContext, ssp - 8); } #endif // TARGET_WINDOWS -#endif // TARGET_ARM64 +#endif // TARGET_AMD64 SetSP(pFrameContext, currentSP); SetFP(pFrameContext, currentFP); From 1e4df823fb34f69d8a37607b19b328513538d13b Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 15 Apr 2026 17:14:43 +0200 Subject: [PATCH 23/32] Fix build --- src/coreclr/jit/codegenarm.cpp | 1 + src/coreclr/jit/codegenwasm.cpp | 9 +++++++++ src/coreclr/jit/codegenxarch.cpp | 11 +++++++++++ 3 files changed, 21 insertions(+) diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index 28e14d26d0375e..0d78045a07cbdc 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -1859,6 +1859,7 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) // void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() { + unreached(); } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index 04c6275ff32545..823db3673d1509 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -142,6 +142,15 @@ void CodeGen::genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZer unreached(); // OSR not supported on WASM. } +//------------------------------------------------------------------------ +// genOSRHandleTier0CalleeSavedRegistersAndFrame: +// No-op for asm without OSR support. +// +void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() +{ + unreached(); +} + //------------------------------------------------------------------------ // genHomeRegisterParams: place register arguments into their RA-assigned locations. // diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index b696890a21a7f8..c4d56439363d33 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -10006,6 +10006,17 @@ void CodeGen::genOSRSaveRemainingCalleeSavedRegisters() osrAdditionalIntCalleeSaves &= ~regBit; } } +#else + +//------------------------------------------------------------------------ +// genOSRHandleTier0CalleeSavedRegistersAndFrame: +// No-op for x86 without OSR support. +// +void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() +{ + unreached(); +} + #endif // TARGET_AMD64 //------------------------------------------------------------------------ From 06e9e7fe6029a3b4f81500450a6692afea6ca915 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 16 Apr 2026 17:25:56 +0200 Subject: [PATCH 24/32] Remove unnecessary phantom unwind --- src/coreclr/jit/codegencommon.cpp | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index bf24f29e226fcc..c023527dc98a9c 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -5049,23 +5049,6 @@ void CodeGen::genFnProlog() genBeginFnProlog(); -#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - // For some targets, emit a "phantom prolog" to account for the actions taken - // in the tier0 frame that impact FP and SP on entry to the OSR method. - // - // x64/arm64 handle this differently; the phantom prolog unwind is emitted in - // genOSRHandleTier0CalleeSavedRegistersAndFrame. - // - if (m_compiler->opts.IsOSR()) - { - PatchpointInfo* patchpointInfo = m_compiler->info.compPatchpointInfo; - const int tier0FrameSize = patchpointInfo->TotalFrameSize(); - - // SP is tier0 method's SP. - m_compiler->unwindAllocStack(tier0FrameSize); - } -#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - #ifdef DEBUG if (m_compiler->compJitHaltMethod()) From 05ffa2d2c4e62e415bb4a7d111c122abc17360c9 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 16 Apr 2026 17:26:42 +0200 Subject: [PATCH 25/32] Partial compilation patchpoints --- src/coreclr/jit/lclvars.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index facf2c66d7d5b1..4e70137a225d67 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -5485,9 +5485,9 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() else { // Default configuration - codeGen->SetSaveFpLrWithAllCalleeSavedRegisters((getNeedsGSSecurityCookie() && compLocallocUsed) || - opts.compDbgEnC || doesMethodHavePatchpoints() || - compStressCompile(Compiler::STRESS_GENERIC_VARN, 20)); + codeGen->SetSaveFpLrWithAllCalleeSavedRegisters( + (getNeedsGSSecurityCookie() && compLocallocUsed) || opts.compDbgEnC || doesMethodHavePatchpoints() || + doesMethodHavePartialCompilationPatchpoints() || compStressCompile(Compiler::STRESS_GENERIC_VARN, 20)); } } else if (opts.compJitSaveFpLrWithCalleeSavedRegisters == 1) From 82ad389ebb31234ae261ab31bb0704b475e03817 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 17 Apr 2026 10:06:57 +0200 Subject: [PATCH 26/32] Support frames without fp/lr saved with callee saves --- src/coreclr/jit/codegenarm64.cpp | 56 +++++++++++++++++++++++++------- src/coreclr/jit/compiler.cpp | 9 +++++ src/coreclr/jit/lclvars.cpp | 6 ++-- 3 files changed, 56 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 3e315ddbf75801..071f46132eec79 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5581,7 +5581,7 @@ void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() assert(m_compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT); PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; - regMaskTP const tier0CalleeSaves(patchpointInfo->CalleeSaveRegisters()); + regMaskTP const tier0CalleeSaves((regMaskSmall)patchpointInfo->CalleeSaveRegisters()); JITDUMP("--OSR--- tier0 has already saved "); JITDUMPEXEC(dspRegMask(tier0CalleeSaves)); @@ -5596,29 +5596,61 @@ void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() regMaskTP restoreRegsFloat = tier0CalleeSaves & RBM_ALLFLOAT; regMaskTP restoreRegsInt = tier0CalleeSaves & ~restoreRegsFrame & ~restoreRegsFloat; - // FP is pointing to the FP/LR pair. That pair is always saved at the top. - int calleeSavesTop = 2 * REGSIZE_BYTES; - int frameRegsBottom = calleeSavesTop - 2 * REGSIZE_BYTES; - int intRegsTop = frameRegsBottom; - int floatRegsTop = intRegsTop - genCountBits(restoreRegsInt) * REGSIZE_BYTES; + regNumber baseReg; + int topOfCalleeSaves; + if (restoreRegsFrame != RBM_NONE) + { + // FP/LR was saved with the callee saves. It is always at the top. + // Restore rest of callee saves with the offset from FP. + baseReg = REG_FP; + topOfCalleeSaves = 0; + } + else + { + // FP/LR was not saved with the callee saves. Here we do not actually + // know the offset from FP to the callee saves, but we do know the + // offset from SP. + baseReg = REG_SP; + topOfCalleeSaves = patchpointInfo->TotalFrameSize(); + if (m_compiler->info.compIsVarArgs) + { + topOfCalleeSaves -= MAX_REG_ARG * REGSIZE_BYTES; + } + + if (topOfCalleeSaves > 504) + { + // Too far to encode ldp with sp directly. Compute top into another register. + baseReg = REG_IP0; + topOfCalleeSaves = 0; + // Note: not reporting unwind nops for this as we will pad below anyway. + genInstrWithConstant(INS_add, EA_PTRSIZE, REG_IP0, REG_SP, topOfCalleeSaves, REG_IP0, + /* inUnwindRegion */ false); + } + } if (restoreRegsInt != RBM_NONE) { - genRestoreCalleeSavedRegisterGroup(restoreRegsInt, REG_FPBASE, 0, intRegsTop, /* reportUnwindData */ false); + genRestoreCalleeSavedRegisterGroup(restoreRegsInt, baseReg, 0, topOfCalleeSaves, /* reportUnwindData */ false); + topOfCalleeSaves -= genCountBits(restoreRegsInt) * REGSIZE_BYTES; } if (restoreRegsFloat != RBM_NONE) { - genRestoreCalleeSavedRegisterGroup(restoreRegsFloat, REG_FPBASE, 0, floatRegsTop, /* reportUnwindData */ false); + genRestoreCalleeSavedRegisterGroup(restoreRegsFloat, baseReg, 0, topOfCalleeSaves, + /* reportUnwindData */ false); + topOfCalleeSaves -= genCountBits(restoreRegsFloat) * REGSIZE_BYTES; } - assert(restoreRegsFrame == (RBM_FP | RBM_LR)); - genRestoreRegPair(REG_FP, REG_LR, REG_FPBASE, frameRegsBottom, 0, false, REG_IP1, nullptr, + // Regardless of frame type fp always points to the saved fp/lr for frame + // pointer chaining purposes, so restoring them is trivial. + genRestoreRegPair(REG_FP, REG_LR, REG_FP, 0, 0, false, REG_IP1, nullptr, /* reportUnwindData */ false); - m_compiler->unwindPadding(); - + // Emit phantom unwind data for the tier0 frame. m_compiler->unwindAllocStack(patchpointInfo->TotalFrameSize()); + // Emit nops to make the prolog 1:1 in unwind codes to instructions. This + // is needed for win-arm64. + m_compiler->unwindPadding(); } #ifdef PROFILING_SUPPORTED diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index d3a9880aef5b41..724a9a476d9372 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5765,6 +5765,15 @@ void Compiler::generatePatchpointInfo() #elif defined(TARGET_RISCV64) rsPushRegs |= RBM_RA; #endif + +#ifdef TARGET_ARM64 + // For arm64 we communicate whether fp/lr are stored with the callee saves in this mask. + if (!codeGen->IsSaveFpLrWithAllCalleeSavedRegisters()) + { + rsPushRegs &= ~(RBM_FP | RBM_LR); + } +#endif + patchpointInfo->SetCalleeSaveRegisters((uint64_t)rsPushRegs.getLow()); JITDUMP("--OSR-- Tier0 callee saves: "); JITDUMPEXEC(dspRegMask(regMaskTP((regMaskSmall)patchpointInfo->CalleeSaveRegisters()))); diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 0dbad206e17390..4c7a21f0e2bcc4 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -5485,9 +5485,9 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() else { // Default configuration - codeGen->SetSaveFpLrWithAllCalleeSavedRegisters( - (getNeedsGSSecurityCookie() && compLocallocUsed) || opts.compDbgEnC || doesMethodHavePatchpoints() || - doesMethodHavePartialCompilationPatchpoints() || compStressCompile(Compiler::STRESS_GENERIC_VARN, 20)); + codeGen->SetSaveFpLrWithAllCalleeSavedRegisters((getNeedsGSSecurityCookie() && compLocallocUsed) || + opts.compDbgEnC || + compStressCompile(Compiler::STRESS_GENERIC_VARN, 20)); } } else if (opts.compJitSaveFpLrWithCalleeSavedRegisters == 1) From 6d784e93793e096a375113998d2105b073a0dc33 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 17 Apr 2026 10:19:20 +0200 Subject: [PATCH 27/32] Fixes --- src/coreclr/jit/codegenarm.cpp | 2 +- src/coreclr/jit/codegenarm64.cpp | 4 ++-- src/coreclr/jit/codegenwasm.cpp | 2 +- src/coreclr/jit/codegenxarch.cpp | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index 0d78045a07cbdc..e28571af1629e2 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -1855,7 +1855,7 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) //------------------------------------------------------------------------ // genOSRHandleTier0CalleeSavedRegistersAndFrame: -// No-op for arm without OSR support. +// Not called for arm without OSR support. // void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() { diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 071f46132eec79..1dc2e2ce8e3291 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5620,11 +5620,11 @@ void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() if (topOfCalleeSaves > 504) { // Too far to encode ldp with sp directly. Compute top into another register. - baseReg = REG_IP0; - topOfCalleeSaves = 0; // Note: not reporting unwind nops for this as we will pad below anyway. genInstrWithConstant(INS_add, EA_PTRSIZE, REG_IP0, REG_SP, topOfCalleeSaves, REG_IP0, /* inUnwindRegion */ false); + baseReg = REG_IP0; + topOfCalleeSaves = 0; } } diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index 97bdab25165462..a8c072e8cacafe 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -144,7 +144,7 @@ void CodeGen::genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZer //------------------------------------------------------------------------ // genOSRHandleTier0CalleeSavedRegistersAndFrame: -// No-op for asm without OSR support. +// Not called for WASM without OSR support. // void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() { diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 6bcfe2e7dc95b0..c02a7c003bb7bf 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -10010,7 +10010,7 @@ void CodeGen::genOSRSaveRemainingCalleeSavedRegisters() //------------------------------------------------------------------------ // genOSRHandleTier0CalleeSavedRegistersAndFrame: -// No-op for x86 without OSR support. +// Not called for x86 without OSR support. // void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() { From bd302fb2332a4d214bcbe7c9b3dbc1c608f31c5a Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 17 Apr 2026 10:21:05 +0200 Subject: [PATCH 28/32] Remove SPMI hack --- src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index 2306c3e7845a43..aeb678e6403242 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -4608,11 +4608,6 @@ PatchpointInfo* MethodContext::repGetOSRInfo(unsigned* ilOffset) *ilOffset = value.ilOffset; PatchpointInfo* ppi = (PatchpointInfo*)GetOSRInfo->GetBuffer(value.index); - if (GetSpmiTargetArchitecture() == SPMI_TARGET_ARCHITECTURE_ARM64) - { - // Set FP | LR since old collections do not have it and that makes things assert. - ppi->SetCalleeSaveRegisters(ppi->CalleeSaveRegisters() | (1UL << 29) | (1UL << 30)); - } return ppi; } From 87e6ce5a7cd60d9dae11da736af5ea08299c3a40 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 17 Apr 2026 10:45:07 +0200 Subject: [PATCH 29/32] Feedback --- src/coreclr/jit/codegenxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index c02a7c003bb7bf..f781a6438cbb84 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -10028,7 +10028,7 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe #if DEBUG // OSR root frames must handle this differently. See - // genOSRRecordTier0CalleeSavedRegisters() + // genOSRHandleTier0CalleeSavedRegistersAndFrame() // genOSRSaveRemainingCalleeSavedRegisters() // if (m_compiler->opts.IsOSR()) From e1f8e92f584eea9582db875d3f4ea74feff1d4e0 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 17 Apr 2026 22:16:40 +0200 Subject: [PATCH 30/32] A few fixes --- src/coreclr/jit/codegenarm64.cpp | 2 +- src/coreclr/jit/codegencommon.cpp | 10 +++++----- .../tools/superpmi/superpmi-shared/methodcontext.cpp | 3 +-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 1dc2e2ce8e3291..4f7eb08e69c1cf 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5617,7 +5617,7 @@ void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() topOfCalleeSaves -= MAX_REG_ARG * REGSIZE_BYTES; } - if (topOfCalleeSaves > 504) + if ((topOfCalleeSaves > 504) && ((restoreRegsInt != RBM_NONE) || (restoreRegsFloat != RBM_NONE))) { // Too far to encode ldp with sp directly. Compute top into another register. // Note: not reporting unwind nops for this as we will pad below anyway. diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 10bc8a9497fccb..9d7b4cb19ede5d 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -5413,7 +5413,8 @@ void CodeGen::genFnProlog() inst_RV(INS_push, REG_FPBASE, TYP_REF); m_compiler->unwindPush(REG_FPBASE); } -#ifdef TARGET_X86 // On AMD64, establish the frame pointer after the "sub rsp" +#ifdef TARGET_X86 + // On x86 establish frame pointer now. For x64 we establish it after the "sub rsp". genEstablishFramePointer(0, /*reportUnwindData*/ true); #endif // TARGET_X86 @@ -5433,10 +5434,9 @@ void CodeGen::genFnProlog() #ifdef TARGET_AMD64 // For OSR x64 we need canonical epilogs (sequence of pops). Hence we do // not push any register in the prolog, we rather store them in the area - // allocated by the tier0 method. - // For OSR on other platforms we have no such requirement, instead we - // restore tier0 saved callee saves from its area, but then push the additional - // callee saves in the OSR method prologs as normal. + // allocated by the tier0 method. For OSR on other platforms we have no + // such requirement, instead we restore tier0 saved callee saves from its + // area on entry and then run the prolog as normal. pushesCalleeSaves = !inheritsCalleeSaves; #endif diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index aeb678e6403242..1e9fe7a1de880f 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -4607,8 +4607,7 @@ PatchpointInfo* MethodContext::repGetOSRInfo(unsigned* ilOffset) DEBUG_REP(dmpGetOSRInfo(key, value)); *ilOffset = value.ilOffset; - PatchpointInfo* ppi = (PatchpointInfo*)GetOSRInfo->GetBuffer(value.index); - return ppi; + return (PatchpointInfo*)GetOSRInfo->GetBuffer(value.index); } void MethodContext::recGetClassModuleIdForStatics(CORINFO_CLASS_HANDLE cls, From a41738fd6cad66040b7408cbd9bfc5bbc5e1a594 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 20 Apr 2026 12:07:17 +0200 Subject: [PATCH 31/32] Store in MethodDescCodeData --- src/coreclr/vm/jitinterface.cpp | 35 +++++++++++---------------------- src/coreclr/vm/method.cpp | 23 ++++++++++++++++++++++ src/coreclr/vm/method.hpp | 8 ++++++++ 3 files changed, 43 insertions(+), 23 deletions(-) diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 3fc85926a48e27..c1210091d75330 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -11386,17 +11386,6 @@ void CEECodeGenInfo::reportMetadata( EE_TO_JIT_TRANSITION_LEAF(); } -#if defined(_DEBUG) && defined(ALLOW_SXS_JIT) -struct AltJitPatchpointInfo -{ - AltJitPatchpointInfo* Next; - MethodDesc* Method; - PatchpointInfo* Info; -}; - -static AltJitPatchpointInfo* s_altJitPatchpointInfoList; -#endif - void CEEJitInfo::setPatchpointInfo(PatchpointInfo* patchpointInfo) { CONTRACTL { @@ -11416,15 +11405,18 @@ void CEEJitInfo::setPatchpointInfo(PatchpointInfo* patchpointInfo) if (m_jitFlags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_ALT_JIT)) { uint32_t ppiSize = patchpointInfo->PatchpointInfoSize(); - PatchpointInfo *newPpi = new (new uint8_t[ppiSize]) PatchpointInfo; + + AllocMemTracker am; + void* mem = am.Track(m_pMethodBeingCompiled->GetLoaderAllocator()->GetLowFrequencyHeap()->AllocMem(S_SIZE_T(ppiSize))); + PatchpointInfo *newPpi = new (mem) PatchpointInfo; newPpi->Initialize(patchpointInfo->NumberOfLocals(), patchpointInfo->TotalFrameSize()); newPpi->Copy(patchpointInfo); - AltJitPatchpointInfo* newInfo = new AltJitPatchpointInfo; - newInfo->Next = s_altJitPatchpointInfoList; - newInfo->Method = m_pMethodBeingCompiled; - newInfo->Info = newPpi; - s_altJitPatchpointInfoList = newInfo; + HRESULT hr = m_pMethodBeingCompiled->SetMethodDescAltJitPatchpointInfo(newPpi); + if (SUCCEEDED(hr)) + { + am.SuppressRelease(); + } } #endif @@ -11455,13 +11447,10 @@ PatchpointInfo* CEEJitInfo::getOSRInfo(unsigned* ilOffset) #if defined(_DEBUG) && defined(ALLOW_SXS_JIT) if (m_jitFlags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_ALT_JIT)) { - for (AltJitPatchpointInfo* altJitPpi = s_altJitPatchpointInfoList; altJitPpi != NULL; altJitPpi = altJitPpi->Next) + PatchpointInfo* ppi = m_pMethodBeingCompiled->GetMethodDescAltJitPatchpointInfo(); + if (ppi != NULL) { - if (altJitPpi->Method == m_pMethodBeingCompiled) - { - result = altJitPpi->Info; - break; - } + result = ppi; } } #endif diff --git a/src/coreclr/vm/method.cpp b/src/coreclr/vm/method.cpp index 6e4772c539db5c..c079fba250253f 100644 --- a/src/coreclr/vm/method.cpp +++ b/src/coreclr/vm/method.cpp @@ -274,6 +274,29 @@ void MethodDesc::SetMethodDescOptimizationTier(NativeCodeVersion::OptimizationTi _ASSERTE(m_codeData != NULL); VolatileStoreWithoutBarrier(&m_codeData->OptimizationTier, tier); } + +#if defined(_DEBUG) && defined(ALLOW_SXS_JIT) +HRESULT MethodDesc::SetMethodDescAltJitPatchpointInfo(PatchpointInfo* pInfo) +{ + WRAPPER_NO_CONTRACT; + + HRESULT hr; + IfFailRet(EnsureCodeDataExists(NULL)); + + _ASSERTE(m_codeData != NULL); + VolatileStoreWithoutBarrier(&m_codeData->AltJitPatchpointInfo, pInfo); + return S_OK; +} + +PatchpointInfo* MethodDesc::GetMethodDescAltJitPatchpointInfo() +{ + WRAPPER_NO_CONTRACT; + if (m_codeData == NULL) + return nullptr; + return VolatileLoadWithoutBarrier(&m_codeData->AltJitPatchpointInfo); +} +#endif // _DEBUG && ALLOW_SXS_JIT + #endif // FEATURE_CODE_VERSIONING #ifdef FEATURE_INTERPRETER diff --git a/src/coreclr/vm/method.hpp b/src/coreclr/vm/method.hpp index 9188d081cd5ee4..aee47a6a8a876d 100644 --- a/src/coreclr/vm/method.hpp +++ b/src/coreclr/vm/method.hpp @@ -262,6 +262,9 @@ struct MethodDescCodeData final #ifdef FEATURE_INTERPRETER CallStubHeader *CallStub; #endif // FEATURE_INTERPRETER +#if defined(_DEBUG) && defined(ALLOW_SXS_JIT) + PatchpointInfo *AltJitPatchpointInfo; +#endif // _DEBUG && ALLOW_SXS_JIT }; using PTR_MethodDescCodeData = DPTR(MethodDescCodeData); @@ -1989,6 +1992,11 @@ class MethodDesc #ifndef DACCESS_COMPILE HRESULT SetMethodDescVersionState(PTR_MethodDescVersioningState state); void SetMethodDescOptimizationTier(NativeCodeVersion::OptimizationTier tier); + +#if defined(_DEBUG) && defined(ALLOW_SXS_JIT) + HRESULT SetMethodDescAltJitPatchpointInfo(PatchpointInfo* pInfo); + PatchpointInfo* GetMethodDescAltJitPatchpointInfo(); +#endif #endif // !DACCESS_COMPILE PTR_MethodDescVersioningState GetMethodDescVersionState(); NativeCodeVersion::OptimizationTier GetMethodDescOptimizationTier(); From 0dd6951b51a34e5093236044d22cb5ec0bab20ab Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 17 Apr 2026 13:16:29 +0200 Subject: [PATCH 32/32] Fix patchpoint info --- src/coreclr/inc/patchpointinfo.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/inc/patchpointinfo.h b/src/coreclr/inc/patchpointinfo.h index 6e8d32938b27ab..735b476a2c3e10 100644 --- a/src/coreclr/inc/patchpointinfo.h +++ b/src/coreclr/inc/patchpointinfo.h @@ -47,6 +47,8 @@ struct PatchpointInfo m_keptAliveThisOffset = -1; m_securityCookieOffset = -1; m_monitorAcquiredOffset = -1; + m_asyncExecutionContextOffset = -1; + m_asyncSynchronizationContextOffset = -1; } // Copy @@ -58,6 +60,8 @@ struct PatchpointInfo m_keptAliveThisOffset = original->m_keptAliveThisOffset; m_securityCookieOffset = original->m_securityCookieOffset; m_monitorAcquiredOffset = original->m_monitorAcquiredOffset; + m_asyncExecutionContextOffset = original->m_asyncExecutionContextOffset; + m_asyncSynchronizationContextOffset = original->m_asyncSynchronizationContextOffset; for (uint32_t i = 0; i < original->m_numberOfLocals; i++) {