From e98a935cbdd61bd87c13ba4ffc3268d75ae7d622 Mon Sep 17 00:00:00 2001 From: xuliangyu Date: Tue, 7 Apr 2026 10:45:24 +0800 Subject: [PATCH 1/2] [LoongArch64] Fix the floating-point register copy in SoftwareExceptionFrame::UpdateContextFromTransitionBlock(). --- src/coreclr/pal/inc/unixasmmacrosloongarch64.inc | 5 +++-- src/coreclr/vm/excep.cpp | 12 ++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/coreclr/pal/inc/unixasmmacrosloongarch64.inc b/src/coreclr/pal/inc/unixasmmacrosloongarch64.inc index f042792f51832a..8f55324fe06363 100644 --- a/src/coreclr/pal/inc/unixasmmacrosloongarch64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosloongarch64.inc @@ -411,12 +411,13 @@ C_FUNC(\Name\()_End): // Stack layout (from low to high address): // sp+0: FloatArgumentRegisters (fa0-fa7, 64 bytes) // sp+64: TransitionBlock start -// - CalleeSavedRegisters (fp, ra, s0-s8 - 96 bytes) +// - CalleeSavedRegisters (fp, ra, s0-s8 - 88 bytes) +// - padding (8 bytes) // - ArgumentRegisters (a0-a7, 64 bytes) // // On exit, \target contains the TransitionBlock pointer (sp+128). .macro PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS target - // Stack: FPCalleeSaved(64) + FloatArgs(64) + CalleeSaved(96) + Args(64) = 288 bytes + // Stack: FPCalleeSaved(64) + FloatArgs(64) + CalleeSaved(88) + pad(8) + Args(64) = 288 bytes PROLOG_STACK_ALLOC 288 PROLOG_SAVE_REG_PAIR 22, 1, 128, 1 diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index 4aa703ba854ff9..4daf9f8dbdb03b 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -10660,12 +10660,11 @@ void SoftwareExceptionFrame::UpdateContextFromTransitionBlock(TransitionBlock *p // Copy floating point argument registers (fa0-fa7) // F[] array in CONTEXT is 4*32 elements for LSX/LASX support. - // Each FP register takes 4 slots (for 256-bit LASX vectors). // For 64-bit doubles, we only use the first slot of each register. 
FloatArgumentRegisters *pFloatArgs = (FloatArgumentRegisters*)((BYTE*)pTransitionBlock + TransitionBlock::GetOffsetOfFloatArgumentRegisters()); for (int i = 0; i < 8; i++) { - memcpy(&m_Context.F[i * 4], &pFloatArgs->f[i], sizeof(double)); + memcpy(&m_Context.F[i], &pFloatArgs->f[i], sizeof(double)); } // Read FP callee-saved registers (f24-f31) from the stack @@ -10675,17 +10674,18 @@ void SoftwareExceptionFrame::UpdateContextFromTransitionBlock(TransitionBlock *p UINT64 *pFpCalleeSaved = (UINT64*)((BYTE*)pTransitionBlock - 128); for (int i = 0; i < 8; i++) { - // f24-f31 map to indices 24-31 in the F array, each taking 4 slots - memcpy(&m_Context.F[(24 + i) * 4], &pFpCalleeSaved[i], sizeof(double)); + // f24-f31 map to indices 24-31 in the F array + memcpy(&m_Context.F[24 + i], &pFpCalleeSaved[i], sizeof(double)); } // Initialize remaining F registers (f8-f23) to zero for (int i = 8; i < 24; i++) { - memset(&m_Context.F[i * 4], 0, sizeof(double) * 4); + memset(&m_Context.F[i], 0, sizeof(double)); } - // Initialize FP control/status register + // Initialize FP control/status and condition flag registers m_Context.Fcsr = 0; + m_Context.Fcc = 0; // Set up context pointers for callee-saved registers m_ContextPointers.S0 = &m_Context.S0; From 2cb9664030a3522eeffa9c9a033a954f3ac50ee6 Mon Sep 17 00:00:00 2001 From: xuliangyu Date: Wed, 8 Apr 2026 14:25:17 +0800 Subject: [PATCH 2/2] synchronize the floating-point register copy in InterpreterFrame::UpdateFloatingPointRegisters_Impl(). 
--- src/coreclr/vm/loongarch64/stubs.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/vm/loongarch64/stubs.cpp b/src/coreclr/vm/loongarch64/stubs.cpp index 2ae94109cf666a..034c84a94577b7 100644 --- a/src/coreclr/vm/loongarch64/stubs.cpp +++ b/src/coreclr/vm/loongarch64/stubs.cpp @@ -307,10 +307,10 @@ void InterpreterFrame::UpdateFloatingPointRegisters_Impl(const PREGDISPLAY pRD, UINT64 *pCalleeSavedFloats = (UINT64*)((BYTE*)pTransitionBlock - 128); // LoongArch CONTEXT::F has 4 slots per register for LASX support. - // Each scalar double value is stored in the first slot. + // Scalar double values are stored in the first 32 slots (the first 8 * 32 bytes). for (int i = 0; i < 8; i++) { - memcpy(&pRD->pCurrentContext->F[(24 + i) * 4], &pCalleeSavedFloats[i], sizeof(double)); + memcpy(&pRD->pCurrentContext->F[24 + i], &pCalleeSavedFloats[i], sizeof(double)); } } #endif // DACCESS_COMPILE