From df156e8cd18ad9cd3f87ad018bb358aa3fce68dc Mon Sep 17 00:00:00 2001
From: Adeel <3840695+am11@users.noreply.github.com>
Date: Mon, 16 Mar 2026 00:26:11 +0200
Subject: [PATCH] Port coreclr interpreter to loongarch64
---
src/coreclr/clr.featuredefines.props | 2 +-
src/coreclr/clrfeatures.cmake | 19 +-
.../Runtime/unix/unixasmmacrosloongarch64.inc | 23 +-
.../pal/inc/unixasmmacrosloongarch64.inc | 8 +
src/coreclr/runtime/loongarch64/InlineTls.inc | 25 +
src/coreclr/vm/callstubgenerator.cpp | 52 +-
src/coreclr/vm/callstubgenerator.h | 6 +-
src/coreclr/vm/loongarch64/asmconstants.h | 20 +
src/coreclr/vm/loongarch64/asmhelpers.S | 1768 +++++++++++++++++
src/coreclr/vm/loongarch64/stubs.cpp | 23 +
src/coreclr/vm/prestub.cpp | 2 +-
src/coreclr/vm/riscv64/asmhelpers.S | 49 +
.../Directory.Build.props | 2 +-
.../JIT/interpreter/InterpreterTester.csproj | 2 +-
14 files changed, 1945 insertions(+), 56 deletions(-)
create mode 100644 src/coreclr/runtime/loongarch64/InlineTls.inc
diff --git a/src/coreclr/clr.featuredefines.props b/src/coreclr/clr.featuredefines.props
index 76677d3c6148c2..1bdc341c1ccaa7 100644
--- a/src/coreclr/clr.featuredefines.props
+++ b/src/coreclr/clr.featuredefines.props
@@ -47,7 +47,7 @@
true
-
+
true
diff --git a/src/coreclr/clrfeatures.cmake b/src/coreclr/clrfeatures.cmake
index 828c9be93fdd51..643835ed1dc316 100644
--- a/src/coreclr/clrfeatures.cmake
+++ b/src/coreclr/clrfeatures.cmake
@@ -48,16 +48,17 @@ if(CLR_CMAKE_TARGET_BROWSER)
endif()
if(NOT DEFINED FEATURE_INTERPRETER)
- if(CLR_CMAKE_TARGET_ANDROID)
- set(FEATURE_INTERPRETER 0)
- else()
- if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_ARM OR CLR_CMAKE_TARGET_ARCH_RISCV64)
- set(FEATURE_INTERPRETER $<IF:$<CONFIG:Debug,Checked>,1,0>)
- else(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_ARM OR CLR_CMAKE_TARGET_ARCH_RISCV64)
- set(FEATURE_INTERPRETER 0)
- endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_ARM OR CLR_CMAKE_TARGET_ARCH_RISCV64)
+ set(FEATURE_INTERPRETER 0)
+
+ if(NOT CLR_CMAKE_TARGET_ANDROID AND
+ (CLR_CMAKE_TARGET_ARCH_AMD64 OR
+ CLR_CMAKE_TARGET_ARCH_ARM64 OR
+ CLR_CMAKE_TARGET_ARCH_ARM OR
+ CLR_CMAKE_TARGET_ARCH_RISCV64 OR
+ CLR_CMAKE_TARGET_ARCH_LOONGARCH64))
+ set(FEATURE_INTERPRETER $<IF:$<CONFIG:Debug,Checked>,1,0>)
endif()
-endif(NOT DEFINED FEATURE_INTERPRETER)
+endif()
if(NOT DEFINED FEATURE_STANDALONE_GC)
set(FEATURE_STANDALONE_GC 1)
diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc
index cf3583aae5ba87..1277824476cb8c 100644
--- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc
+++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc
@@ -143,28 +143,7 @@ C_FUNC(\Name):
.endm
-// Loads the address of a thread-local variable into the target register,
-// which cannot be a0. Preserves all other registers.
-.macro INLINE_GET_TLS_VAR target, var
- .ifc \target, $a0
- .error "target cannot be a0"
- .endif
-
- addi.d $sp, $sp, -16
- st.d $a0, $sp, 0
- st.d $ra, $sp, 8
-
- // This instruction is recognized and potentially patched
- // by the linker (GD->IE/LE relaxation).
- la.tls.desc $a0, \var
-
- ori \target, $tp, 0
- add.d \target, \target, $a0
-
- ld.d $a0, $sp, 0
- ld.d $ra, $sp, 8
- addi.d $sp, $sp, 16
-.endm
+#include <InlineTls.inc>
// Inlined version of RhpGetThread. Target cannot be a0.
.macro INLINE_GETTHREAD target
diff --git a/src/coreclr/pal/inc/unixasmmacrosloongarch64.inc b/src/coreclr/pal/inc/unixasmmacrosloongarch64.inc
index 92d701598f933e..f042792f51832a 100644
--- a/src/coreclr/pal/inc/unixasmmacrosloongarch64.inc
+++ b/src/coreclr/pal/inc/unixasmmacrosloongarch64.inc
@@ -523,3 +523,11 @@ $__RedirectionStubEndFuncName
0:
#endif
.endm
+
+#include <InlineTls.inc>
+
+// Inlined version of RhpGetThread. Target cannot be a0.
+.macro INLINE_GETTHREAD target
+ INLINE_GET_TLS_VAR \target, C_FUNC(t_CurrentThreadInfo)
+ ld.d \target, \target, OFFSETOF__ThreadLocalInfo__m_pThread
+.endm
diff --git a/src/coreclr/runtime/loongarch64/InlineTls.inc b/src/coreclr/runtime/loongarch64/InlineTls.inc
new file mode 100644
index 00000000000000..2bcbf37cbd387b
--- /dev/null
+++ b/src/coreclr/runtime/loongarch64/InlineTls.inc
@@ -0,0 +1,25 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+// Loads the address of a thread-local variable into the target register,
+// which cannot be a0. Preserves all other registers.
+.macro INLINE_GET_TLS_VAR target, var
+ .ifc \target, $a0
+ .error "target cannot be a0"
+ .endif
+
+ addi.d $sp, $sp, -16
+ st.d $a0, $sp, 0
+ st.d $ra, $sp, 8
+
+ // This instruction is recognized and potentially patched
+ // by the linker (GD->IE/LE relaxation).
+ la.tls.desc $a0, \var
+
+ ori \target, $tp, 0
+ add.d \target, \target, $a0
+
+ ld.d $a0, $sp, 0
+ ld.d $ra, $sp, 8
+ addi.d $sp, $sp, 16
+.endm
diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp
index ce30ce8757cc8f..941ca75e0f0236 100644
--- a/src/coreclr/vm/callstubgenerator.cpp
+++ b/src/coreclr/vm/callstubgenerator.cpp
@@ -615,7 +615,7 @@ extern "C" void Store_Stack_4B();
#endif // TARGET_ARM
-#ifdef TARGET_RISCV64
+#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
extern "C" void Load_A0();
extern "C" void Load_A0_A1();
@@ -783,7 +783,7 @@ extern "C" void Store_FA6();
extern "C" void Store_FA6_FA7();
extern "C" void Store_FA7();
-#endif // TARGET_RISCV64
+#endif // TARGET_RISCV64 || TARGET_LOONGARCH64
PCODE CallStubGenerator::GetStackRoutine()
{
@@ -879,7 +879,7 @@ PCODE CallStubGenerator::GetGPRegRangeRoutine(int r1, int r2)
(PCODE)0, (PCODE)0, (PCODE)Store_R2, (PCODE)Store_R2_R3,
(PCODE)0, (PCODE)0, (PCODE)0, (PCODE)Store_R3
};
-#elif defined(TARGET_RISCV64)
+#elif defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
static const PCODE GPRegsLoadRoutines[] = {
(PCODE)Load_A0, (PCODE)Load_A0_A1, (PCODE)Load_A0_A1_A2, (PCODE)Load_A0_A1_A2_A3, (PCODE)Load_A0_A1_A2_A3_A4, (PCODE)Load_A0_A1_A2_A3_A4_A5, (PCODE)Load_A0_A1_A2_A3_A4_A5_A6, (PCODE)Load_A0_A1_A2_A3_A4_A5_A6_A7,
(PCODE)0, (PCODE)Load_A1, (PCODE)Load_A1_A2, (PCODE)Load_A1_A2_A3, (PCODE)Load_A1_A2_A3_A4, (PCODE)Load_A1_A2_A3_A4_A5, (PCODE)Load_A1_A2_A3_A4_A5_A6, (PCODE)Load_A1_A2_A3_A4_A5_A6_A7,
@@ -931,7 +931,7 @@ PCODE CallStubGenerator::GetGPRegRefRoutine(int r)
(PCODE)Store_Ref_X0, (PCODE)Store_Ref_X1, (PCODE)Store_Ref_X2, (PCODE)Store_Ref_X3,
(PCODE)Store_Ref_X4, (PCODE)Store_Ref_X5, (PCODE)Store_Ref_X6, (PCODE)Store_Ref_X7
};
-#elif defined(TARGET_RISCV64)
+#elif defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
static const PCODE GPRegsRefLoadRoutines[] = {
(PCODE)Load_Ref_A0, (PCODE)Load_Ref_A1, (PCODE)Load_Ref_A2, (PCODE)Load_Ref_A3,
(PCODE)Load_Ref_A4, (PCODE)Load_Ref_A5, (PCODE)Load_Ref_A6, (PCODE)Load_Ref_A7
@@ -1017,7 +1017,7 @@ PCODE CallStubGenerator::GetFPRegRangeRoutine(int x1, int x2)
(PCODE)0, (PCODE)0, (PCODE)0, (PCODE)0, (PCODE)0, (PCODE)0, (PCODE)Store_D6, (PCODE)Store_D6_D7,
(PCODE)0, (PCODE)0, (PCODE)0, (PCODE)0, (PCODE)0, (PCODE)0, (PCODE)0, (PCODE)Store_D7
};
-#elif defined(TARGET_RISCV64)
+#elif defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
static const PCODE FPRegsLoadRoutines[] = {
(PCODE)Load_FA0, (PCODE)Load_FA0_FA1, (PCODE)Load_FA0_FA1_FA2, (PCODE)Load_FA0_FA1_FA2_FA3, (PCODE)Load_FA0_FA1_FA2_FA3_FA4, (PCODE)Load_FA0_FA1_FA2_FA3_FA4_FA5, (PCODE)Load_FA0_FA1_FA2_FA3_FA4_FA5_FA6, (PCODE)Load_FA0_FA1_FA2_FA3_FA4_FA5_FA6_FA7,
(PCODE)0, (PCODE)Load_FA1, (PCODE)Load_FA1_FA2, (PCODE)Load_FA1_FA2_FA3, (PCODE)Load_FA1_FA2_FA3_FA4, (PCODE)Load_FA1_FA2_FA3_FA4_FA5, (PCODE)Load_FA1_FA2_FA3_FA4_FA5_FA6, (PCODE)Load_FA1_FA2_FA3_FA4_FA5_FA6_FA7,
@@ -1315,16 +1315,20 @@ extern "C" void InterpreterStubRet3Vector128();
extern "C" void InterpreterStubRet4Vector128();
#endif // TARGET_ARM64
-#if defined(TARGET_RISCV64)
+#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
+extern "C" void CallJittedMethodRetFloat(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize, PTR_PTR_Object pContinuation);
+extern "C" void CallJittedMethodRet2Float(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize, PTR_PTR_Object pContinuation);
extern "C" void CallJittedMethodRet2I8(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize, PTR_PTR_Object pContinuation);
extern "C" void CallJittedMethodRet2Double(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize, PTR_PTR_Object pContinuation);
extern "C" void CallJittedMethodRetFloatInt(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize, PTR_PTR_Object pContinuation);
extern "C" void CallJittedMethodRetIntFloat(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize, PTR_PTR_Object pContinuation);
+extern "C" void InterpreterStubRetFloat();
+extern "C" void InterpreterStubRet2Float();
extern "C" void InterpreterStubRet2I8();
extern "C" void InterpreterStubRet2Double();
extern "C" void InterpreterStubRetFloatInt();
extern "C" void InterpreterStubRetIntFloat();
-#endif // TARGET_RISCV64
+#endif // TARGET_RISCV64 || TARGET_LOONGARCH64
#define INVOKE_FUNCTION_PTR(functionPtrName) LOG2((LF2_INTERPRETER, LL_INFO10000, #functionPtrName "\n")); return functionPtrName
@@ -1432,7 +1436,11 @@ CallStubHeader::InvokeFunctionPtr CallStubGenerator::GetInvokeFunctionPtr(CallSt
INVOKE_FUNCTION_PTR(CallJittedMethodRetSwiftLowered);
#endif // TARGET_APPLE
#endif // TARGET_ARM64
-#if defined(TARGET_RISCV64)
+#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
+ case ReturnTypeFloat:
+ INVOKE_FUNCTION_PTR(CallJittedMethodRetFloat);
+ case ReturnType2Float:
+ INVOKE_FUNCTION_PTR(CallJittedMethodRet2Float);
case ReturnType2I8:
INVOKE_FUNCTION_PTR(CallJittedMethodRet2I8);
case ReturnType2Double:
@@ -1441,7 +1449,7 @@ CallStubHeader::InvokeFunctionPtr CallStubGenerator::GetInvokeFunctionPtr(CallSt
INVOKE_FUNCTION_PTR(CallJittedMethodRetFloatInt);
case ReturnTypeIntFloat:
INVOKE_FUNCTION_PTR(CallJittedMethodRetIntFloat);
-#endif // TARGET_RISCV64
+#endif // TARGET_RISCV64 || TARGET_LOONGARCH64
default:
_ASSERTE(!"Unexpected return type for interpreter stub");
return NULL; // This should never happen, but just in case.
@@ -1547,7 +1555,11 @@ PCODE CallStubGenerator::GetInterpreterReturnTypeHandler(CallStubGenerator::Retu
case ReturnType4Vector128:
RETURN_TYPE_HANDLER(InterpreterStubRet4Vector128);
#endif // TARGET_ARM64
-#if defined(TARGET_RISCV64)
+#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
+ case ReturnTypeFloat:
+ RETURN_TYPE_HANDLER(InterpreterStubRetFloat);
+ case ReturnType2Float:
+ RETURN_TYPE_HANDLER(InterpreterStubRet2Float);
case ReturnType2I8:
RETURN_TYPE_HANDLER(InterpreterStubRet2I8);
case ReturnType2Double:
@@ -1556,7 +1568,7 @@ PCODE CallStubGenerator::GetInterpreterReturnTypeHandler(CallStubGenerator::Retu
RETURN_TYPE_HANDLER(InterpreterStubRetFloatInt);
case ReturnTypeIntFloat:
RETURN_TYPE_HANDLER(InterpreterStubRetIntFloat);
-#endif // TARGET_RISCV64
+#endif // TARGET_RISCV64 || TARGET_LOONGARCH64
default:
_ASSERTE(!"Unexpected return type for interpreter stub");
return 0; // This should never happen, but just in case.
@@ -2665,9 +2677,9 @@ CallStubGenerator::ReturnType CallStubGenerator::GetReturnType(ArgIteratorType *
return ReturnTypeI8;
break;
case ELEMENT_TYPE_R4:
-#if defined(TARGET_ARM64) || defined(TARGET_32BIT)
+#if defined(TARGET_ARM64) || defined(TARGET_32BIT) || defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
return ReturnTypeFloat;
-#endif // TARGET_ARM64 || TARGET_32BIT
+#endif // TARGET_ARM64 || TARGET_32BIT || TARGET_RISCV64 || TARGET_LOONGARCH64
case ELEMENT_TYPE_R8:
return ReturnTypeDouble;
break;
@@ -2821,19 +2833,21 @@ CallStubGenerator::ReturnType CallStubGenerator::GetReturnType(ArgIteratorType *
_ASSERTE(!"The return types should be <= 8 bytes in size");
break;
}
-#elif defined(TARGET_RISCV64)
+#elif defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
{
FpStructInRegistersInfo info = pArgIt->GetReturnFpStructInRegistersInfo();
- // RISC-V pass floating-point struct fields in FA registers
+ // RISC-V and LoongArch64 pass floating-point struct fields in FA/F registers.
+ // Preserve 32-bit float width where applicable to avoid reading/writing
+ // packed 4-byte fields via 8-byte helpers.
if ((info.flags & FpStruct::OnlyOne) != 0)
{
- // Single field - could be float or int in single register
- return ReturnTypeDouble; // Use Double routine for both float and double (NaN-boxed)
+ return (info.Size1st() == sizeof(float)) ? ReturnTypeFloat : ReturnTypeDouble;
}
else if ((info.flags & FpStruct::BothFloat) != 0)
{
- // Two float/double fields
- return ReturnType2Double;
+ return (info.Size1st() == sizeof(float) && info.Size2nd() == sizeof(float))
+ ? ReturnType2Float
+ : ReturnType2Double;
}
else if ((info.flags & FpStruct::FloatInt) != 0)
{
diff --git a/src/coreclr/vm/callstubgenerator.h b/src/coreclr/vm/callstubgenerator.h
index c2996de338fba3..193849d18c3a0c 100644
--- a/src/coreclr/vm/callstubgenerator.h
+++ b/src/coreclr/vm/callstubgenerator.h
@@ -124,12 +124,14 @@ class CallStubGenerator
ReturnTypeSwiftLowered,
#endif // TARGET_APPLE
#endif // TARGET_ARM64
-#if defined(TARGET_RISCV64)
+#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
+ ReturnTypeFloat,
+ ReturnType2Float,
ReturnType2I8,
ReturnType2Double,
ReturnTypeFloatInt,
ReturnTypeIntFloat,
-#endif // TARGET_RISCV64
+#endif // TARGET_RISCV64 || TARGET_LOONGARCH64
};
enum class RoutineType
diff --git a/src/coreclr/vm/loongarch64/asmconstants.h b/src/coreclr/vm/loongarch64/asmconstants.h
index 77404df969160f..56628960c1aeae 100644
--- a/src/coreclr/vm/loongarch64/asmconstants.h
+++ b/src/coreclr/vm/loongarch64/asmconstants.h
@@ -52,6 +52,9 @@ ASMCONSTANTS_C_ASSERT(Thread__m_pFrame == offsetof(Thread, m_pFrame));
#define OFFSETOF__RuntimeThreadLocals__ee_alloc_context 0
ASMCONSTANTS_C_ASSERT(OFFSETOF__RuntimeThreadLocals__ee_alloc_context == offsetof(RuntimeThreadLocals, alloc_context));
+#define OFFSETOF__ThreadLocalInfo__m_pThread 0
+ASMCONSTANTS_C_ASSERT(OFFSETOF__ThreadLocalInfo__m_pThread == offsetof(ThreadLocalInfo, m_pThread));
+
#define OFFSETOF__ee_alloc_context__alloc_ptr 0x8
ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__alloc_ptr == offsetof(ee_alloc_context, m_GCAllocContext) +
offsetof(gc_alloc_context, alloc_ptr));
@@ -219,6 +222,23 @@ ASMCONSTANTS_C_ASSERT(StubPrecodeData__Target == offsetof(StubPrecode
#define StubPrecodeData__SecretParam 0x00
ASMCONSTANTS_C_ASSERT(StubPrecodeData__SecretParam == offsetof(StubPrecodeData, SecretParam))
+#ifdef FEATURE_INTERPRETER
+#define OFFSETOF__InterpMethod__pCallStub 0x20
+ASMCONSTANTS_C_ASSERT(OFFSETOF__InterpMethod__pCallStub == offsetof(InterpMethod, pCallStub))
+
+#define OFFSETOF__Thread__m_pInterpThreadContext 0xB20
+ASMCONSTANTS_C_ASSERT(OFFSETOF__Thread__m_pInterpThreadContext == offsetof(Thread, m_pInterpThreadContext))
+
+#define OFFSETOF__InterpThreadContext__pStackPointer 0x10
+ASMCONSTANTS_C_ASSERT(OFFSETOF__InterpThreadContext__pStackPointer == offsetof(InterpThreadContext, pStackPointer))
+
+#define OFFSETOF__CallStubHeader__Routines 0x18
+ASMCONSTANTS_C_ASSERT(OFFSETOF__CallStubHeader__Routines == offsetof(CallStubHeader, Routines))
+
+#define SIZEOF__TransitionBlock 0xA0
+ASMCONSTANTS_C_ASSERT(SIZEOF__TransitionBlock == sizeof(TransitionBlock))
+#endif // FEATURE_INTERPRETER
+
#ifdef FEATURE_TIERED_COMPILATION
#define CallCountingStubData__RemainingCallCountCell 0x00
ASMCONSTANTS_C_ASSERT(CallCountingStubData__RemainingCallCountCell == offsetof(CallCountingStubData, RemainingCallCountCell))
diff --git a/src/coreclr/vm/loongarch64/asmhelpers.S b/src/coreclr/vm/loongarch64/asmhelpers.S
index bd6b7beec16007..d51c6bd22f504c 100644
--- a/src/coreclr/vm/loongarch64/asmhelpers.S
+++ b/src/coreclr/vm/loongarch64/asmhelpers.S
@@ -1069,3 +1069,1771 @@ NESTED_ENTRY IL_Rethrow, _TEXT, NoHandler
break 0
NESTED_END IL_Rethrow, _TEXT
+#ifdef FEATURE_INTERPRETER
+
+// Align interpreter stack by adjusting it by 8 bytes
+LEAF_ENTRY InjectInterpStackAlign
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ jirl $r0, $t4, 0
+LEAF_END InjectInterpStackAlign
+
+// Copy arguments from the interpreter stack to the processor stack
+// The CPU stack slots are aligned to pointer size.
+LEAF_ENTRY Load_Stack
+ ld.wu $t4, $t2, 0 // SP offset
+ ld.wu $t5, $t2, 4 // number of stack slots
+ addi.d $t2, $t2, 8
+ add.d $t4, $sp, $t4
+ addi.d $t5, $t5, -8
+ blt $t5, $zero, LOCAL_LABEL(CopyBy1)
+LOCAL_LABEL(CopyLoop):
+ ld.d $t6, $t3, 0
+ st.d $t6, $t4, 0
+ addi.d $t3, $t3, 8
+ addi.d $t4, $t4, 8
+ addi.d $t5, $t5, -8
+ bge $t5, $zero, LOCAL_LABEL(CopyLoop)
+LOCAL_LABEL(CopyBy1):
+ addi.d $t5, $t5, 8
+ addi.d $t5, $t5, -1
+ blt $t5, $zero, LOCAL_LABEL(Done)
+LOCAL_LABEL(CopyLoop1):
+ ld.bu $t6, $t3, 0
+ st.b $t6, $t4, 0
+ addi.d $t3, $t3, 1
+ addi.d $t4, $t4, 1
+ addi.d $t5, $t5, -1
+ bge $t5, $zero, LOCAL_LABEL(CopyLoop1)
+LOCAL_LABEL(Done):
+ // Align $t3 to the stack slot size
+ addi.d $t3, $t3, 7
+ srli.d $t3, $t3, 3
+ slli.d $t3, $t3, 3
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ jirl $r0, $t4, 0
+LEAF_END Load_Stack
+
+// Load/Store stack reference routines (placeholders for GC tracking)
+LEAF_ENTRY Load_Stack_Ref
+ ld.wu $t4, $t2, 0 // SP offset ($zero-extend)
+ ld.wu $t5, $t2, 4 // size of the value type ($zero-extend)
+ add.d $t4, $sp, $t4
+ st.d $t3, $t4, 0
+ add.d $t3, $t3, $t5
+ // Align $t3 to the stack slot size
+ addi.d $t3, $t3, 7
+ srli.d $t3, $t3, 3
+ slli.d $t3, $t3, 3
+ ld.d $t4, $t2, 8 // Next routine pointer (aligned)
+ addi.d $t2, $t2, 16
+ jirl $r0, $t4, 0
+LEAF_END Load_Stack_Ref
+
+LEAF_ENTRY Store_Stack_Ref
+ ld.wu $t5, $t2, 0 // SP offset ($zero-extend)
+ ld.wu $t4, $t2, 4 // size of the value type ($zero-extend)
+ add.d $t5, $sp, $t5
+ // Split large immediate into separate additions to avoid 12-bit limit
+ addi.d $t5, $t5, __PWTB_TransitionBlock
+ addi.d $t5, $t5, SIZEOF__TransitionBlock
+ ld.d $t5, $t5, 0 // $t5 = pointer to source data from native stack
+ // Copy the data from native stack to interpreter stack
+ // Copy 8 bytes at a time if possible
+ addi.d $t4, $t4, -8
+ blt $t4, $zero, LOCAL_LABEL(StoreRefCopyBy1)
+LOCAL_LABEL(StoreRefCopyLoop8):
+ ld.d $t6, $t5, 0
+ st.d $t6, $t3, 0
+ addi.d $t5, $t5, 8
+ addi.d $t3, $t3, 8
+ addi.d $t4, $t4, -8
+ bge $t4, $zero, LOCAL_LABEL(StoreRefCopyLoop8)
+LOCAL_LABEL(StoreRefCopyBy1):
+ // Copy remaining bytes (0-7)
+ addi.d $t4, $t4, 8
+ beq $t4, $zero, LOCAL_LABEL(StoreRefCopyDone)
+LOCAL_LABEL(StoreRefCopyLoop1):
+ ld.bu $t6, $t5, 0
+ st.b $t6, $t3, 0
+ addi.d $t5, $t5, 1
+ addi.d $t3, $t3, 1
+ addi.d $t4, $t4, -1
+ bne $t4, $zero, LOCAL_LABEL(StoreRefCopyLoop1)
+LOCAL_LABEL(StoreRefCopyDone):
+ // Align $t3 to the stack slot size
+ addi.d $t3, $t3, 7
+ srli.d $t3, $t3, 3
+ slli.d $t3, $t3, 3
+ ld.d $t4, $t2, 8 // Next routine pointer (aligned)
+ addi.d $t2, $t2, 16
+ jirl $r0, $t4, 0
+LEAF_END Store_Stack_Ref
+
+// Macro for copying value types by reference
+// Arguments:
+// argReg - source register containing pointer to value type
+// $t4 - size of the value type (in bytes)
+// $t3 - destination pointer (interpreter stack)
+.macro Copy_Ref argReg
+ // Copy 8 bytes at a time if possible
+ addi.d $t4, $t4, -8
+ blt $t4, $zero, LOCAL_LABEL(CopyBy1\argReg)
+LOCAL_LABEL(RefCopyLoop8\argReg):
+ ld.d $t6, \argReg, 0
+ st.d $t6, $t3, 0
+ addi.d \argReg, \argReg, 8
+ addi.d $t3, $t3, 8
+ addi.d $t4, $t4, -8
+ bge $t4, $zero, LOCAL_LABEL(RefCopyLoop8\argReg)
+LOCAL_LABEL(CopyBy1\argReg):
+ // Copy remaining bytes (0-7)
+ addi.d $t4, $t4, 8
+ beq $t4, $zero, LOCAL_LABEL(RefCopyDone\argReg)
+LOCAL_LABEL(RefCopyLoop1\argReg):
+ ld.bu $t6, \argReg, 0
+ st.b $t6, $t3, 0
+ addi.d \argReg, \argReg, 1
+ addi.d $t3, $t3, 1
+ addi.d $t4, $t4, -1
+ bne $t4, $zero, LOCAL_LABEL(RefCopyLoop1\argReg)
+LOCAL_LABEL(RefCopyDone\argReg):
+ // Align $t3 to the stack slot size
+ addi.d $t3, $t3, 7
+ srli.d $t3, $t3, 3
+ slli.d $t3, $t3, 3
+.endm
+
+// Routines for passing value type arguments by reference in general purpose registers A0..A7
+// from native code to the interpreter (Store direction)
+
+.macro Store_Ref argReg, argRegLower
+LEAF_ENTRY Store_Ref_\argReg
+ ld.d $t4, $t2, 0 // size of the value type
+ addi.d $t2, $t2, 8
+ Copy_Ref \argRegLower
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ jirl $r0, $t4, 0
+LEAF_END Store_Ref_\argReg
+.endm
+
+Store_Ref A0, $a0
+Store_Ref A1, $a1
+Store_Ref A2, $a2
+Store_Ref A3, $a3
+Store_Ref A4, $a4
+Store_Ref A5, $a5
+Store_Ref A6, $a6
+Store_Ref A7, $a7
+
+// Routines for loading value type arguments by reference from interpreter stack
+// to general purpose registers A0..A7 (Load direction - interpreter to native)
+
+.macro Load_Ref argReg, argRegLower
+LEAF_ENTRY Load_Ref_\argReg
+ ld.d $t4, $t2, 0 // size of the value type
+ addi.d $t2, $t2, 8
+ move \argRegLower, $t3 // Pass pointer to the value type in register
+ add.d $t3, $t3, $t4 // Advance interpreter stack pointer
+ // Align $t3 to the stack slot size
+ addi.d $t3, $t3, 7
+ srli.d $t3, $t3, 3
+ slli.d $t3, $t3, 3
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ jirl $r0, $t4, 0
+LEAF_END Load_Ref_\argReg
+.endm
+
+Load_Ref A0, $a0
+Load_Ref A1, $a1
+Load_Ref A2, $a2
+Load_Ref A3, $a3
+Load_Ref A4, $a4
+Load_Ref A5, $a5
+Load_Ref A6, $a6
+Load_Ref A7, $a7
+
+// Call jitted method routines
+// $a0 - routines array
+// $a1 - interpreter stack args location
+// $a2 - interpreter stack return value location
+// $a3 - stack arguments size (properly aligned)
+// $a4 - address of continuation return value
+NESTED_ENTRY CallJittedMethodRetVoid, _TEXT, NoHandler
+ PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -32
+ st.d $a4, $fp, 16
+ sub.d $sp, $sp, $a3
+ ori $t2, $a0, 0
+ ori $t3, $a1, 0
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ jirl $ra, $t4, 0
+ ld.d $a4, $fp, 16
+ st.d $a2, $a4, 0
+ EPILOG_STACK_RESTORE
+ EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32
+ EPILOG_RETURN
+NESTED_END CallJittedMethodRetVoid, _TEXT
+
+// $a0 - routines array
+// $a1 - interpreter stack args location
+// $a2 - interpreter stack return value location
+// $a3 - stack arguments size (properly aligned)
+// $a4 - address of continuation return value
+NESTED_ENTRY CallJittedMethodRetBuff, _TEXT, NoHandler
+ PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -32
+ st.d $a4, $fp, 16
+ sub.d $sp, $sp, $a3
+ ori $t2, $a0, 0
+ ori $t3, $a1, 0
+ ori $a0, $a2, 0 // buffer return pointer goes in $a0
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ jirl $ra, $t4, 0
+ ld.d $a4, $fp, 16
+ st.d $a2, $a4, 0
+ EPILOG_STACK_RESTORE
+ EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32
+ EPILOG_RETURN
+NESTED_END CallJittedMethodRetBuff, _TEXT
+
+// $a0 - routines array
+// $a1 - interpreter stack args location
+// $a2 - interpreter stack return value location
+// $a3 - stack arguments size (properly aligned)
+// $a4 - address of continuation return value
+NESTED_ENTRY CallJittedMethodRetI1, _TEXT, NoHandler
+ PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -32
+ st.d $a2, $fp, 16
+ st.d $a4, $fp, 24
+ sub.d $sp, $sp, $a3
+ ori $t2, $a0, 0
+ ori $t3, $a1, 0
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ jirl $ra, $t4, 0
+ ld.d $a4, $fp, 24
+ st.d $a2, $a4, 0
+ ld.d $a2, $fp, 16
+ // Sign extend 8-bit to 64-bit
+ slli.d $a0, $a0, 56
+ srai.d $a0, $a0, 56
+ st.d $a0, $a2, 0
+ EPILOG_STACK_RESTORE
+ EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32
+ EPILOG_RETURN
+NESTED_END CallJittedMethodRetI1, _TEXT
+
+// $a0 - routines array
+// $a1 - interpreter stack args location
+// $a2 - interpreter stack return value location
+// $a3 - stack arguments size (properly aligned)
+// $a4 - address of continuation return value
+NESTED_ENTRY CallJittedMethodRetI2, _TEXT, NoHandler
+ PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -32
+ st.d $a2, $fp, 16
+ st.d $a4, $fp, 24
+ sub.d $sp, $sp, $a3
+ ori $t2, $a0, 0
+ ori $t3, $a1, 0
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ jirl $ra, $t4, 0
+ ld.d $a4, $fp, 24
+ st.d $a2, $a4, 0
+ ld.d $a2, $fp, 16
+ // Sign extend 16-bit to 64-bit
+ slli.d $a0, $a0, 48
+ srai.d $a0, $a0, 48
+ st.d $a0, $a2, 0
+ EPILOG_STACK_RESTORE
+ EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32
+ EPILOG_RETURN
+NESTED_END CallJittedMethodRetI2, _TEXT
+
+// $a0 - routines array
+// $a1 - interpreter stack args location
+// $a2 - interpreter stack return value location
+// $a3 - stack arguments size (properly aligned)
+// $a4 - address of continuation return value
+NESTED_ENTRY CallJittedMethodRetU1, _TEXT, NoHandler
+ PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -32
+ st.d $a2, $fp, 16
+ st.d $a4, $fp, 24
+ sub.d $sp, $sp, $a3
+ ori $t2, $a0, 0
+ ori $t3, $a1, 0
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ jirl $ra, $t4, 0
+ ld.d $a4, $fp, 24
+ st.d $a2, $a4, 0
+ ld.d $a2, $fp, 16
+ // Zero extend 8-bit to 64-bit
+ slli.d $a0, $a0, 56
+ srli.d $a0, $a0, 56
+ st.d $a0, $a2, 0
+ EPILOG_STACK_RESTORE
+ EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32
+ EPILOG_RETURN
+NESTED_END CallJittedMethodRetU1, _TEXT
+
+// $a0 - routines array
+// $a1 - interpreter stack args location
+// $a2 - interpreter stack return value location
+// $a3 - stack arguments size (properly aligned)
+// $a4 - address of continuation return value
+NESTED_ENTRY CallJittedMethodRetU2, _TEXT, NoHandler
+ PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -32
+ st.d $a2, $fp, 16
+ st.d $a4, $fp, 24
+ sub.d $sp, $sp, $a3
+ ori $t2, $a0, 0
+ ori $t3, $a1, 0
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ jirl $ra, $t4, 0
+ ld.d $a4, $fp, 24
+ st.d $a2, $a4, 0
+ ld.d $a2, $fp, 16
+ // Zero extend 16-bit to 64-bit
+ slli.d $a0, $a0, 48
+ srli.d $a0, $a0, 48
+ st.d $a0, $a2, 0
+ EPILOG_STACK_RESTORE
+ EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32
+ EPILOG_RETURN
+NESTED_END CallJittedMethodRetU2, _TEXT
+
+// $a0 - routines array
+// $a1 - interpreter stack args location
+// $a2 - interpreter stack return value location
+// $a3 - stack arguments size (properly aligned)
+// $a4 - address of continuation return value
+NESTED_ENTRY CallJittedMethodRetI8, _TEXT, NoHandler
+ PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -32
+ st.d $a2, $fp, 16
+ st.d $a4, $fp, 24
+ sub.d $sp, $sp, $a3
+ ori $t2, $a0, 0
+ ori $t3, $a1, 0
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ jirl $ra, $t4, 0
+ ld.d $a4, $fp, 24
+ st.d $a2, $a4, 0
+ ld.d $a2, $fp, 16
+ st.d $a0, $a2, 0
+ EPILOG_STACK_RESTORE
+ EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32
+ EPILOG_RETURN
+NESTED_END CallJittedMethodRetI8, _TEXT
+
+// $a0 - routines array
+// $a1 - interpreter stack args location
+// $a2 - interpreter stack return value location
+// $a3 - stack arguments size (properly aligned)
+// $a4 - address of continuation return value
+NESTED_ENTRY CallJittedMethodRetDouble, _TEXT, NoHandler
+ PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -32
+ st.d $a2, $fp, 16
+ st.d $a4, $fp, 24
+ sub.d $sp, $sp, $a3
+ ori $t2, $a0, 0
+ ori $t3, $a1, 0
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ jirl $ra, $t4, 0
+ ld.d $a4, $fp, 24
+ st.d $a2, $a4, 0
+ ld.d $a2, $fp, 16
+ fst.d $fa0, $a2, 0
+ EPILOG_STACK_RESTORE
+ EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32
+ EPILOG_RETURN
+NESTED_END CallJittedMethodRetDouble, _TEXT
+
+// $a0 - routines array
+// $a1 - interpreter stack args location
+// $a2 - interpreter stack return value location
+// $a3 - stack arguments size (properly aligned)
+// $a4 - address of continuation return value
+NESTED_ENTRY CallJittedMethodRetFloat, _TEXT, NoHandler
+ PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -32
+ st.d $a2, $fp, 16
+ st.d $a4, $fp, 24
+ sub.d $sp, $sp, $a3
+ ori $t2, $a0, 0
+ ori $t3, $a1, 0
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ jirl $ra, $t4, 0
+ ld.d $a4, $fp, 24
+ st.d $a2, $a4, 0
+ ld.d $a2, $fp, 16
+ fst.s $fa0, $a2, 0
+ EPILOG_STACK_RESTORE
+ EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32
+ EPILOG_RETURN
+NESTED_END CallJittedMethodRetFloat, _TEXT
+
+// $a0 - routines array
+// $a1 - interpreter stack args location
+// $a2 - interpreter stack return value location
+// $a3 - stack arguments size (properly aligned)
+// $a4 - address of continuation return value
+NESTED_ENTRY CallJittedMethodRet2I8, _TEXT, NoHandler
+ PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -32
+ st.d $a2, $fp, 16
+ st.d $a4, $fp, 24
+ sub.d $sp, $sp, $a3
+ ori $t2, $a0, 0
+ ori $t3, $a1, 0
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ jirl $ra, $t4, 0
+ ld.d $a4, $fp, 24
+ st.d $a2, $a4, 0
+ ld.d $a2, $fp, 16
+ st.d $a0, $a2, 0
+ st.d $a1, $a2, 8
+ EPILOG_STACK_RESTORE
+ EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32
+ EPILOG_RETURN
+NESTED_END CallJittedMethodRet2I8, _TEXT
+
+// $a0 - routines array
+// $a1 - interpreter stack args location
+// $a2 - interpreter stack return value location
+// $a3 - stack arguments size (properly aligned)
+// $a4 - address of continuation return value
+NESTED_ENTRY CallJittedMethodRet2Double, _TEXT, NoHandler
+ PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -32
+ st.d $a2, $fp, 16
+ st.d $a4, $fp, 24
+ sub.d $sp, $sp, $a3
+ ori $t2, $a0, 0
+ ori $t3, $a1, 0
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ jirl $ra, $t4, 0
+ ld.d $a4, $fp, 24
+ st.d $a2, $a4, 0
+ ld.d $a2, $fp, 16
+ fst.d $fa0, $a2, 0
+ fst.d $fa1, $a2, 8
+ EPILOG_STACK_RESTORE
+ EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32
+ EPILOG_RETURN
+NESTED_END CallJittedMethodRet2Double, _TEXT
+
+// $a0 - routines array
+// $a1 - interpreter stack args location
+// $a2 - interpreter stack return value location
+// $a3 - stack arguments size (properly aligned)
+// $a4 - address of continuation return value
+NESTED_ENTRY CallJittedMethodRet2Float, _TEXT, NoHandler
+ PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -32
+ st.d $a2, $fp, 16
+ st.d $a4, $fp, 24
+ sub.d $sp, $sp, $a3
+ ori $t2, $a0, 0
+ ori $t3, $a1, 0
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ jirl $ra, $t4, 0
+ ld.d $a4, $fp, 24
+ st.d $a2, $a4, 0
+ ld.d $a2, $fp, 16
+ fst.s $fa0, $a2, 0
+ fst.s $fa1, $a2, 4
+ EPILOG_STACK_RESTORE
+ EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32
+ EPILOG_RETURN
+NESTED_END CallJittedMethodRet2Float, _TEXT
+
+// $a0 - routines array
+// $a1 - interpreter stack args location
+// $a2 - interpreter stack return value location
+// $a3 - stack arguments size (properly aligned)
+// $a4 - address of continuation return value
+NESTED_ENTRY CallJittedMethodRetFloatInt, _TEXT, NoHandler
+ PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -32
+ st.d $a2, $fp, 16
+ st.d $a4, $fp, 24
+ sub.d $sp, $sp, $a3
+ ori $t2, $a0, 0
+ ori $t3, $a1, 0
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ jirl $ra, $t4, 0
+ ld.d $a4, $fp, 24
+ st.d $a2, $a4, 0
+ ld.d $a2, $fp, 16
+ fst.d $fa0, $a2, 0
+ st.d $a0, $a2, 8
+ EPILOG_STACK_RESTORE
+ EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32
+ EPILOG_RETURN
+NESTED_END CallJittedMethodRetFloatInt, _TEXT
+
+// $a0 - routines array
+// $a1 - interpreter stack args location
+// $a2 - interpreter stack return value location
+// $a3 - stack arguments size (properly aligned)
+// $a4 - address of continuation return value
+NESTED_ENTRY CallJittedMethodRetIntFloat, _TEXT, NoHandler
+ PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -32
+ st.d $a2, $fp, 16
+ st.d $a4, $fp, 24
+ sub.d $sp, $sp, $a3
+ ori $t2, $a0, 0
+ ori $t3, $a1, 0
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ jirl $ra, $t4, 0
+ ld.d $a4, $fp, 24
+ st.d $a2, $a4, 0
+ ld.d $a2, $fp, 16
+ st.d $a0, $a2, 0
+ fst.d $fa0, $a2, 8
+ EPILOG_STACK_RESTORE
+ EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32
+ EPILOG_RETURN
+NESTED_END CallJittedMethodRetIntFloat, _TEXT
+
+NESTED_ENTRY InterpreterStub, _TEXT, NoHandler
+
+    PROLOG_WITH_TRANSITION_BLOCK
+
+    // IR bytecode address; $t6 stays live across the whole routine chain below
+    ori $t6, $METHODDESC_REGISTER, 0
+
+    INLINE_GETTHREAD $t5 // thrashes $a0; restored later from the transition block
+    beq $t5, $zero, LOCAL_LABEL(NoManagedThreadOrCallStub)
+
+    li.d $t1, OFFSETOF__Thread__m_pInterpThreadContext // offset can exceed the 12-bit imm range, so form the address explicitly
+    add.d $t1, $t5, $t1
+    ld.d $t4, $t1, 0 // $t4 = Thread::m_pInterpThreadContext
+    bne $t4, $zero, LOCAL_LABEL(HaveInterpThreadContext)
+
+LOCAL_LABEL(NoManagedThreadOrCallStub):
+    addi.d $a0, $sp, __PWTB_TransitionBlock // slow path: creates the thread context and/or the call stub
+    ori $a1, $t6, 0
+    bl C_FUNC(GetInterpThreadContextWithPossiblyMissingThreadOrCallStub)
+    ori $t4, $a0, 0 // $t4 = InterpThreadContext*
+
+LOCAL_LABEL(HaveInterpThreadContext):
+
+    RESTORE_ARGUMENT_REGISTERS $sp, __PWTB_ArgumentRegisters
+    RESTORE_FLOAT_ARGUMENT_REGISTERS $sp, __PWTB_FloatArgumentRegisters
+
+    ld.d $t3, $t6, 0 // InterpMethod*
+    ld.d $t3, $t3, OFFSETOF__InterpMethod__pCallStub
+    beq $t3, $zero, LOCAL_LABEL(NoManagedThreadOrCallStub) // no call stub yet; the slow path creates it
+    li.d $t2, OFFSETOF__CallStubHeader__Routines
+    add.d $t2, $t3, $t2 // $t2 = address of the first routine pointer
+    ld.d $t3, $t4, OFFSETOF__InterpThreadContext__pStackPointer // $t3 = interpreter stack pointer
+    // $t6 contains IR bytecode address
+    // Copy the arguments to the interpreter stack, invoke the InterpExecMethod and load the return value
+    ld.d $t4, $t2, 0
+    addi.d $t2, $t2, 8
+    jirl $ra, $t4, 0
+    // Fill in the ContinuationContext register ($a2) from the transition block
+    ld.d $a2, $sp, (__PWTB_ArgumentRegisters+16)
+
+    EPILOG_WITH_TRANSITION_BLOCK_RETURN
+
+NESTED_END InterpreterStub, _TEXT
+
+NESTED_ENTRY InterpreterStubRetVoid, _TEXT, NoHandler
+    PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -16
+    // The +16 is for the $fp, $ra above
+    addi.d $a0, $sp, __PWTB_TransitionBlock + 16 // InterpreterStub's transition block
+    ori $a1, $t6, 0 // the IR bytecode pointer
+    ori $a2, $zero, 0 // no return value storage needed
+    bl C_FUNC(ExecuteInterpretedMethod)
+    EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 16
+    EPILOG_RETURN
+NESTED_END InterpreterStubRetVoid, _TEXT
+
+NESTED_ENTRY InterpreterStubRetI8, _TEXT, NoHandler
+    PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -16
+    // The +16 is for the $fp, $ra above
+    addi.d $a0, $sp, __PWTB_TransitionBlock + 16 // InterpreterStub's transition block
+    ori $a1, $t6, 0 // the IR bytecode pointer
+    ori $a2, $zero, 0 // no return buffer
+    bl C_FUNC(ExecuteInterpretedMethod)
+    ld.d $a0, $a0, 0 // fetch the 8-byte integer result from the interpreter stack
+    EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 16
+    EPILOG_RETURN
+NESTED_END InterpreterStubRetI8, _TEXT
+
+NESTED_ENTRY InterpreterStubRetDouble, _TEXT, NoHandler
+    PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -16
+    // The +16 is for the $fp, $ra above
+    addi.d $a0, $sp, __PWTB_TransitionBlock + 16 // InterpreterStub's transition block
+    ori $a1, $t6, 0 // the IR bytecode pointer
+    ori $a2, $zero, 0 // no return buffer
+    bl C_FUNC(ExecuteInterpretedMethod)
+    fld.d $fa0, $a0, 0 // fetch the double result from the interpreter stack
+    EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 16
+    EPILOG_RETURN
+NESTED_END InterpreterStubRetDouble, _TEXT
+
+NESTED_ENTRY InterpreterStubRetBuff, _TEXT, NoHandler
+    PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -16
+    // The +16 is for the $fp, $ra above
+    ori $a2, $a0, 0 // save caller's return buffer in $a2
+    addi.d $a0, $sp, __PWTB_TransitionBlock + 16 // InterpreterStub's transition block
+    ori $a1, $t6, 0 // the IR bytecode pointer ($t6, kept live by InterpreterStub)
+    bl C_FUNC(ExecuteInterpretedMethod)
+    EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 16
+    EPILOG_RETURN
+NESTED_END InterpreterStubRetBuff, _TEXT
+
+NESTED_ENTRY InterpreterStubRet2I8, _TEXT, NoHandler
+    PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -16
+    // The +16 is for the $fp, $ra above
+    addi.d $a0, $sp, __PWTB_TransitionBlock + 16 // InterpreterStub's transition block
+    ori $a1, $t6, 0 // the IR bytecode pointer
+    ori $a2, $zero, 0 // no return buffer
+    bl C_FUNC(ExecuteInterpretedMethod)
+    ld.d $a1, $a0, 8 // read the second slot first - $a0 is still the source pointer
+    ld.d $a0, $a0, 0 // first 8-byte field of the struct return
+    EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 16
+    EPILOG_RETURN
+NESTED_END InterpreterStubRet2I8, _TEXT
+
+NESTED_ENTRY InterpreterStubRet2Double, _TEXT, NoHandler
+    PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -16
+    // The +16 is for the $fp, $ra above
+    addi.d $a0, $sp, __PWTB_TransitionBlock + 16 // InterpreterStub's transition block
+    ori $a1, $t6, 0 // the IR bytecode pointer
+    ori $a2, $zero, 0 // no return buffer
+    bl C_FUNC(ExecuteInterpretedMethod)
+    fld.d $fa0, $a0, 0 // two doubles packed at offsets 0 and 8
+    fld.d $fa1, $a0, 8
+    EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 16
+    EPILOG_RETURN
+NESTED_END InterpreterStubRet2Double, _TEXT
+
+NESTED_ENTRY InterpreterStubRetFloat, _TEXT, NoHandler
+    PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -16
+    // The +16 is for the $fp, $ra above
+    addi.d $a0, $sp, __PWTB_TransitionBlock + 16 // InterpreterStub's transition block
+    ori $a1, $t6, 0 // the IR bytecode pointer
+    ori $a2, $zero, 0 // no return buffer
+    bl C_FUNC(ExecuteInterpretedMethod)
+    fld.s $fa0, $a0, 0 // single-precision result
+    EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 16
+    EPILOG_RETURN
+NESTED_END InterpreterStubRetFloat, _TEXT
+
+NESTED_ENTRY InterpreterStubRet2Float, _TEXT, NoHandler
+    PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -16
+    // The +16 is for the $fp, $ra above
+    addi.d $a0, $sp, __PWTB_TransitionBlock + 16 // InterpreterStub's transition block
+    ori $a1, $t6, 0 // the IR bytecode pointer
+    ori $a2, $zero, 0 // no return buffer
+    bl C_FUNC(ExecuteInterpretedMethod)
+    fld.s $fa0, $a0, 0 // two single-precision floats packed at offsets 0 and 4
+    fld.s $fa1, $a0, 4
+    EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 16
+    EPILOG_RETURN
+NESTED_END InterpreterStubRet2Float, _TEXT
+
+NESTED_ENTRY InterpreterStubRetFloatInt, _TEXT, NoHandler
+    PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -16
+    // The +16 is for the $fp, $ra above
+    addi.d $a0, $sp, __PWTB_TransitionBlock + 16 // InterpreterStub's transition block
+    ori $a1, $t6, 0 // the IR bytecode pointer
+    ori $a2, $zero, 0 // no return buffer
+    bl C_FUNC(ExecuteInterpretedMethod)
+    fld.d $fa0, $a0, 0 // floating point field at offset 0
+    ld.d $a0, $a0, 8 // integer field at offset 8 (loaded last - $a0 is the source pointer)
+    EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 16
+    EPILOG_RETURN
+NESTED_END InterpreterStubRetFloatInt, _TEXT
+
+NESTED_ENTRY InterpreterStubRetIntFloat, _TEXT, NoHandler
+    PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -16
+    // The +16 is for the $fp, $ra above
+    addi.d $a0, $sp, __PWTB_TransitionBlock + 16 // InterpreterStub's transition block
+    ori $a1, $t6, 0 // the IR bytecode pointer
+    ori $a2, $zero, 0 // no return buffer
+    bl C_FUNC(ExecuteInterpretedMethod)
+    ld.d $a1, $a0, 0 // stage the integer field in $a1 while $a0 is still the source pointer
+    fld.d $fa0, $a0, 8 // floating point field at offset 8
+    ori $a0, $a1, 0 // move the integer result into its ABI register
+    EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 16
+    EPILOG_RETURN
+NESTED_END InterpreterStubRetIntFloat, _TEXT
+
+// Copy arguments from the processor stack to the interpreter stack.
+// The CPU stack slots are aligned to pointer size.
+
+LEAF_ENTRY Store_Stack
+    ld.wu $t4, $t2, 0 // SP offset
+    ld.wu $t5, $t2, 4 // size (multiple of stack slot size)
+    add.d $t4, $sp, $t4
+    // Split large immediate into separate additions to avoid 12-bit limit
+    addi.d $t4, $t4, __PWTB_TransitionBlock
+    addi.d $t4, $t4, SIZEOF__TransitionBlock
+LOCAL_LABEL(StoreCopyLoop):
+    ld.d $t1, $t4, 0 // use $t1 as scratch: $t6 holds the IR bytecode address for the whole routine chain (read by InterpreterStubRet*) and must not be clobbered
+    st.d $t1, $t3, 0
+    addi.d $t4, $t4, 8 // advance the CPU stack cursor
+    addi.d $t3, $t3, 8 // advance the interpreter stack cursor
+    addi.d $t5, $t5, -8
+    bne $t5, $zero, LOCAL_LABEL(StoreCopyLoop)
+    ld.d $t4, $t2, 8 // next routine in the chain
+    addi.d $t2, $t2, 16 // skip the offset/size descriptor and the routine pointer
+    EPILOG_BRANCH_REG $t4
+LEAF_END Store_Stack
+
+// Register load/store routines, chained by tail-jumping to the next routine in the array.
+// $t2 = pointer to the next routine slot, $t3 = interpreter stack pointer; both advance as routines run.
+// Load_* pattern: load from the interpreter stack into argument registers, then jump to the next routine.
+
+LEAF_ENTRY Load_A0
+ ld.d $a0, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_A0
+
+LEAF_ENTRY Load_A0_A1
+ ld.d $a0, $t3, 0
+ ld.d $a1, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_A0_A1
+
+LEAF_ENTRY Load_A0_A1_A2
+ ld.d $a0, $t3, 0
+ ld.d $a1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_A2
+ ld.d $a2, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_A0_A1_A2
+
+LEAF_ENTRY Load_A0_A1_A2_A3
+ ld.d $a0, $t3, 0
+ ld.d $a1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_A2_A3
+ ld.d $a2, $t3, 0
+ ld.d $a3, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_A0_A1_A2_A3
+
+LEAF_ENTRY Load_A0_A1_A2_A3_A4
+ ld.d $a0, $t3, 0
+ ld.d $a1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_A2_A3_A4
+ ld.d $a2, $t3, 0
+ ld.d $a3, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_A4
+ ld.d $a4, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_A0_A1_A2_A3_A4
+
+LEAF_ENTRY Load_A0_A1_A2_A3_A4_A5
+ ld.d $a0, $t3, 0
+ ld.d $a1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_A2_A3_A4_A5
+ ld.d $a2, $t3, 0
+ ld.d $a3, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_A4_A5
+ ld.d $a4, $t3, 0
+ ld.d $a5, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_A0_A1_A2_A3_A4_A5
+
+LEAF_ENTRY Load_A0_A1_A2_A3_A4_A5_A6
+ ld.d $a0, $t3, 0
+ ld.d $a1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_A2_A3_A4_A5_A6
+ ld.d $a2, $t3, 0
+ ld.d $a3, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_A4_A5_A6
+ ld.d $a4, $t3, 0
+ ld.d $a5, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_A6
+ ld.d $a6, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_A0_A1_A2_A3_A4_A5_A6
+
+LEAF_ENTRY Load_A0_A1_A2_A3_A4_A5_A6_A7
+ ld.d $a0, $t3, 0
+ ld.d $a1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_A2_A3_A4_A5_A6_A7
+ ld.d $a2, $t3, 0
+ ld.d $a3, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_A4_A5_A6_A7
+ ld.d $a4, $t3, 0
+ ld.d $a5, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_A6_A7
+ ld.d $a6, $t3, 0
+ ld.d $a7, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_A0_A1_A2_A3_A4_A5_A6_A7
+
+LEAF_ENTRY Load_A1
+ ld.d $a1, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_A1
+
+LEAF_ENTRY Load_A1_A2
+ ld.d $a1, $t3, 0
+ ld.d $a2, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_A1_A2
+
+LEAF_ENTRY Load_A1_A2_A3
+ ld.d $a1, $t3, 0
+ ld.d $a2, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_A3
+ ld.d $a3, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_A1_A2_A3
+
+LEAF_ENTRY Load_A1_A2_A3_A4
+ ld.d $a1, $t3, 0
+ ld.d $a2, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_A3_A4
+ ld.d $a3, $t3, 0
+ ld.d $a4, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_A1_A2_A3_A4
+
+LEAF_ENTRY Load_A1_A2_A3_A4_A5
+ ld.d $a1, $t3, 0
+ ld.d $a2, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_A3_A4_A5
+ ld.d $a3, $t3, 0
+ ld.d $a4, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_A5
+ ld.d $a5, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_A1_A2_A3_A4_A5
+
+LEAF_ENTRY Load_A1_A2_A3_A4_A5_A6
+ ld.d $a1, $t3, 0
+ ld.d $a2, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_A3_A4_A5_A6
+ ld.d $a3, $t3, 0
+ ld.d $a4, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_A5_A6
+ ld.d $a5, $t3, 0
+ ld.d $a6, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_A1_A2_A3_A4_A5_A6
+
+LEAF_ENTRY Load_A1_A2_A3_A4_A5_A6_A7
+ ld.d $a1, $t3, 0
+ ld.d $a2, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_A3_A4_A5_A6_A7
+ ld.d $a3, $t3, 0
+ ld.d $a4, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_A5_A6_A7
+ ld.d $a5, $t3, 0
+ ld.d $a6, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_A7
+ ld.d $a7, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_A1_A2_A3_A4_A5_A6_A7
+
+// Store routines: store argument registers ($a0-$a7) to the interpreter stack at $t3
+
+LEAF_ENTRY Store_A0
+ st.d $a0, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_A0
+
+LEAF_ENTRY Store_A0_A1
+ st.d $a0, $t3, 0
+ st.d $a1, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_A0_A1
+
+LEAF_ENTRY Store_A0_A1_A2
+ st.d $a0, $t3, 0
+ st.d $a1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_A2
+ st.d $a2, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_A0_A1_A2
+
+LEAF_ENTRY Store_A0_A1_A2_A3
+ st.d $a0, $t3, 0
+ st.d $a1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_A2_A3
+ st.d $a2, $t3, 0
+ st.d $a3, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_A0_A1_A2_A3
+
+LEAF_ENTRY Store_A0_A1_A2_A3_A4
+ st.d $a0, $t3, 0
+ st.d $a1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_A2_A3_A4
+ st.d $a2, $t3, 0
+ st.d $a3, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_A4
+ st.d $a4, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_A0_A1_A2_A3_A4
+
+LEAF_ENTRY Store_A0_A1_A2_A3_A4_A5
+ st.d $a0, $t3, 0
+ st.d $a1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_A2_A3_A4_A5
+ st.d $a2, $t3, 0
+ st.d $a3, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_A4_A5
+ st.d $a4, $t3, 0
+ st.d $a5, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_A0_A1_A2_A3_A4_A5
+
+LEAF_ENTRY Store_A0_A1_A2_A3_A4_A5_A6
+ st.d $a0, $t3, 0
+ st.d $a1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_A2_A3_A4_A5_A6
+ st.d $a2, $t3, 0
+ st.d $a3, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_A4_A5_A6
+ st.d $a4, $t3, 0
+ st.d $a5, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_A6
+ st.d $a6, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_A0_A1_A2_A3_A4_A5_A6
+
+LEAF_ENTRY Store_A0_A1_A2_A3_A4_A5_A6_A7
+ st.d $a0, $t3, 0
+ st.d $a1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_A2_A3_A4_A5_A6_A7
+ st.d $a2, $t3, 0
+ st.d $a3, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_A4_A5_A6_A7
+ st.d $a4, $t3, 0
+ st.d $a5, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_A6_A7
+ st.d $a6, $t3, 0
+ st.d $a7, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_A0_A1_A2_A3_A4_A5_A6_A7
+
+LEAF_ENTRY Store_A1
+ st.d $a1, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_A1
+
+LEAF_ENTRY Store_A1_A2
+ st.d $a1, $t3, 0
+ st.d $a2, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_A1_A2
+
+LEAF_ENTRY Store_A1_A2_A3
+ st.d $a1, $t3, 0
+ st.d $a2, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_A3
+ st.d $a3, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_A1_A2_A3
+
+LEAF_ENTRY Store_A1_A2_A3_A4
+ st.d $a1, $t3, 0
+ st.d $a2, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_A3_A4
+ st.d $a3, $t3, 0
+ st.d $a4, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_A1_A2_A3_A4
+
+LEAF_ENTRY Store_A1_A2_A3_A4_A5
+ st.d $a1, $t3, 0
+ st.d $a2, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_A3_A4_A5
+ st.d $a3, $t3, 0
+ st.d $a4, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_A5
+ st.d $a5, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_A1_A2_A3_A4_A5
+
+LEAF_ENTRY Store_A1_A2_A3_A4_A5_A6
+ st.d $a1, $t3, 0
+ st.d $a2, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_A3_A4_A5_A6
+ st.d $a3, $t3, 0
+ st.d $a4, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_A5_A6
+ st.d $a5, $t3, 0
+ st.d $a6, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_A1_A2_A3_A4_A5_A6
+
+LEAF_ENTRY Store_A1_A2_A3_A4_A5_A6_A7
+ st.d $a1, $t3, 0
+ st.d $a2, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_A3_A4_A5_A6_A7
+ st.d $a3, $t3, 0
+ st.d $a4, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_A5_A6_A7
+ st.d $a5, $t3, 0
+ st.d $a6, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_A7
+ st.d $a7, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_A1_A2_A3_A4_A5_A6_A7
+
+// Floating-point argument register load/store routines ($fa0-$fa7)
+
+LEAF_ENTRY Load_FA0
+ fld.d $fa0, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA0
+
+LEAF_ENTRY Load_FA0_FA1
+ fld.d $fa0, $t3, 0
+ fld.d $fa1, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA0_FA1
+
+LEAF_ENTRY Load_FA0_FA1_FA2
+ fld.d $fa0, $t3, 0
+ fld.d $fa1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_FA2
+ fld.d $fa2, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA0_FA1_FA2
+
+LEAF_ENTRY Load_FA0_FA1_FA2_FA3
+ fld.d $fa0, $t3, 0
+ fld.d $fa1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_FA2_FA3
+ fld.d $fa2, $t3, 0
+ fld.d $fa3, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA0_FA1_FA2_FA3
+
+LEAF_ENTRY Load_FA0_FA1_FA2_FA3_FA4
+ fld.d $fa0, $t3, 0
+ fld.d $fa1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_FA2_FA3_FA4
+ fld.d $fa2, $t3, 0
+ fld.d $fa3, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_FA4
+ fld.d $fa4, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA0_FA1_FA2_FA3_FA4
+
+LEAF_ENTRY Load_FA0_FA1_FA2_FA3_FA4_FA5
+ fld.d $fa0, $t3, 0
+ fld.d $fa1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_FA2_FA3_FA4_FA5
+ fld.d $fa2, $t3, 0
+ fld.d $fa3, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_FA4_FA5
+ fld.d $fa4, $t3, 0
+ fld.d $fa5, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA0_FA1_FA2_FA3_FA4_FA5
+
+LEAF_ENTRY Load_FA0_FA1_FA2_FA3_FA4_FA5_FA6
+ fld.d $fa0, $t3, 0
+ fld.d $fa1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_FA2_FA3_FA4_FA5_FA6
+ fld.d $fa2, $t3, 0
+ fld.d $fa3, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_FA4_FA5_FA6
+ fld.d $fa4, $t3, 0
+ fld.d $fa5, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_FA6
+ fld.d $fa6, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA0_FA1_FA2_FA3_FA4_FA5_FA6
+
+LEAF_ENTRY Load_FA0_FA1_FA2_FA3_FA4_FA5_FA6_FA7
+ fld.d $fa0, $t3, 0
+ fld.d $fa1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_FA2_FA3_FA4_FA5_FA6_FA7
+ fld.d $fa2, $t3, 0
+ fld.d $fa3, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_FA4_FA5_FA6_FA7
+ fld.d $fa4, $t3, 0
+ fld.d $fa5, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Load_FA6_FA7
+ fld.d $fa6, $t3, 0
+ fld.d $fa7, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA0_FA1_FA2_FA3_FA4_FA5_FA6_FA7
+
+// Additional Load_FA* routines starting from the odd registers FA1, FA3, FA5, FA7 (straight runs, no alternate entries)
+LEAF_ENTRY Load_FA1
+ fld.d $fa1, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA1
+
+LEAF_ENTRY Load_FA1_FA2
+ fld.d $fa1, $t3, 0
+ fld.d $fa2, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA1_FA2
+
+LEAF_ENTRY Load_FA1_FA2_FA3
+ fld.d $fa1, $t3, 0
+ fld.d $fa2, $t3, 8
+ fld.d $fa3, $t3, 16
+ addi.d $t3, $t3, 24
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA1_FA2_FA3
+
+LEAF_ENTRY Load_FA1_FA2_FA3_FA4
+ fld.d $fa1, $t3, 0
+ fld.d $fa2, $t3, 8
+ fld.d $fa3, $t3, 16
+ fld.d $fa4, $t3, 24
+ addi.d $t3, $t3, 32
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA1_FA2_FA3_FA4
+
+LEAF_ENTRY Load_FA1_FA2_FA3_FA4_FA5
+ fld.d $fa1, $t3, 0
+ fld.d $fa2, $t3, 8
+ fld.d $fa3, $t3, 16
+ fld.d $fa4, $t3, 24
+ fld.d $fa5, $t3, 32
+ addi.d $t3, $t3, 40
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA1_FA2_FA3_FA4_FA5
+
+LEAF_ENTRY Load_FA1_FA2_FA3_FA4_FA5_FA6
+ fld.d $fa1, $t3, 0
+ fld.d $fa2, $t3, 8
+ fld.d $fa3, $t3, 16
+ fld.d $fa4, $t3, 24
+ fld.d $fa5, $t3, 32
+ fld.d $fa6, $t3, 40
+ addi.d $t3, $t3, 48
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA1_FA2_FA3_FA4_FA5_FA6
+
+LEAF_ENTRY Load_FA1_FA2_FA3_FA4_FA5_FA6_FA7
+ fld.d $fa1, $t3, 0
+ fld.d $fa2, $t3, 8
+ fld.d $fa3, $t3, 16
+ fld.d $fa4, $t3, 24
+ fld.d $fa5, $t3, 32
+ fld.d $fa6, $t3, 40
+ fld.d $fa7, $t3, 48
+ addi.d $t3, $t3, 56
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA1_FA2_FA3_FA4_FA5_FA6_FA7
+
+LEAF_ENTRY Load_FA3
+ fld.d $fa3, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA3
+
+LEAF_ENTRY Load_FA3_FA4
+ fld.d $fa3, $t3, 0
+ fld.d $fa4, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA3_FA4
+
+LEAF_ENTRY Load_FA3_FA4_FA5
+ fld.d $fa3, $t3, 0
+ fld.d $fa4, $t3, 8
+ fld.d $fa5, $t3, 16
+ addi.d $t3, $t3, 24
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA3_FA4_FA5
+
+LEAF_ENTRY Load_FA3_FA4_FA5_FA6
+ fld.d $fa3, $t3, 0
+ fld.d $fa4, $t3, 8
+ fld.d $fa5, $t3, 16
+ fld.d $fa6, $t3, 24
+ addi.d $t3, $t3, 32
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA3_FA4_FA5_FA6
+
+LEAF_ENTRY Load_FA3_FA4_FA5_FA6_FA7
+ fld.d $fa3, $t3, 0
+ fld.d $fa4, $t3, 8
+ fld.d $fa5, $t3, 16
+ fld.d $fa6, $t3, 24
+ fld.d $fa7, $t3, 32
+ addi.d $t3, $t3, 40
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA3_FA4_FA5_FA6_FA7
+
+LEAF_ENTRY Load_FA5
+ fld.d $fa5, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA5
+
+LEAF_ENTRY Load_FA5_FA6
+ fld.d $fa5, $t3, 0
+ fld.d $fa6, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA5_FA6
+
+LEAF_ENTRY Load_FA5_FA6_FA7
+ fld.d $fa5, $t3, 0
+ fld.d $fa6, $t3, 8
+ fld.d $fa7, $t3, 16
+ addi.d $t3, $t3, 24
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA5_FA6_FA7
+
+LEAF_ENTRY Load_FA7
+ fld.d $fa7, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Load_FA7
+
+LEAF_ENTRY Store_FA0
+ fst.d $fa0, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA0
+
+LEAF_ENTRY Store_FA0_FA1
+ fst.d $fa0, $t3, 0
+ fst.d $fa1, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA0_FA1
+
+LEAF_ENTRY Store_FA0_FA1_FA2
+ fst.d $fa0, $t3, 0
+ fst.d $fa1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_FA2
+ fst.d $fa2, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA0_FA1_FA2
+
+LEAF_ENTRY Store_FA0_FA1_FA2_FA3
+ fst.d $fa0, $t3, 0
+ fst.d $fa1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_FA2_FA3
+ fst.d $fa2, $t3, 0
+ fst.d $fa3, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA0_FA1_FA2_FA3
+
+LEAF_ENTRY Store_FA0_FA1_FA2_FA3_FA4
+ fst.d $fa0, $t3, 0
+ fst.d $fa1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_FA2_FA3_FA4
+ fst.d $fa2, $t3, 0
+ fst.d $fa3, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_FA4
+ fst.d $fa4, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA0_FA1_FA2_FA3_FA4
+
+LEAF_ENTRY Store_FA0_FA1_FA2_FA3_FA4_FA5
+ fst.d $fa0, $t3, 0
+ fst.d $fa1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_FA2_FA3_FA4_FA5
+ fst.d $fa2, $t3, 0
+ fst.d $fa3, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_FA4_FA5
+ fst.d $fa4, $t3, 0
+ fst.d $fa5, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA0_FA1_FA2_FA3_FA4_FA5
+
+LEAF_ENTRY Store_FA0_FA1_FA2_FA3_FA4_FA5_FA6
+ fst.d $fa0, $t3, 0
+ fst.d $fa1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_FA2_FA3_FA4_FA5_FA6
+ fst.d $fa2, $t3, 0
+ fst.d $fa3, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_FA4_FA5_FA6
+ fst.d $fa4, $t3, 0
+ fst.d $fa5, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_FA6
+ fst.d $fa6, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA0_FA1_FA2_FA3_FA4_FA5_FA6
+
+LEAF_ENTRY Store_FA0_FA1_FA2_FA3_FA4_FA5_FA6_FA7
+ fst.d $fa0, $t3, 0
+ fst.d $fa1, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_FA2_FA3_FA4_FA5_FA6_FA7
+ fst.d $fa2, $t3, 0
+ fst.d $fa3, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_FA4_FA5_FA6_FA7
+ fst.d $fa4, $t3, 0
+ fst.d $fa5, $t3, 8
+ addi.d $t3, $t3, 16
+ALTERNATE_ENTRY Store_FA6_FA7
+ fst.d $fa6, $t3, 0
+ fst.d $fa7, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA0_FA1_FA2_FA3_FA4_FA5_FA6_FA7
+
+// Additional Store_FA* routines starting from the odd registers FA1, FA3, FA5, FA7 (straight runs, no alternate entries)
+LEAF_ENTRY Store_FA1
+ fst.d $fa1, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA1
+
+LEAF_ENTRY Store_FA1_FA2
+ fst.d $fa1, $t3, 0
+ fst.d $fa2, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA1_FA2
+
+LEAF_ENTRY Store_FA1_FA2_FA3
+ fst.d $fa1, $t3, 0
+ fst.d $fa2, $t3, 8
+ fst.d $fa3, $t3, 16
+ addi.d $t3, $t3, 24
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA1_FA2_FA3
+
+LEAF_ENTRY Store_FA1_FA2_FA3_FA4
+ fst.d $fa1, $t3, 0
+ fst.d $fa2, $t3, 8
+ fst.d $fa3, $t3, 16
+ fst.d $fa4, $t3, 24
+ addi.d $t3, $t3, 32
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA1_FA2_FA3_FA4
+
+LEAF_ENTRY Store_FA1_FA2_FA3_FA4_FA5
+ fst.d $fa1, $t3, 0
+ fst.d $fa2, $t3, 8
+ fst.d $fa3, $t3, 16
+ fst.d $fa4, $t3, 24
+ fst.d $fa5, $t3, 32
+ addi.d $t3, $t3, 40
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA1_FA2_FA3_FA4_FA5
+
+LEAF_ENTRY Store_FA1_FA2_FA3_FA4_FA5_FA6
+ fst.d $fa1, $t3, 0
+ fst.d $fa2, $t3, 8
+ fst.d $fa3, $t3, 16
+ fst.d $fa4, $t3, 24
+ fst.d $fa5, $t3, 32
+ fst.d $fa6, $t3, 40
+ addi.d $t3, $t3, 48
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA1_FA2_FA3_FA4_FA5_FA6
+
+LEAF_ENTRY Store_FA1_FA2_FA3_FA4_FA5_FA6_FA7
+ fst.d $fa1, $t3, 0
+ fst.d $fa2, $t3, 8
+ fst.d $fa3, $t3, 16
+ fst.d $fa4, $t3, 24
+ fst.d $fa5, $t3, 32
+ fst.d $fa6, $t3, 40
+ fst.d $fa7, $t3, 48
+ addi.d $t3, $t3, 56
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA1_FA2_FA3_FA4_FA5_FA6_FA7
+
+LEAF_ENTRY Store_FA3
+ fst.d $fa3, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA3
+
+LEAF_ENTRY Store_FA3_FA4
+ fst.d $fa3, $t3, 0
+ fst.d $fa4, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA3_FA4
+
+LEAF_ENTRY Store_FA3_FA4_FA5
+ fst.d $fa3, $t3, 0
+ fst.d $fa4, $t3, 8
+ fst.d $fa5, $t3, 16
+ addi.d $t3, $t3, 24
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA3_FA4_FA5
+
+LEAF_ENTRY Store_FA3_FA4_FA5_FA6
+ fst.d $fa3, $t3, 0
+ fst.d $fa4, $t3, 8
+ fst.d $fa5, $t3, 16
+ fst.d $fa6, $t3, 24
+ addi.d $t3, $t3, 32
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA3_FA4_FA5_FA6
+
+LEAF_ENTRY Store_FA3_FA4_FA5_FA6_FA7
+ fst.d $fa3, $t3, 0
+ fst.d $fa4, $t3, 8
+ fst.d $fa5, $t3, 16
+ fst.d $fa6, $t3, 24
+ fst.d $fa7, $t3, 32
+ addi.d $t3, $t3, 40
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA3_FA4_FA5_FA6_FA7
+
+LEAF_ENTRY Store_FA5
+ fst.d $fa5, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA5
+
+LEAF_ENTRY Store_FA5_FA6
+ fst.d $fa5, $t3, 0
+ fst.d $fa6, $t3, 8
+ addi.d $t3, $t3, 16
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA5_FA6
+
+LEAF_ENTRY Store_FA5_FA6_FA7
+ fst.d $fa5, $t3, 0
+ fst.d $fa6, $t3, 8
+ fst.d $fa7, $t3, 16
+ addi.d $t3, $t3, 24
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA5_FA6_FA7
+
+LEAF_ENTRY Store_FA7
+ fst.d $fa7, $t3, 0
+ addi.d $t3, $t3, 8
+ ld.d $t4, $t2, 0
+ addi.d $t2, $t2, 8
+ EPILOG_BRANCH_REG $t4
+LEAF_END Store_FA7
+
+// ------------------------------------------------------------------
+// Create a real TransitionBlock and bl CallInterpreterFuncletWorker
+// to execute an interpreter funclet (catch/finally/filter handler).
+//
+// extern "C" DWORD_PTR CallInterpreterFunclet(
+//     OBJECTREF throwable,     // $a0
+//     void* pHandler,          // $a1
+//     REGDISPLAY *pRD,         // $a2
+//     ExInfo *pExInfo,         // $a3
+//     bool isFilter            // $a4
+// );
+// ------------------------------------------------------------------
+NESTED_ENTRY CallInterpreterFunclet, _TEXT, NoHandler
+
+    PROLOG_WITH_TRANSITION_BLOCK
+
+    // Pass TransitionBlock* as the last (6th) argument.
+    // Worker signature: CallInterpreterFuncletWorker(throwable, pHandler, pRD, pExInfo, isFilter, TransitionBlock*)
+    // Original args: $a0=throwable, $a1=pHandler, $a2=pRD, $a3=pExInfo, $a4=isFilter
+    // $a0-$a4 arrive here unclobbered, so only $a5 needs to be set up.
+
+    addi.d $a5, $sp, __PWTB_TransitionBlock // TransitionBlock* as 6th param ($a5)
+
+    bl C_FUNC(CallInterpreterFuncletWorker)
+
+    EPILOG_WITH_TRANSITION_BLOCK_RETURN
+
+NESTED_END CallInterpreterFunclet, _TEXT
+
+// ------------------------------------------------------------------
+// Resume an interpreter continuation after an async await.
+// The worker function will restore callee-saved registers from the
+// TransitionBlock.
+//
+// FCDECL2(ContinuationObject*, AsyncHelpers_ResumeInterpreterContinuation, ContinuationObject* cont, uint8_t* resultStorage);
+//
+// ------------------------------------------------------------------
+NESTED_ENTRY AsyncHelpers_ResumeInterpreterContinuation, _TEXT, NoHandler
+
+    PROLOG_WITH_TRANSITION_BLOCK
+
+    // Worker signature: AsyncHelpers_ResumeInterpreterContinuationWorker(cont, resultStorage, TransitionBlock*)
+    // $a0, $a1 arrive here unclobbered, so only $a2 needs to be set up.
+
+    addi.d $a2, $sp, __PWTB_TransitionBlock // TransitionBlock* as 3rd param ($a2)
+
+    bl C_FUNC(AsyncHelpers_ResumeInterpreterContinuationWorker)
+
+    EPILOG_WITH_TRANSITION_BLOCK_RETURN
+
+NESTED_END AsyncHelpers_ResumeInterpreterContinuation, _TEXT
+
+#endif // FEATURE_INTERPRETER
diff --git a/src/coreclr/vm/loongarch64/stubs.cpp b/src/coreclr/vm/loongarch64/stubs.cpp
index 8f69312046cb30..79d1b09a39e322 100644
--- a/src/coreclr/vm/loongarch64/stubs.cpp
+++ b/src/coreclr/vm/loongarch64/stubs.cpp
@@ -293,6 +293,29 @@ void TransitionFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFl
LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK TransitionFrame::UpdateRegDisplay_Impl(pc:%p, sp:%p)\n", pRD->ControlPC, pRD->SP));
}
+#ifdef FEATURE_INTERPRETER
+#ifndef DACCESS_COMPILE
+void InterpreterFrame::UpdateFloatingPointRegisters_Impl(const PREGDISPLAY pRD, TADDR)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    // The interpreter frame saves the floating point callee-saved registers (f24-f31)
+    // before FloatArgumentRegisters and TransitionBlock:
+    // [f24-f31 (64 bytes)] [fa0-fa7 (64 bytes)] [TransitionBlock]
+    // So f24-f31 are located at TransitionBlock - 128.
+    TADDR pTransitionBlock = GetTransitionBlock();
+    UINT64 *pCalleeSavedFloats = (UINT64*)((BYTE*)pTransitionBlock - 128); // 128 = 64 (f24-f31) + 64 (fa0-fa7)
+
+    // LoongArch CONTEXT::F has 4 slots per register for LASX support.
+    // Each scalar double value is stored in the first slot.
+    for (int i = 0; i < 8; i++)
+    {
+        memcpy(&pRD->pCurrentContext->F[(24 + i) * 4], &pCalleeSavedFloats[i], sizeof(double)); // low 64-bit lane only
+    }
+}
+#endif // DACCESS_COMPILE
+#endif // FEATURE_INTERPRETER
+
void FaultingExceptionFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats)
{
LIMITED_METHOD_DAC_CONTRACT;
diff --git a/src/coreclr/vm/prestub.cpp b/src/coreclr/vm/prestub.cpp
index 3bc20bab6854d8..a9758414d1c684 100644
--- a/src/coreclr/vm/prestub.cpp
+++ b/src/coreclr/vm/prestub.cpp
@@ -2037,7 +2037,7 @@ extern "C" void* STDCALL ExecuteInterpretedMethod(TransitionBlock* pTransitionBl
pArgumentRegisters->x[2] = (INT64)*frames.interpreterFrame.GetContinuationPtr();
#elif defined(TARGET_ARM)
pArgumentRegisters->r[2] = (INT64)*frames.interpreterFrame.GetContinuationPtr();
- #elif defined(TARGET_RISCV64)
+ #elif defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
pArgumentRegisters->a[2] = (INT64)*frames.interpreterFrame.GetContinuationPtr();
#elif defined(TARGET_WASM)
// We do not yet have an ABI for WebAssembly native code to handle here.
diff --git a/src/coreclr/vm/riscv64/asmhelpers.S b/src/coreclr/vm/riscv64/asmhelpers.S
index 0d14aab155fc4f..b20af9c1085686 100644
--- a/src/coreclr/vm/riscv64/asmhelpers.S
+++ b/src/coreclr/vm/riscv64/asmhelpers.S
@@ -1301,6 +1301,30 @@ NESTED_ENTRY CallJittedMethodRetDouble, _TEXT, NoHandler
EPILOG_RETURN
NESTED_END CallJittedMethodRetDouble, _TEXT
+// a0 - routines array (the first entry is invoked and chains to the rest)
+// a1 - interpreter stack args location
+// a2 - interpreter stack return value location
+// a3 - stack arguments size (properly aligned)
+// a4 - address of continuation return value
+NESTED_ENTRY CallJittedMethodRetFloat, _TEXT, NoHandler
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -32
+    sd a2, 16(fp) // preserve the return value location across the call
+    sd a4, 24(fp) // preserve the continuation return value address
+    sub sp, sp, a3 // make room for outgoing stack arguments
+    mv t2, a0 // t2 = routines array cursor
+    mv t3, a1 // t3 = interpreter stack args cursor
+    ld t4, 0(t2) // fetch the first routine
+    addi t2, t2, 8 // advance the cursor past it
+    jalr t4 // run the routine chain; it ends by calling the jitted method
+    ld a4, 24(fp)
+    sd a2, 0(a4) // store the continuation (returned in a2) to its slot
+    ld a2, 16(fp)
+    fsw fa0, 0(a2) // store the single-precision float result
+    EPILOG_STACK_RESTORE
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 32
+    EPILOG_RETURN
+NESTED_END CallJittedMethodRetFloat, _TEXT
+
// a0 - routines array
// a1 - interpreter stack args location
// a2 - interpreter stack return value location
@@ -1351,6 +1375,31 @@ NESTED_ENTRY CallJittedMethodRet2Double, _TEXT, NoHandler
EPILOG_RETURN
NESTED_END CallJittedMethodRet2Double, _TEXT
+// a0 - routines array (the first entry is invoked and chains to the rest)
+// a1 - interpreter stack args location
+// a2 - interpreter stack return value location
+// a3 - stack arguments size (properly aligned)
+// a4 - address of continuation return value
+NESTED_ENTRY CallJittedMethodRet2Float, _TEXT, NoHandler
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -32
+    sd a2, 16(fp) // preserve the return value location across the call
+    sd a4, 24(fp) // preserve the continuation return value address
+    sub sp, sp, a3 // make room for outgoing stack arguments
+    mv t2, a0 // t2 = routines array cursor
+    mv t3, a1 // t3 = interpreter stack args cursor
+    ld t4, 0(t2) // fetch the first routine
+    addi t2, t2, 8 // advance the cursor past it
+    jalr t4 // run the routine chain; it ends by calling the jitted method
+    ld a4, 24(fp)
+    sd a2, 0(a4) // store the continuation (returned in a2) to its slot
+    ld a2, 16(fp)
+    fsw fa0, 0(a2) // two single-precision floats packed at offsets 0 and 4
+    fsw fa1, 4(a2)
+    EPILOG_STACK_RESTORE
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 32
+    EPILOG_RETURN
+NESTED_END CallJittedMethodRet2Float, _TEXT
+
// a0 - routines array
// a1 - interpreter stack args location
// a2 - interpreter stack return value location
diff --git a/src/installer/pkg/sfx/Microsoft.NETCore.App/Directory.Build.props b/src/installer/pkg/sfx/Microsoft.NETCore.App/Directory.Build.props
index 7c752b511514ae..52fcfcf860bbf9 100644
--- a/src/installer/pkg/sfx/Microsoft.NETCore.App/Directory.Build.props
+++ b/src/installer/pkg/sfx/Microsoft.NETCore.App/Directory.Build.props
@@ -122,7 +122,7 @@
-
+
diff --git a/src/tests/JIT/interpreter/InterpreterTester.csproj b/src/tests/JIT/interpreter/InterpreterTester.csproj
index 45a180eef5fe85..5e0641be74dc2b 100644
--- a/src/tests/JIT/interpreter/InterpreterTester.csproj
+++ b/src/tests/JIT/interpreter/InterpreterTester.csproj
@@ -2,7 +2,7 @@
true
true
- true
+ true
true