From 62c80b7fc702480c98de20358ed1a04389d6628e Mon Sep 17 00:00:00 2001 From: Jackson Schuster <36744439+jtschuster@users.noreply.github.com> Date: Tue, 10 Mar 2026 10:55:01 -0700 Subject: [PATCH 1/6] Revert "Just skip ARM32 instead of trying to get it all working all at once" This reverts commit 47ee93dabb8aa7a741f09804a1495923a62ed7a4. --- src/coreclr/jit/emit.cpp | 15 +++++++++++++-- .../ReadyToRunCodegenNodeFactory.cs | 11 +++++------ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 3eafb5c5bc9344..d120705b9c9cfd 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -8438,11 +8438,22 @@ void emitter::emitOutputDataSec(dataSecDsc* sec, AllocMemChunk* chunks) // Async call may have been removed very late, after we have introduced suspension/resumption. // In those cases just encode null. BYTE* target = emitLoc->Valid() ? emitOffsetToPtr(emitLoc->CodeOffset(this)) : nullptr; - aDstRW[i].Resume = (target_size_t)(uintptr_t)emitAsyncResumeStubEntryPoint; + BYTE* resumeStub = (BYTE*)emitAsyncResumeStubEntryPoint; + +#ifdef TARGET_ARM + // ARM32 requires the Thumb bit (bit 0) set on code pointers. 
+ resumeStub = (BYTE*)((size_t)resumeStub | 1); + if (target != nullptr) + { + target = (BYTE*)((size_t)target | 1); + } +#endif + + aDstRW[i].Resume = (target_size_t)(uintptr_t)resumeStub; aDstRW[i].DiagnosticIP = (target_size_t)(uintptr_t)target; if (m_compiler->opts.compReloc) { - emitRecordRelocation(&aDstRW[i].Resume, emitAsyncResumeStubEntryPoint, CorInfoReloc::DIRECT); + emitRecordRelocation(&aDstRW[i].Resume, resumeStub, CorInfoReloc::DIRECT); if (target != nullptr) { emitRecordRelocation(&aDstRW[i].DiagnosticIP, target, CorInfoReloc::DIRECT); diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs index b8fe7c2466f5f6..5d123ac773fc67 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs @@ -482,13 +482,12 @@ public IEnumerable EnumerateCompiledMethods(EcmaModule moduleT foreach (IMethodNode methodNode in MetadataManager.GetCompiledMethods(moduleToEnumerate, methodCategory)) { MethodDesc method = methodNode.Method; - // Async methods are not emitted in composite mode nor on ARM32 - // The mutable module tokens emission is not well tested for composite mode and we should find a real solution for that problem - // ARM32 relocs require the thumb bit set, and the JIT/crossgen doesn't set it properly for the usages in async methods. + // Async methods are not emitted in composite mode. + // The mutable module tokens emission is not well tested for composite mode + // and we should find a real solution for that problem. 
// https://github.com/dotnet/runtime/issues/125337 - // https://github.com/dotnet/runtime/issues/125338 - if ((CompilationModuleGroup.IsCompositeBuildMode || Target.Architecture == TargetArchitecture.ARM) - && (method.IsAsyncVariant() || method.IsCompilerGeneratedILBodyForAsync())) + if (CompilationModuleGroup.IsCompositeBuildMode && + (method.IsAsyncVariant() || method.IsCompilerGeneratedILBodyForAsync())) { continue; } From c558b8cbb36e67ec3c0a683c297f1a0e04778786 Mon Sep 17 00:00:00 2001 From: Jackson Schuster <36744439+jtschuster@users.noreply.github.com> Date: Wed, 11 Mar 2026 17:12:56 -0700 Subject: [PATCH 2/6] Jit format --- src/coreclr/jit/emit.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index d120705b9c9cfd..2f172a292e45e0 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -8437,8 +8437,8 @@ void emitter::emitOutputDataSec(dataSecDsc* sec, AllocMemChunk* chunks) // Async call may have been removed very late, after we have introduced suspension/resumption. // In those cases just encode null. - BYTE* target = emitLoc->Valid() ? emitOffsetToPtr(emitLoc->CodeOffset(this)) : nullptr; - BYTE* resumeStub = (BYTE*)emitAsyncResumeStubEntryPoint; + BYTE* target = emitLoc->Valid() ? emitOffsetToPtr(emitLoc->CodeOffset(this)) : nullptr; + BYTE* resumeStub = (BYTE*)emitAsyncResumeStubEntryPoint; #ifdef TARGET_ARM // ARM32 requires the Thumb bit (bit 0) set on code pointers. From 3695c423ad52fd58946bea8e985a1ec2cbbc16d9 Mon Sep 17 00:00:00 2001 From: Jackson Schuster <36744439+jtschuster@users.noreply.github.com> Date: Thu, 12 Mar 2026 12:35:13 -0700 Subject: [PATCH 3/6] Fix ARM32 SIGILL in R2R async resume stubs by preserving Thumb bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On ARM32, the JIT sets bit 0 on code pointers to indicate Thumb mode. 
When crossgen2's recordRelocation receives an external handle with the Thumb bit set, HandleToObject's integer division (by handleMultiplier=8) truncates the low bits, and the relocation delta is recorded as 0 instead of 1. At runtime, the resume stub pointer in the continuation layout lacks the Thumb bit, causing blx to switch the CPU to ARM mode where Thumb-encoded instructions are undefined — resulting in SIGILL. Fix: before calling HandleToObject, capture the low bits of the handle that would be lost to integer division and include them in relocDelta. This is architecture-agnostic: on non-ARM targets the low bits are always 0, so the mask is a no-op. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs index 2178c5fd0827e4..ff4f2712d3899b 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs @@ -4294,7 +4294,13 @@ private void recordRelocation(void* location, void* locationRW, void* target, Co #endif default: - // Reloc points to something outside of the generated blocks + // Reloc points to something outside of the generated blocks. + // HandleToObject resolves via integer division (by handleMultiplier) which + // truncates low bits. On ARM, the JIT may have set bit 0 on the handle to + // indicate a Thumb code target. Preserve those bits before they are lost. 
+ Debug.Assert(int.IsPow2(handleMultiplier), "handleMultiplier must be a power of 2 for the bitmask to capture the correct low bits"); + relocDelta = (int)((nint)target & (handleMultiplier - 1)); + var targetObject = HandleToObject(target); #if READYTORUN From 1970178c181a938aa7450470dcc2b005a69f50a8 Mon Sep 17 00:00:00 2001 From: Jackson Schuster <36744439+jtschuster@users.noreply.github.com> Date: Fri, 13 Mar 2026 10:23:01 -0700 Subject: [PATCH 4/6] ARM32: pass Thumb bit via addlDelta instead of mutating handle Revert the CorInfoImpl.cs fix that preserved low handle bits in recordRelocation's default case. That approach ran in shared ILC/R2R code and could double-count the thumb bit for NativeAOT (where the object writer already adds it to symbol definitions). Instead, keep the handle clean and pass the Thumb bit through the existing addlDelta parameter on the JIT side. For the resume stub (external handle), use emitRecordRelocationWithAddlDelta(..., 1). For DiagnosticIP (intra-code pointer), continue ORing directly since findKnownBlock preserves the offset including bit 0. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/emit.cpp | 8 +++++++- src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs | 8 +------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 2f172a292e45e0..822de8ba8f773e 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -8442,7 +8442,9 @@ void emitter::emitOutputDataSec(dataSecDsc* sec, AllocMemChunk* chunks) #ifdef TARGET_ARM // ARM32 requires the Thumb bit (bit 0) set on code pointers. - resumeStub = (BYTE*)((size_t)resumeStub | 1); + // For target (intra-code pointer), OR directly — findKnownBlock preserves it. + // For resumeStub (external handle), use addlDelta so the handle stays clean + // for HandleToObject's integer division in crossgen2. 
if (target != nullptr) { target = (BYTE*)((size_t)target | 1); @@ -8453,7 +8455,11 @@ void emitter::emitOutputDataSec(dataSecDsc* sec, AllocMemChunk* chunks) aDstRW[i].DiagnosticIP = (target_size_t)(uintptr_t)target; if (m_compiler->opts.compReloc) { +#ifdef TARGET_ARM + emitRecordRelocationWithAddlDelta(&aDstRW[i].Resume, resumeStub, CorInfoReloc::DIRECT, 1); +#else emitRecordRelocation(&aDstRW[i].Resume, resumeStub, CorInfoReloc::DIRECT); +#endif if (target != nullptr) { emitRecordRelocation(&aDstRW[i].DiagnosticIP, target, CorInfoReloc::DIRECT); diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs index ff4f2712d3899b..2178c5fd0827e4 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs @@ -4294,13 +4294,7 @@ private void recordRelocation(void* location, void* locationRW, void* target, Co #endif default: - // Reloc points to something outside of the generated blocks. - // HandleToObject resolves via integer division (by handleMultiplier) which - // truncates low bits. On ARM, the JIT may have set bit 0 on the handle to - // indicate a Thumb code target. Preserve those bits before they are lost. - Debug.Assert(int.IsPow2(handleMultiplier), "handleMultiplier must be a power of 2 for the bitmask to capture the correct low bits"); - relocDelta = (int)((nint)target & (handleMultiplier - 1)); - + // Reloc points to something outside of the generated blocks var targetObject = HandleToObject(target); #if READYTORUN From 09e1a1e6081b3a71f83ace28102367a1b76755c7 Mon Sep 17 00:00:00 2001 From: Jackson Schuster <36744439+jtschuster@users.noreply.github.com> Date: Fri, 13 Mar 2026 11:46:05 -0700 Subject: [PATCH 5/6] Move .Resume assignment back to original position Keep the Resume field assignment next to DiagnosticIP as it was on main, using emitAsyncResumeStubEntryPoint directly without an intermediate local variable. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/emit.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 822de8ba8f773e..3862eea784b891 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -8437,8 +8437,9 @@ void emitter::emitOutputDataSec(dataSecDsc* sec, AllocMemChunk* chunks) // Async call may have been removed very late, after we have introduced suspension/resumption. // In those cases just encode null. - BYTE* target = emitLoc->Valid() ? emitOffsetToPtr(emitLoc->CodeOffset(this)) : nullptr; - BYTE* resumeStub = (BYTE*)emitAsyncResumeStubEntryPoint; + BYTE* target = emitLoc->Valid() ? emitOffsetToPtr(emitLoc->CodeOffset(this)) : nullptr; + aDstRW[i].Resume = (target_size_t)(uintptr_t)emitAsyncResumeStubEntryPoint; + aDstRW[i].DiagnosticIP = (target_size_t)(uintptr_t)target; #ifdef TARGET_ARM // ARM32 requires the Thumb bit (bit 0) set on code pointers. 
@@ -8451,14 +8452,12 @@ void emitter::emitOutputDataSec(dataSecDsc* sec, AllocMemChunk* chunks) } #endif - aDstRW[i].Resume = (target_size_t)(uintptr_t)resumeStub; - aDstRW[i].DiagnosticIP = (target_size_t)(uintptr_t)target; if (m_compiler->opts.compReloc) { #ifdef TARGET_ARM - emitRecordRelocationWithAddlDelta(&aDstRW[i].Resume, resumeStub, CorInfoReloc::DIRECT, 1); + emitRecordRelocationWithAddlDelta(&aDstRW[i].Resume, emitAsyncResumeStubEntryPoint, CorInfoReloc::DIRECT, 1); #else - emitRecordRelocation(&aDstRW[i].Resume, resumeStub, CorInfoReloc::DIRECT); + emitRecordRelocation(&aDstRW[i].Resume, emitAsyncResumeStubEntryPoint, CorInfoReloc::DIRECT); #endif if (target != nullptr) { From ae9ef9465c4e4efd727155993d4060e5e68ebb39 Mon Sep 17 00:00:00 2001 From: Jackson Schuster <36744439+jtschuster@users.noreply.github.com> Date: Mon, 16 Mar 2026 13:29:13 -0700 Subject: [PATCH 6/6] Jit format and update comment --- src/coreclr/jit/emit.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 3862eea784b891..b0953ec4bff5b3 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -8444,8 +8444,7 @@ void emitter::emitOutputDataSec(dataSecDsc* sec, AllocMemChunk* chunks) #ifdef TARGET_ARM // ARM32 requires the Thumb bit (bit 0) set on code pointers. // For target (intra-code pointer), OR directly — findKnownBlock preserves it. - // For resumeStub (external handle), use addlDelta so the handle stays clean - // for HandleToObject's integer division in crossgen2. + // For the resume stub (external handle), use addlDelta because we can't do math with handles. 
if (target != nullptr) { target = (BYTE*)((size_t)target | 1); @@ -8455,7 +8454,8 @@ void emitter::emitOutputDataSec(dataSecDsc* sec, AllocMemChunk* chunks) if (m_compiler->opts.compReloc) { #ifdef TARGET_ARM - emitRecordRelocationWithAddlDelta(&aDstRW[i].Resume, emitAsyncResumeStubEntryPoint, CorInfoReloc::DIRECT, 1); + emitRecordRelocationWithAddlDelta(&aDstRW[i].Resume, emitAsyncResumeStubEntryPoint, + CorInfoReloc::DIRECT, 1); #else emitRecordRelocation(&aDstRW[i].Resume, emitAsyncResumeStubEntryPoint, CorInfoReloc::DIRECT); #endif