Use dispatch helper for calling interface methods by MichalStrehovsky · Pull Request #123252 · dotnet/runtime

MichalStrehovsky · 2026-01-16T08:50:02Z

Still very much WIP, don't look at implementation. This works just enough that I can do perf measurements.

We need a CFG flavor of this helper for x64 and arm64 Windows
Look at the hash function
Add optimized flavors of the helper for supported platforms (leave community platforms as issues)

This is changing interface dispatch from this shape:

mov         rcx,rbx  ; this
mov         r8d,edi  ; arg1
mov         edx,esi  ; arg2
lea         r11,[__InterfaceDispatchCell_repro_IFace__Call_repro_Program__DoCallFixed (07FF693C155B0h)]  
call        qword ptr [r11]

To this shape:

mov         rcx,rbx  ; this
mov         r8d,edi  ; arg1
mov         edx,esi  ; arg2
lea         r11,[__InterfaceDispatchCell_repro_IFace__Call_repro_Program__DoCallFixed (07FF682916700h)]  
call        RhpResolveInterfaceMethodFast (07FF682864E60h)

What this is also changing is the contents of the dispatch cell. The dispatch cell is now two pointers: a cached this pointer and a cached target method address. The information about the dispatch cell (interface type and slot) is stored out-of-line (the dispatch cell now goes to the .bss segment and the metadata is readonly data).

On a first dispatch, we call the slow resolution helper that will decompose the dispatch cell to MethodTable+slot, compute the result of lookup and store it in the dispatch cell itself. This is the fastest, monomorphic case.

If we later see dispatches with different kind of this, we cache them in a global hashtable. The key of the global hashtable is the this MethodTable address and the dispatch cell address. We use this as the key instead of interface MethodTable+slot+this MethodTable because it's faster to hash/compare and touches less memory.

Because the contents/shape of the dispatch cell is now fixed, we can inline the monomorphic case in the invoke sequence. This is not done yet.

Cc @dotnet/ilc-contrib

MichalStrehovsky · 2026-01-16T08:53:13Z

Test program:

Details

// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;

class Program
{
    static void Main()
    {
        for (int it = 0; it < 6; it++)
        {
            IFace c0 = new C0();
            var sw = Stopwatch.StartNew();
            for (int i = 0; i < 10_000_000; i++)
            {
                DoCallFixed(c0, 1, 2);
            }
            Console.WriteLine(sw.ElapsedMilliseconds);

            IFace[] faces = [
                new C0(), new C1(), new C2(), new C3(),
            new C4(), new C5(), new C6(), new C7(),
            new C8(), new C9(), new CA(), new CB(),
            new CC(), new CD(), new CE(), new CF(),
            ];

            sw = Stopwatch.StartNew();
            for (int i = 0; i < 10_000_000; i++)
            {
                DoCall2(faces, 1, 2);
            }
            Console.WriteLine(sw.ElapsedMilliseconds);

            sw = Stopwatch.StartNew();
            for (int i = 0; i < 10_000_000; i++)
            {
                DoCall4(faces, 1, 2);
            }
            Console.WriteLine(sw.ElapsedMilliseconds);

            sw = Stopwatch.StartNew();
            for (int i = 0; i < 10_000_000; i++)
            {
                DoCall8(faces, 1, 2);
            }
            Console.WriteLine(sw.ElapsedMilliseconds);

            sw = Stopwatch.StartNew();
            for (int i = 0; i < 10_000_000; i++)
            {
                DoCall16(faces, 1, 2);
            }
            Console.WriteLine(sw.ElapsedMilliseconds);

            for (int i = 0; i < faces.Length; i++)
            {
                DoCallFixed(faces[i], 1, 2);
            }

            sw = Stopwatch.StartNew();
            IFace cf = new CF();
            for (int i = 0; i < 10_000_000; i++)
            {
                DoCallFixed(cf, 1, 2);
            }
            Console.WriteLine(sw.ElapsedMilliseconds);

            Console.WriteLine("---------------------------");
        }
    }



    [MethodImpl(MethodImplOptions.NoInlining)]
    //[MethodImpl(MethodImplOptions.AggressiveInlining)]
    static void DoCallFixed(IFace i, int x, int y)
    {
        i.Call(x, y);
        i.Call(x, y);
        i.Call(x, y);
        i.Call(x, y);
        i.Call(x, y);
        i.Call(x, y);
        i.Call(x, y);
        i.Call(x, y);
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    //[MethodImpl(MethodImplOptions.AggressiveInlining)]
    static void DoCall2(IFace[] i, int x, int y)
    {
        i[0].Call(x, y);
        i[1].Call(x, y);
        i[0].Call(x, y);
        i[1].Call(x, y);
        i[0].Call(x, y);
        i[1].Call(x, y);
        i[0].Call(x, y);
        i[1].Call(x, y);
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    //[MethodImpl(MethodImplOptions.AggressiveInlining)]
    static void DoCall4(IFace[] i, int x, int y)
    {
        i[0].Call(x, y);
        i[1].Call(x, y);
        i[2].Call(x, y);
        i[3].Call(x, y);
        i[0].Call(x, y);
        i[1].Call(x, y);
        i[2].Call(x, y);
        i[3].Call(x, y);
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    //[MethodImpl(MethodImplOptions.AggressiveInlining)]
    static void DoCall8(IFace[] i, int x, int y)
    {
        i[0].Call(x, y);
        i[1].Call(x, y);
        i[2].Call(x, y);
        i[3].Call(x, y);
        i[4].Call(x, y);
        i[5].Call(x, y);
        i[6].Call(x, y);
        i[7].Call(x, y);
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    //[MethodImpl(MethodImplOptions.AggressiveInlining)]
    static void DoCall16(IFace[] i, int x, int y)
    {
        i[0].Call(x, y);
        i[1].Call(x, y);
        i[2].Call(x, y);
        i[3].Call(x, y);
        i[4].Call(x, y);
        i[5].Call(x, y);
        i[6].Call(x, y);
        i[7].Call(x, y);
        i[8].Call(x, y);
        i[9].Call(x, y);
        i[10].Call(x, y);
        i[11].Call(x, y);
        i[12].Call(x, y);
        i[13].Call(x, y);
        i[14].Call(x, y);
        i[15].Call(x, y);
    }
}

interface IFace
{
    int Call(int x, int y);
}

class C0 : IFace { public int Call(int x, int y) => x + y; }
class C1 : IFace { public int Call(int x, int y) => x + y; }
class C2 : IFace { public int Call(int x, int y) => x + y; }
class C3 : IFace { public int Call(int x, int y) => x + y; }
class C4 : IFace { public int Call(int x, int y) => x + y; }
class C5 : IFace { public int Call(int x, int y) => x + y; }
class C6 : IFace { public int Call(int x, int y) => x + y; }
class C7 : IFace { public int Call(int x, int y) => x + y; }
class C8 : IFace { public int Call(int x, int y) => x + y; }
class C9 : IFace { public int Call(int x, int y) => x + y; }
class CA : IFace { public int Call(int x, int y) => x + y; }
class CB : IFace { public int Call(int x, int y) => x + y; }
class CC : IFace { public int Call(int x, int y) => x + y; }
class CD : IFace { public int Call(int x, int y) => x + y; }
class CE : IFace { public int Call(int x, int y) => x + y; }
class CF : IFace { public int Call(int x, int y) => x + y; }

Main:

Details

84
88
108
122
406
206
---------------------------
83
88
103
120
426
206
---------------------------
83
89
104
121
426
206
---------------------------
83
89
104
121
443
207
---------------------------
84
89
103
131
491
203
---------------------------
82
88
101
121
418
209
---------------------------

PR:

Details

138
155
172
183
360
186
---------------------------
126
157
170
185
360
185
---------------------------
127
156
172
182
360
187
---------------------------
127
161
172
183
360
187
---------------------------
126
156
174
191
370
193
---------------------------
132
158
176
187
367
190
---------------------------

As expected, right now this is a regression when the level of polymorphism is small. We start to get wins when the old cache size grows beyond a dozen entries or so.

jakobbotsch · 2026-01-16T09:07:18Z

mov rcx,rbx /// TODO: why does JIT consider rcx clobbered?

It's probably not that we consider it clobbered, but LSRA has rather decided that this is homed in rbx and hence it must be moved to rcx at the ABI boundary. We don't have a mechanism that can say that this is present in both rbx and rcx after the first call.

MichalStrehovsky · 2026-01-19T13:12:11Z

As another experiment, I tried this shape:

mov         rcx,rbx  
mov         r8d,edi  
mov         edx,esi  
lea         r11,[__InterfaceDispatchCell_repro_IFace__Call_repro_Program__DoCallFixed (07FF682916700h)]  
call        RhpResolveInterfaceMethodFast (07FF682864E60h)

In this version RhpResolveInterfaceMethodFast will also tail call the target, but otherwise it's the same assembly (i.e. fast monomorphic check and then hashtable lookup).

The numbers look like below:

Details

97
97
121
126
257
126
---------------------------
83
98
122
123
254
130
---------------------------
82
97
123
126
258
127
---------------------------
82
98
121
126
255
127
---------------------------
81
98
120
126
254
127
---------------------------
81
98
123
125
257
127
---------------------------

The monomorphic case is as fast (maybe even a little bit faster) as what we have in main. Between 2-4 there's a bit of a regression, and starting from 8 the hashtable wins.

The interesting part is that the numbers now also make more sense than the PR version above - lines 3, 4, 6 of the result are roughly the same, which makes sense because they represent the "we found this in the first entry of the hashtable bucket". The numbers on lines 3, 4, 6 is in the PR version above are not "roughly the same" and I wonder if the extra call is to blame.

MichalStrehovsky · 2026-01-20T12:41:13Z

I did another few experiments, this time adding more linear caches (not just for the monomorphic case, but also tried with 2 entries and 4 entries).

The results look like this (for the same above program). Resolver is the shape described in the top post. TailcallCache1 is the shape described in #123252 (comment). TailCallCache2/4 is the same as TailCallCache1, but the dispatch cell has not just space for one MethodTable-CodePointer pair, but for 2/4 of them.

Baseline	Resolver	TailCallCache1	TailCallCache2	TailCallCache4
83	128.6	82.8	83	83.2
88.6	159	98.6	90.8	90.4
104.2	172	123.6	115.8	103.2
122	187	130.2	130.8	136.8
313	364.8	264.4	267.4	313
206.6	187.2	253.8	255.6	160.6

The numbers in Baseline are known to be bad - we know we need to add extra checks/barriers to it to make it correct (#121632 (comment)).

The scenarios with a resolver look pretty bad, so I didn't explore it further.

I think the TailCallCache1 looks the most attractive.

Because in this scheme the shape of the dispatch cell is fixed, RyuJIT would be able to inline the first cache test (leaving hashtable lookup for the helper). What we currently have in Main has a complicated scheme that would be difficult to inline. So we can make the monomorphic path not require a helper call at all.

I was trying to find some more real world scenario to measure on but so far haven't found anything interesting. E.g. JSON serialization has a lot of interface calls, but they are all monomorphic.

MichalStrehovsky · 2026-01-23T13:08:44Z

I pushed out new code and update top-post. This is now based on TailCallCache1 approach above.

The RyuJIT change is completely gross, I don't have a good idea for how it should be implemented. Do we want this to stay CORINFO_VIRTUALCALL_STUB on the JitInterface side, just changing the meaning from "do an indirect call with an extra non-standard argument" to "do a direct call to a helper with an extra non-standard argument"?

We'll need to implement and maintain the hashtable lookup in assembly for the other platforms :(.

We can probably have a CFG flavor of this helper that will call through the validator.

Eventually, we could have RyuJIT inline the monomorphic path.

Eventually, we could use this for generic virtual method calls too (they currently all go to the hashtable directly and the hashtable lookup is in C#).

jkotas · 2026-01-23T23:58:02Z

just changing the meaning from "do an indirect call with an extra non-standard argument"

What would trigger the change in the meaning? I do not think we want to tie this to NAOT ABI since we will want this to work on CoreCLR w/ R2R as well (for iOS).

I think the two options are a new CORINFO_CALL_KIND that you have mentioned, or a new CORJIT_FLAG_ that selects the virtual dispatch style for the whole compilation (existing similar flag is CORJIT_FLAG_USE_PINVOKE_HELPERS).

jkotas · 2026-01-23T23:59:35Z

src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm

+xor         r9d,ecx  
+movsxd      r9,r9d  
+mov         rax,9E3779B97F4A7C15h  
+imul        r9,rax  


Where did this hash function come from?

From here:

runtime/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/CachedInterfaceDispatch.cs

Lines 42 to 55 in 4c067ef

[MethodImpl(MethodImplOptions.NoInlining)]

private static nint Lookup1234(object thisobj, nint cell, ref GenericCache<Key, nint> cache)

{

if (cache.TryGet(new Key(cell, (nint)thisobj.GetMethodTable()), out nint result))

return result;

return SlowPath(thisobj, cell);

}

[MethodImpl(MethodImplOptions.NoInlining)]

private static nint SlowPath(object thisobj, nint cell)

{

return 0;

}

Then I made changes to the basic block layout and register allocation. Not sure if this is the way we want to go, but it's an option I saw.

This is solving the same problem as VSD polymorphic hashtable that has a lot simpler hash function: https://github.com/dotnet/runtime/blob/main/src/coreclr/vm/amd64/virtualcallstubcpu.hpp#L399-L443

We may want to make the design and tunning of this hashtable and the VSD hashtable to be same/similar.

dotnet-policy-service · 2026-02-24T04:17:44Z

Draft Pull Request was automatically closed for 30 days of inactivity. Please let us know if you'd like to reopen it.

regs reorder BB rax rax rbx rdx -> r11 no rdx f Hacks

f

Add a new JIT flag (bit 10) to control whether interface dispatch uses a direct helper call instead of virtual stub dispatch. NativeAOT sets this flag; CoreCLR/ReadyToRun does not. In LowerVirtualStubCall, when the flag is set: - For CT_INDIRECT calls (shared generic dictionary lookup), remove the orphaned gtCallAddr tree from LIR before changing the call type - Convert to CT_USER_FUNC with gtDirectCallAddress pointing to the dispatch resolver helper (CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT) - Clear GTF_CALL_VIRT_STUB and GTF_CALL_M_VIRTSTUB_REL_INDIRECT - The VirtualStubCell arg still passes the dispatch cell in r11 When the flag is not set, the original VSD lowering path is preserved. Re-enable two JIT asserts that were disabled as a hack: - 'Found an unmarked unused value' in CheckLIR (lir.cpp) - 'Expected empty defList at end of block' in buildIntervals (lsrabuild.cpp) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

LowerVirtualStubCall now clears GTF_CALL_VIRT_STUB when converting to a dispatch helper call, so the IsVirtualStub() block in LowerCFGCall is unreachable for NativeAOT. Remove it. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

The helper now dispatches to the target (tail-calls on x64) rather than just looking up the address. Rename to reflect the new semantics. Also remove the now-dead RBM_INTERFACEDISPATCH_FOR_SLOT_TRASH/RETURN register masks and their consumers in emit.cpp, codegencommon.cpp, codegenarmarch.cpp, and lsraarmarch.cpp. These were only used by the deleted CFG VSD code path that created CT_HELPER calls with this helper. The new dispatch helper path creates CT_USER_FUNC calls instead. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

Add unoptimized implementations of RhpResolveInterfaceMethodFast for platforms that were missing it: Windows i386, Linux amd64, Linux arm64, Linux arm32, Linux loongarch64, and Linux riscv64. Each implementation performs a null check (triggering an AV that EHHelpers translates to NullReferenceException) then falls through to RhpCidResolve via RhpUniversalTransitionTailCall, matching the established RhpInterfaceDispatchSlow pattern for the platform. ARM32 omits the null check due to register pressure (no free scratch register), consistent with its existing RhpInitialDynamicInterfaceDispatch. The CMakeLists.txt entry is moved from a conditional block (Windows amd64/arm64 only) to the unconditional RUNTIME_SOURCES_ARCH_ASM list. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

Copilot

Pull request overview

This PR prototypes switching interface dispatch to a resolve-helper-based call sequence (e.g., calling RhpResolveInterfaceMethodFast) and updates the dispatch cell/data layout to support a fast monomorphic cache with out-of-line metadata.

Changes:

Introduces a new JIT flag (USE_DISPATCH_HELPERS) and corresponding helper IDs/names for interface dispatch.
Refactors NativeAOT interface dispatch cells to a fixed 2-pointer shape (cached MethodTable + cached target), with interface+slot info stored in new ReadyToRun sections.
Removes legacy cached interface dispatch infrastructure in NativeAOT and updates runtime/JIT/tooling/asm stubs accordingly.

Reviewed changes

Copilot reviewed 71 out of 71 changed files in this pull request and generated 4 comments.

Show a summary per file

File	Description
src/coreclr/vm/virtualcallstub.h	Removes now-unused resolve worker declaration for interface lookup slot.
src/coreclr/vm/virtualcallstub.cpp	Removes implementation of legacy interface resolve worker path.
src/coreclr/vm/jithelpers.cpp	Renames helper entry point declaration to `JIT_InterfaceDispatchForSlot`.
src/coreclr/vm/frames.h	Removes `ResolveHelperFrame` frame type support.
src/coreclr/vm/arm64/stubs.cpp	Removes argument-reg regdisplay helper tied to removed frame.
src/coreclr/vm/arm64/asmhelpers.asm	Replaces helper implementation with leaf stub dispatch jump.
src/coreclr/vm/amd64/cgenamd64.cpp	Removes `ResolveHelperFrame` regdisplay logic.
src/coreclr/vm/amd64/VirtualCallStubAMD64.asm	Replaces helper with leaf stub dispatch jump.
src/coreclr/vm/FrameTypes.h	Removes `ResolveHelperFrame` from frame type list.
src/coreclr/tools/superpmi/superpmi-shared/spmidumphelper.cpp	Adds dumping support for `USE_DISPATCH_HELPERS` flag.
src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp	Adds string name for new CORJIT flag.
src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs	Maps new helper to `RhpResolveInterfaceMethodFast`.
src/coreclr/tools/aot/ILCompiler.Compiler/ILCompiler.Compiler.csproj	Includes new `InterfaceDispatchCellInfoSectionNode`.
src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/MetadataManager.cs	Adds new ReadyToRun sections for dispatch cells and metadata.
src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs	Removes always-rooted dispatch cell section from graph roots (now emitted via header).
src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/InterfaceDispatchCellSectionNode.cs	Changes dispatch cell region emission to BSS section sized by cell count.
src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/InterfaceDispatchCellNode.cs	Refactors dispatch cell node to sortable symbol with externally-initialized offsets and fixed size.
src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/InterfaceDispatchCellInfoSectionNode.cs	New RO-data section emitting interface+slot metadata and initializing cell offsets.
src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs	Adds `CORJIT_FLAG_USE_DISPATCH_HELPERS`.
src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs	Enables `USE_DISPATCH_HELPERS` flag for non-ReadyToRun builds.
src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs	Renames helper enum to `INTERFACEDISPATCH_FOR_SLOT`.
src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs	Assigns new ReadyToRun section IDs 209/210 for dispatch regions.
src/coreclr/tools/Common/Compiler/DependencyAnalysis/SortableDependencyNode.cs	Adds ordered emission slots for the new dispatch sections.
src/coreclr/runtime/CachedInterfaceDispatch.cpp	Adjusts cache allocation logic for `VTableOffset` case.
src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/GenericDictionaryCell.cs	Allocates new-style dispatch cell + inline metadata block for dynamic cells.
src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs	Removes `RhNewInterfaceDispatchCell` import; adds `RhpRegisterDispatchCache`.
src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/Augments/RuntimeAugments.cs	Removes managed `NewInterfaceDispatchCell` API.
src/coreclr/nativeaot/Runtime/startup.cpp	Removes legacy interface dispatch initialization.
src/coreclr/nativeaot/Runtime/riscv64/DispatchResolve.S	Adds `RhpResolveInterfaceMethodFast` entry for riscv64.
src/coreclr/nativeaot/Runtime/portable.cpp	Removes NYI cached interface dispatch helper stubs.
src/coreclr/nativeaot/Runtime/loongarch64/DispatchResolve.S	Adds `RhpResolveInterfaceMethodFast` entry for loongarch64.
src/coreclr/nativeaot/Runtime/inc/rhbinder.h	Removes legacy cached interface dispatch structs/types.
src/coreclr/nativeaot/Runtime/i386/DispatchResolve.asm	Adds `RhpResolveInterfaceMethodFast` entry for i386.
src/coreclr/nativeaot/Runtime/arm64/UniversalTransition.asm	Removes “return-result” universal transition variant.
src/coreclr/nativeaot/Runtime/arm64/DispatchResolve.asm	Simplifies arm64 fast helper to universal-transition slow path.
src/coreclr/nativeaot/Runtime/arm64/DispatchResolve.S	Adds Unix arm64 `RhpResolveInterfaceMethodFast`.
src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.asm	Deletes old cached interface dispatch slow stub (Windows).
src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.S	Deletes old cached interface dispatch slow stub (Unix).
src/coreclr/nativeaot/Runtime/arm/DispatchResolve.S	Adds ARM `RhpResolveInterfaceMethodFast`.
src/coreclr/nativeaot/Runtime/amd64/UniversalTransition.asm	Removes “return-result” universal transition variant.
src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm	Reworks amd64 fast helper to use 2-pointer cell + global cache.
src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.S	Adds Unix amd64 `RhpResolveInterfaceMethodFast`.
src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.asm	Deletes old cached interface dispatch slow stub (Windows).
src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.S	Deletes old cached interface dispatch slow stub (Unix).
src/coreclr/nativeaot/Runtime/allocheap.h	Deletes allocator used by removed cached interface dispatch code.
src/coreclr/nativeaot/Runtime/allocheap.cpp	Deletes allocator implementation used by removed code.
src/coreclr/nativeaot/Runtime/SyncClean.cpp	Removes GC-time reclaim hook for old dispatch caches.
src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp	Removes universal transition return-result thunk category support.
src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp	Adds `g_pDispatchCache` and `RhpRegisterDispatchCache` internal call.
src/coreclr/nativeaot/Runtime/EHHelpers.cpp	Updates interface-dispatch AV-location list to new helper set.
src/coreclr/nativeaot/Runtime/Crst.h	Removes unused interface dispatch lock enum value.
src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch_Aot.cpp	Deletes old cached interface dispatch PAL implementation.
src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchPal.h	Deletes old cached interface dispatch PAL header.
src/coreclr/nativeaot/Runtime/CMakeLists.txt	Removes cached dispatch sources/defines; adds DispatchResolve to all arch asm lists.
src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp	Removes include dependency on deleted cached dispatch header.
src/coreclr/nativeaot/Runtime/AsmOffsets.h	Removes asm offsets/constants for deleted cached dispatch structs.
src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/InternalCalls.cs	Removes internal-call declarations for old cache APIs.
src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/CachedInterfaceDispatch.cs	Implements new dispatch cell metadata lookup, caching, and registration.
src/coreclr/nativeaot/CMakeLists.txt	Removes `-mcx16` option previously used by old dispatch cache synchronization.
src/coreclr/jit/targetarm64.h	Removes register-kill metadata for old interface-lookup helper.
src/coreclr/jit/targetamd64.h	Removes register-kill metadata for old interface-lookup helper.
src/coreclr/jit/lsraarmarch.cpp	Removes special return-reg handling for old interface-lookup helper.
src/coreclr/jit/lower.cpp	Routes VSD calls to `INTERFACEDISPATCH_FOR_SLOT` helper under new flag/CFG, removing older CFG lowering path.
src/coreclr/jit/jitee.h	Adds JIT flag constant and mapping for `USE_DISPATCH_HELPERS`.
src/coreclr/jit/emit.cpp	Removes special saved-reg computation for old interface-lookup helper.
src/coreclr/jit/compiler.h	Adds `ShouldUseDispatchHelpers()` query for the new JIT flag.
src/coreclr/jit/codegencommon.cpp	Removes killset and pending-call-label handling tied to old helper.
src/coreclr/jit/codegenarmarch.cpp	Removes special return-reg selection for old interface-lookup helper.
src/coreclr/inc/jithelpers.h	Renames helper entry to `CORINFO_HELP_INTERFACEDISPATCH_FOR_SLOT`.
src/coreclr/inc/corjitflags.h	Adds `CORJIT_FLAG_USE_DISPATCH_HELPERS` to public header.
src/coreclr/inc/corinfo.h	Renames helper enum to `CORINFO_HELP_INTERFACEDISPATCH_FOR_SLOT`.

Comments suppressed due to low confidence (1)

src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/InterfaceDispatchCellSectionNode.cs:12

This file now has several unused using directives/aliases (e.g., System.Collections.Generic, Internal.Runtime, Internal.TypeSystem, and the Debug alias). With TreatWarningsAsErrors enabled, unused usings (CS8019) can break the build. Remove the unused using statements.

You can also share your feedback on Copilot code review. Take the survey.

Copilot · 2026-03-11T07:43:59Z

src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/CachedInterfaceDispatch.cs

+                    info = new DispatchCellInfo
+                    {
+                        CellType = DispatchCellType.InterfaceAndSlot,
+                        InterfaceType = (MethodTable*)(dispatchCellInfo + (cellIndex * 2)),


In the non-relative pointer case, InterfaceType is set to the address of the entry inside the info array (dispatchCellInfo + ...) rather than the value stored there. This will produce an invalid MethodTable* and break dispatch resolution on targets that don't use relative pointers. Load/dereference the element value instead of pointer-arithmetic casting.

Suggested change

InterfaceType = (MethodTable*)(dispatchCellInfo + (cellIndex * 2)),

InterfaceType = (MethodTable*)(*(dispatchCellInfo + (cellIndex * 2))),

Copilot · 2026-03-11T07:43:59Z

.../aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/InterfaceDispatchCellInfoSectionNode.cs

+        {
+            private readonly NodeFactory _factory;
+            private readonly CompilerComparer _comparer = CompilerComparer.Instance;
+
+            public DispatchCellComparer(NodeFactory factory)
+            {
+                _factory = factory;
+            }


DispatchCellComparer stores a _factory field that is never used. With TreatWarningsAsErrors enabled, this will fail the build (CS0414). Remove the unused field/constructor parameter, or use it in comparisons if intended.

Copilot · 2026-03-11T07:44:00Z

...reclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/InterfaceDispatchCellNode.cs


 using Internal.Runtime;
 using Internal.Text;
 using Internal.TypeSystem;

+using ILCompiler.DependencyAnalysisFramework;
+


using Internal.Runtime; is now unused in this file (it was previously needed for relocation/flag types). With TreatWarningsAsErrors enabled, this can break the build due to CS8019. Remove the unused using directive.

src/coreclr/nativeaot/Runtime/arm/DispatchResolve.S

MichalStrehovsky · 2026-03-11T11:37:31Z

/azp run runtime-nativeaot-outerloop

azure-pipelines · 2026-03-11T11:37:50Z

Azure Pipelines successfully started running 1 pipeline(s).

When a direct call address is not reachable by BL on ARM32, the address is loaded into a register and the call type is switched to EC_INDIR_R. Clear params.addr to satisfy the emitter's invariant that EC_INDIR_R calls have addr == nullptr. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

MichalStrehovsky · 2026-03-11T22:35:09Z

/azp run runtime-nativeaot-outerloop

azure-pipelines · 2026-03-11T22:35:19Z

Azure Pipelines successfully started running 1 pipeline(s).

The helper dispatches to the target (tail-calls) rather than just resolving the address, so the name should reflect the dispatch semantics, consistent with CORINFO_HELP_INTERFACEDISPATCH_FOR_SLOT. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

Introduce guarded variants of the interface dispatch helpers that validate the computed dispatch target using the Windows Control Flow Guard validator (__guard_check_icall_fptr) before tail-calling it. - Add ValidateTarget parameter to the UNIVERSAL_TRANSITION macro and instantiate RhpUniversalTransitionGuardedTailCall alongside the existing RhpUniversalTransitionTailCall (AMD64 and ARM64 .asm). - Refactor RhpInterfaceDispatch into an INTERFACE_DISPATCH macro and instantiate RhpInterfaceDispatchGuarded that routes through the guarded UniversalTransition (AMD64 and ARM64 .asm). - Map CORINFO_HELP_INTERFACEDISPATCH_FOR_SLOT to the guarded variant when ControlFlowGuardAnnotations is enabled in the NativeAOT compiler. - Register the guarded dispatch stub for AV-to-NullRef translation in EHHelpers and the guarded return address in StackFrameIterator, guarded by TARGET_WINDOWS && (TARGET_AMD64 || TARGET_ARM64). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

Copilot

Pull request overview

Copilot reviewed 71 out of 71 changed files in this pull request and generated 5 comments.

You can also share your feedback on Copilot code review. Take the survey.

src/coreclr/nativeaot/Runtime/arm/DispatchResolve.S

src/coreclr/vm/amd64/VirtualCallStubAMD64.asm

src/coreclr/vm/arm64/asmhelpers.asm

src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/CachedInterfaceDispatch.cs

...ools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/InterfaceDispatchCellSectionNode.cs

MichalStrehovsky · 2026-03-12T13:06:23Z

/azp run jit-cfg, runtime-nativeaot-outerloop

azure-pipelines · 2026-03-12T13:06:49Z

Azure Pipelines successfully started running 2 pipeline(s).

jkotas · 2026-03-12T14:16:58Z

src/coreclr/inc/corinfo.h


    CORINFO_HELP_GVMLOOKUP_FOR_SLOT,        // Resolve a generic virtual method target from this pointer and runtime method handle
-    CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT,  // Resolve a non-generic interface method from this pointer and dispatch cell
+    CORINFO_HELP_INTERFACEDISPATCH_FOR_SLOT,  // Dispatch a non-generic interface method from this pointer and dispatch cell


Are we willing to give up better branch prediction from the non-shared indirect callsite?

BTW: I expect that we will need resolve variant for wasm. Dispatch variant won't work well for wasm since the helper would need to be target signature specific.

Are we willing to give up better branch prediction from the non-shared indirect callsite?

The resolve helper is a lot slower than a dispatcher. I had resolve helper here in the past. The numbers are at #123252 (comment). Switching back to a dispatcher (#123252 (comment)) was an improvement everywhere. My guess is the branch predictor also uses past behavior or stack or something else, not just the IP.

The numbers are at

I meant to link #123252 (comment). The resolver helper is in the Resolver column. What this branch is now is TailCallCache1 (but I temporarily deleted a bunch of assembly, so we're running a completely unoptimized version which will be slower).

I expect we'll end up better than TailCallCache1 if we make the hash function a tiny bit simpler. I'm looking at the VSD hashtable right now. We'll not be able to make it as good because we don't want separate stub per callsite, but it could still be a bit better than it was for those measurements.

Port the monomorphic MethodTable cache check from amd64/arm64 to the remaining architectures. i386 (Windows): Use push/pop ebx for a scratch register (matching StubDispatch.asm pattern). EDX is never touched. No barrier needed (x86 TSO guarantees store-store and load-load ordering). ARM32 (Unix): Use r1/r2 saved to red zone as scratch. DMB barrier between MethodTable and Code loads (ARMv7 has no load-acquire). GLOBAL_LABEL (no .thumb_func) for AV location matches EHHelpers.cpp faulting IP comparison. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

MichalStrehovsky · 2026-03-13T12:17:49Z

/azp run runtime-nativeaot-outerloop

azure-pipelines · 2026-03-13T12:18:00Z

Azure Pipelines successfully started running 1 pipeline(s).

github-actions bot added the area-NativeAOT-coreclr label Jan 16, 2026

dotnet-policy-service bot assigned MichalStrehovsky Jan 16, 2026

MichalStrehovsky force-pushed the reshelper branch from aba4515 to 4c067ef Compare January 23, 2026 12:59

jkotas reviewed Jan 23, 2026

View reviewed changes

dotnet-policy-service bot closed this Feb 24, 2026

MichalStrehovsky reopened this Feb 24, 2026

MichalStrehovsky and others added 11 commits March 11, 2026 13:25

wip

149e2f5

regs reorder BB rax rax rbx rdx -> r11 no rdx f Hacks

wip

66b100c

f

GC.Collect

5ee5e0f

Build breaks after rebase

42c4a8f

Redo the CoreCLR CFG path

bfc3003

More fixes

19076e4

GC.Collect

05d32f8

Copilot AI review requested due to automatic review settings March 11, 2026 07:32

MichalStrehovsky force-pushed the reshelper branch from 4c067ef to d5832a5 Compare March 11, 2026 07:32

Copilot started reviewing on behalf of MichalStrehovsky March 11, 2026 07:33 View session

Copilot AI reviewed Mar 11, 2026

View reviewed changes

build-analysis bot mentioned this pull request Mar 11, 2026

Checkout failure: "Git fetch failed with exit code 128" dotnet/arcade#9009

Open

2 tasks

MichalStrehovsky changed the title ~~Use resolve helper for calling interface methods~~ Use dispatch helper for calling interface methods Mar 11, 2026

This was referenced Mar 11, 2026

The Operation will be canceled. The next steps may not contain expected logs. dotnet/dnceng#3008

Open

[android-arm64] The Operation will be canceled. The next steps may not contain expected logs. dotnet/dnceng#6408

Open

MichalStrehovsky and others added 2 commits March 12, 2026 07:32

Temporarily remove fast path

ac62b46

build-analysis bot mentioned this pull request Mar 12, 2026

MsQuic fails with QUIC_STATUS_OUT_OF_MEMORY on AzureLinux #123216

Open

github-actions bot mentioned this pull request Mar 12, 2026

123252 MichalStrehovsky/rt-sz#211

Closed

Copilot AI review requested due to automatic review settings March 12, 2026 06:47

Copilot started reviewing on behalf of MichalStrehovsky March 12, 2026 06:49 View session

Copilot AI reviewed Mar 12, 2026

View reviewed changes

build-analysis bot mentioned this pull request Mar 12, 2026

System.Xml.CharCheckingReaderTests fails with OutOfMemoryException #119746

Open

jkotas reviewed Mar 12, 2026

View reviewed changes

MichalStrehovsky and others added 3 commits March 13, 2026 17:38

amd64/arm64 monomorphic path

0f8ff60

AV in arm32 handling

5b15cb1

	[MethodImpl(MethodImplOptions.NoInlining)]
	private static nint Lookup1234(object thisobj, nint cell, ref GenericCache<Key, nint> cache)
	{
	if (cache.TryGet(new Key(cell, (nint)thisobj.GetMethodTable()), out nint result))
	return result;

	return SlowPath(thisobj, cell);
	}

	[MethodImpl(MethodImplOptions.NoInlining)]
	private static nint SlowPath(object thisobj, nint cell)
	{
	return 0;
	}

	InterfaceType = (MethodTable)(dispatchCellInfo + (cellIndex 2)),
	InterfaceType = (MethodTable)((dispatchCellInfo + (cellIndex * 2))),

Conversation

MichalStrehovsky commented Jan 16, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

MichalStrehovsky commented Jan 16, 2026

Uh oh!

jakobbotsch commented Jan 16, 2026

Uh oh!

MichalStrehovsky commented Jan 19, 2026

Uh oh!

MichalStrehovsky commented Jan 20, 2026

Uh oh!

MichalStrehovsky commented Jan 23, 2026

Uh oh!

jkotas commented Jan 23, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

jkotas Jan 23, 2026

Choose a reason for hiding this comment

Uh oh!

MichalStrehovsky Jan 24, 2026

Choose a reason for hiding this comment

Uh oh!

jkotas Jan 25, 2026

Choose a reason for hiding this comment

Uh oh!

dotnet-policy-service bot commented Feb 24, 2026

Uh oh!

Copilot AI left a comment

Choose a reason for hiding this comment

Pull request overview

Reviewed changes

Uh oh!

Copilot AI Mar 11, 2026

Choose a reason for hiding this comment

Uh oh!

Copilot AI Mar 11, 2026

Choose a reason for hiding this comment

Uh oh!

Copilot AI Mar 11, 2026

Choose a reason for hiding this comment

Uh oh!

Uh oh!

MichalStrehovsky commented Mar 11, 2026

Uh oh!

azure-pipelines bot commented Mar 11, 2026

Uh oh!

MichalStrehovsky commented Mar 11, 2026

Uh oh!

azure-pipelines bot commented Mar 11, 2026

Uh oh!

Copilot AI left a comment

Choose a reason for hiding this comment

Pull request overview

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

MichalStrehovsky commented Mar 12, 2026

Uh oh!

azure-pipelines bot commented Mar 12, 2026

Uh oh!

jkotas Mar 12, 2026

Choose a reason for hiding this comment

Uh oh!

MichalStrehovsky Mar 12, 2026

Choose a reason for hiding this comment

Uh oh!

MichalStrehovsky Mar 12, 2026

Choose a reason for hiding this comment

Uh oh!

MichalStrehovsky commented Mar 13, 2026

Uh oh!

azure-pipelines bot commented Mar 13, 2026

Uh oh!

Reviewers

Assignees

Labels

MichalStrehovsky commented Jan 16, 2026 •

edited

Loading

jkotas commented Jan 23, 2026 •

edited

Loading