-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Closed
Description
The behavior change in #73606 is due to a JIT bug no longer triggering, which was bisected to be a side effect of #66887.
@markples tracked down that we are in fact seeing new runtime lookups due to this change. We are not quite sure whether these are necessary or not. Simple example:
using System;
using System.Runtime.CompilerServices;
// Minimal repro: Main passes a C instance to the constrained generic method CallFoo
// and prints the returned value.
public class Program
{
public static void Main()
{
Console.WriteLine(CallFoo(new C()));
}
// NoInlining keeps CallFoo from being inlined into Main, so it is compiled as its own
// method. The listings below are for the Program:CallFoo(System.__Canon) instantiation.
[MethodImpl(MethodImplOptions.NoInlining)]
private static int CallFoo<T>(T val) where T : IFace
{
// Interface call made through the generic constraint; this is the call whose
// codegen is compared in the two listings below.
return val.Foo();
}
}
// Interface used as the generic constraint on Program.CallFoo<T>.
public interface IFace
{
// Called by CallFoo through the constraint.
int Foo();
}
// Reference-type implementation of IFace; Main passes an instance of it to CallFoo.
public class C : IFace
{
// Always returns 0.
public int Foo() => 0;
}
Codegen before #66887:
; NOTE(review): lines marked "NOTE(review)" are reader annotations, not JIT output.
; NOTE(review): in this listing the TypeCtx local (V00) is unused (zero-ref) and the
; call target is loaded from a constant address; there is no runtime lookup.
; Assembly listing for method Program:CallFoo(System.__Canon):int
; Emitting BLENDED_CODE for X64 CPU with AVX - Windows
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; Final local variable assignments
;
;* V00 TypeCtx [V00 ] ( 0, 0 ) long -> zero-ref single-def
; V01 arg0 [V01,T00] ( 3, 3 ) ref -> rdx ld-addr-op class-hnd single-def
; V02 OutArgs [V02 ] ( 1, 1 ) lclBlk (32) [rsp+00H] "OutgoingArgSpace"
;
; Lcl frame size = 40
G_M39006_IG01:
; NOTE(review): allocates the 40-byte frame reported above; 32 bytes of it are the
; OutgoingArgSpace local (V02).
sub rsp, 40
;; size=4 bbWeight=1 PerfScore 0.25
G_M39006_IG02:
; NOTE(review): arg0 (V01) arrives in rdx per the table above and is copied into rcx
; for the call - presumably as the `this` argument of IFace:Foo.
mov rcx, rdx
; NOTE(review): r11 = constant address; the call goes indirectly through it.
; 0xD1FFAB1E is presumably a placeholder for the real address - confirm.
mov r11, 0xD1FFAB1E
call [r11]IFace:Foo():int:this
nop
;; size=17 bbWeight=1 PerfScore 3.75
G_M39006_IG03:
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code 26, prolog size 4, PerfScore 7.85, instruction count 7, allocated bytes for code 26 (MethodHash=972067a1) for method Program:CallFoo(System.__Canon):int
; ============================================================

Codegen on current main:
; NOTE(review): lines marked "NOTE(review)" are reader annotations, not JIT output.
; NOTE(review): in this listing the TypeCtx local (V00) is live in rcx and is used for a
; runtime lookup of the call target: two dependent loads, then a helper call if the
; loaded value is null.
; Assembly listing for method Program:CallFoo(System.__Canon):int
; Emitting BLENDED_CODE for X64 CPU with AVX - Windows
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; Final local variable assignments
;
; V00 TypeCtx [V00,T00] ( 5, 4.25) long -> rcx single-def
; V01 arg0 [V01,T02] ( 3, 3 ) ref -> rsi ld-addr-op class-hnd single-def
; V02 OutArgs [V02 ] ( 1, 1 ) lclBlk (32) [rsp+00H] "OutgoingArgSpace"
; V03 tmp1 [V03,T01] ( 4, 5 ) long -> r11 "spilling Runtime Lookup tree"
;* V04 tmp2 [V04 ] ( 0, 0 ) long -> zero-ref "VirtualCall with runtime lookup"
; V05 cse0 [V05,T03] ( 3, 2.25) long -> r11 "CSE - aggressive"
;
; Lcl frame size = 48
G_M39006_IG01:
; NOTE(review): rsi is saved here and restored in IG06 because it is used below to
; hold arg0.
push rsi
sub rsp, 48
; NOTE(review): stores the incoming TypeCtx (V00, in rcx) into the frame at [rsp+28H].
mov qword ptr [rsp+28H], rcx
; NOTE(review): arg0 (V01) is moved from rdx to rsi; rdx is overwritten below.
mov rsi, rdx
;; size=13 bbWeight=1 PerfScore 2.50
G_M39006_IG02:
; NOTE(review): runtime lookup: two dependent loads starting from TypeCtx in rcx;
; the result lands in r11 (V03/V05 in the table above).
mov rdx, qword ptr [rcx+56]
mov r11, qword ptr [rdx+16]
; NOTE(review): a null result branches to the helper call in IG04.
test r11, r11
je SHORT G_M39006_IG04
;; size=13 bbWeight=1 PerfScore 5.25
G_M39006_IG03:
; NOTE(review): non-null result: skip the helper call and go straight to the call in IG05.
jmp SHORT G_M39006_IG05
;; size=2 bbWeight=0.25 PerfScore 0.50
G_M39006_IG04:
; NOTE(review): null result: call the runtime helper; rcx has not been written since
; entry, so it still holds TypeCtx here. The helper's return value (rax) becomes
; the call target in r11.
mov rdx, 0xD1FFAB1E ; global ptr
call CORINFO_HELP_RUNTIMEHANDLE_METHOD
mov r11, rax
;; size=18 bbWeight=0.25 PerfScore 0.38
G_M39006_IG05:
; NOTE(review): arg0 (kept in rsi) is moved into rcx, then the call goes indirectly
; through the looked-up pointer in r11.
mov rcx, rsi
call [r11]
nop
;; size=7 bbWeight=1 PerfScore 3.50
G_M39006_IG06:
add rsp, 48
pop rsi
ret
;; size=6 bbWeight=1 PerfScore 1.75
; Total bytes of code 59, prolog size 10, PerfScore 19.78, instruction count 18, allocated bytes for code 59 (MethodHash=972067a1) for method Program:CallFoo(System.__Canon):int
; ============================================================

Aside from the perf regression, fixing this should restore the behavior of #73606 to what it was previously. We then plan to fix the underlying Roslyn and JIT issues (#73615) in .NET 8.