Don't spill Vectors from registers to stack #1146
Conversation
|
/cc @sivarv |
|
Looks good to me. |
6181f4c to
b333578
Compare
|
Did anyone actually disassemble this to make sure the values are in registers with this change? |
|
yep only uses 3 simd registers and plus one which is |
|
Hmm... not sure about that last |
|
@benaadams - Can you provide full jit dump for me to take a look? |
|
I think the earlier one was from me experimenting with both changes. This is just this PR.
Prior:
Inlines into 06000168 MemoryPoolIterator:Seek(byref,byref,byref,byref):int:this
[1 IL=0001 TR=000002 0600015A] [below ALWAYS_INLINE size] MemoryPoolIterator:get_IsDefault():bool:this
[2 IL=0098 TR=000076 0600015C] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Block():ref:this
[3 IL=0108 TR=000141 0600015D] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Index():int:this
[4 IL=0132 TR=000129 0600015D] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Index():int:this
[5 IL=0183 TR=000151 06000152] [below ALWAYS_INLINE size] MemoryPoolBlock:get_Array():ref:this
[6 IL=0006 TR=000617 060038EC] [below ALWAYS_INLINE size] ArraySegment`1:get_Array():ref:this
[0 IL=0281 TR=000585 06000169] [FAILED: unprofitable inline] MemoryPoolIterator:FindFirstEqualByte(byref):int
[0 IL=0304 TR=000575 06000169] [FAILED: unprofitable inline] MemoryPoolIterator:FindFirstEqualByte(byref):int
[0 IL=0327 TR=000565 06000169] [FAILED: unprofitable inline] MemoryPoolIterator:FindFirstEqualByte(byref):int
[7 IL=0380 TR=000530 0600015C] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Block():ref:this
[8 IL=0393 TR=000539 0600015D] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Index():int:this
[9 IL=0413 TR=000553 0600015D] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Index():int:this
[10 IL=0501 TR=000434 0600015C] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Block():ref:this
[11 IL=0516 TR=000448 0600015D] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Index():int:this
[12 IL=0526 TR=000461 0600015D] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Index():int:this
[13 IL=0554 TR=000184 0600015C] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Block():ref:this
[14 IL=0575 TR=000194 0600015D] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Index():int:this
Budget: initialTime=2136, finalTime=2152, initialBudget=21360, currentBudget=21360
Budget: initialSize=15908, finalSize=15908
; Assembly listing for method MemoryPoolIterator:Seek(byref,byref,byref,byref):int:this
; Emitting BLENDED_CODE for X64 CPU with SSE2
; optimized code
; rsp based frame
; fully interruptible
; Final local variable assignments
;
; V00 this [V00,T20] ( 19, 11 ) byref -> [rsp+0xC0] this
; V01 arg1 [V01,T16] ( 4, 514.5) byref -> rdi
; V02 arg2 [V02,T17] ( 4, 514.5) byref -> rbx
; V03 arg3 [V03,T18] ( 4, 514.5) byref -> rbp
; V04 arg4 [V04,T10] ( 11, 1286.5) byref -> rsi
; V05 loc0 [V05,T08] ( 22, 2193 ) ref -> r15
; V06 loc1 [V06,T01] ( 18,11401 ) int -> r12
; V07 loc2 [V07,T23] ( 3, 4.5) bool -> [rsp+0x74]
; V08 loc3 [V08,T07] ( 10, 3955 ) int -> [rsp+0x70]
; V09 loc4 [V09,T09] ( 4, 1792 ) ref -> r13
; V10 loc5 [V10,T14] ( 6, 642 ) int -> [rsp+0x6C]
; V11 loc6 [V11,T15] ( 6, 642 ) int -> [rsp+0x68]
; V12 loc7 [V12,T13] ( 7, 642.5) int -> [rsp+0x64]
; V13 loc8 [V13,T04] ( 4, 4097.5) ubyte -> [rsp+0x60]
; V14 loc9 [V14,T05] ( 4, 4097.5) ubyte -> [rsp+0x5C]
; V15 loc10 [V15,T03] ( 5, 4098 ) ubyte -> [rsp+0x58]
; V16 loc11 [V16,T00] ( 8,25216 ) long -> r13
; V17 loc12 [V17,T02] ( 2, 4608 ) long -> r8
; V18 loc13 [V18 ] ( 3, 1152 ) simd16 -> [rsp+0x40] do-not-enreg[XS] must-init addr-exposed ld-addr-op
; V19 loc14 [V19 ] ( 3, 1152 ) simd16 -> [rsp+0x30] do-not-enreg[XS] must-init addr-exposed ld-addr-op
; V20 loc15 [V20 ] ( 3, 1152 ) simd16 -> [rsp+0x20] do-not-enreg[XS] must-init addr-exposed ld-addr-op
; V21 loc16 [V21,T24] ( 5, 2.5) int -> rdx
; V22 loc17 [V22,T25] ( 5, 2.5) int -> rdi
; V23 tmp0 [V23,T22] ( 3, 6 ) int -> r8
; V24 tmp1 [V24,T19] ( 2, 512 ) long -> r8
; V25 tmp2 [V25,T12] ( 3, 768 ) long -> r8
; V26 tmp3 [V26,T06] ( 4, 4096 ) simd16 -> mm0
; V27 tmp4 [V27,T26] ( 2, 2 ) int -> rcx
; V28 tmp5 [V28,T21] ( 9, 10 ) int -> rax
; V29 tmp6 [V29,T11] ( 2, 1024 ) byref -> r13
; V30 OutArgs [V30 ] ( 1, 1 ) lclBlk (32) [rsp+0x00]
;
; Lcl frame size = 120
G_M3332_IG01:
4157 push r15
4156 push r14
4155 push r13
4154 push r12
57 push rdi
56 push rsi
55 push rbp
53 push rbx
4883EC78 sub rsp, 120
488BF1 mov rsi, rcx
488D7C2420 lea rdi, [rsp+20H]
B90C000000 mov ecx, 12
33C0 xor rax, rax
F3AB rep stosd
488BCE mov rcx, rsi
4C8BF1 mov r14, rcx
488BFA mov rdi, rdx
498BD8 mov rbx, r8
498BE9 mov rbp, r9
488BB424E0000000 mov rsi, bword ptr [rsp+E0H]
G_M3332_IG02:
49833E00 cmp gword ptr [r14], 0
750A jne SHORT G_M3332_IG03
B8FFFFFFFF mov eax, -1
E916040000 jmp G_M3332_IG31
G_M3332_IG03:
4D8B3E mov r15, gword ptr [r14]
458B6608 mov r12d, dword ptr [r14+8]
49837F1800 cmp gword ptr [r15+24], 0
410F94C5 sete r13b
450FB6ED movzx r13, r13b
418B5734 mov edx, dword ptr [r15+52]
448BC2 mov r8d, edx
452BC4 sub r8d, r12d
41B9FFFFFF7F mov r9d, 0x7FFFFFFF
C7442468FFFFFF7F mov dword ptr [rsp+68H], 0x7FFFFFFF
C7442464FFFFFF7F mov dword ptr [rsp+64H], 0x7FFFFFFF
0FB617 movzx rdx, byte ptr [rdi]
0FB6D2 movzx rdx, dl
89542460 mov dword ptr [rsp+60H], edx
0FB60B movzx rcx, byte ptr [rbx]
0FB6C9 movzx rcx, cl
894C245C mov dword ptr [rsp+5CH], ecx
0FB64500 movzx rax, byte ptr [rbp]
0FB6C0 movzx rax, al
89442458 mov dword ptr [rsp+58H], eax
4585C0 test r8d, r8d
7513 jne SHORT G_M3332_IG05
G_M3332_IG04:
4C3B3E cmp r15, gword ptr [rsi]
7515 jne SHORT G_M3332_IG06
443B6608 cmp r12d, dword ptr [rsi+8]
410F9FC0 setg r8b
450FB6C0 movzx r8, r8b
EB0A jmp SHORT G_M3332_IG07
G_M3332_IG05:
44896C2474 mov dword ptr [rsp+74H], r13d
EB51 jmp SHORT G_M3332_IG10
G_M3332_IG06:
4533C0 xor r8d, r8d
G_M3332_IG07:
450BC5 or r8d, r13d
4585C0 test r8d, r8d
741C je SHORT G_M3332_IG08
498BCE mov rcx, r14
498BD7 mov rdx, r15
E8C0F4B05F call CORINFO_HELP_CHECKED_ASSIGN_REF
8B4E08 mov ecx, dword ptr [rsi+8]
41894E08 mov dword ptr [r14+8], ecx
B8FFFFFFFF mov eax, -1
E97D030000 jmp G_M3332_IG31
G_M3332_IG08:
4D8B7F18 mov r15, gword ptr [r15+24]
458B6730 mov r12d, dword ptr [r15+48]
49837F1800 cmp gword ptr [r15+24], 0
410F94C5 sete r13b
450FB6ED movzx r13, r13b
44896C2474 mov dword ptr [rsp+74H], r13d
458B4734 mov r8d, dword ptr [r15+52]
452BC4 sub r8d, r12d
G_M3332_IG09:
4585C0 test r8d, r8d
0F84AA010000 je G_M3332_IG15
G_M3332_IG10:
458B2F mov r13d, dword ptr [r15]
4D8D6F38 lea r13, bword ptr [r15+56]
4D8B6D00 mov r13, gword ptr [r13]
4585C0 test r8d, r8d
7EE7 jle SHORT G_M3332_IG09
G_M3332_IG11:
4489442470 mov dword ptr [rsp+70H], r8d
4183F810 cmp r8d, 16
0F8C1B020000 jl G_M3332_IG22
4C89B424C0000000 mov bword ptr [rsp+C0H], r14
453B6508 cmp r12d, dword ptr [r13+8]
0F8333030000 jae G_M3332_IG32
458D74240F lea r14d, [r12+15]
453B7508 cmp r14d, dword ptr [r13+8]
0F8324030000 jae G_M3332_IG32
430F10442510 movups xmm0, xmmword ptr [r13+r12+16]
0F100F movups xmm1, xmmword ptr [rdi]
660F74C8 pcmpeqb xmm1, xmm0
0F294C2440 movaps xmmword ptr [rsp+40H], xmm1
0F100B movups xmm1, xmmword ptr [rbx]
660F74C8 pcmpeqb xmm1, xmm0
0F294C2430 movaps xmmword ptr [rsp+30H], xmm1
0F104D00 movups xmm1, xmmword ptr [rbp]
660F74C1 pcmpeqb xmm0, xmm1
0F29442420 movaps xmmword ptr [rsp+20H], xmm0
0F28442440 movaps xmm0, xmmword ptr [rsp+40H]
660FEFC9 pxor xmm1, xmm1
0F28D0 movaps xmm2, xmm0
660F76D1 pcmpeqd xmm2, xmm1
660F70DA4E pshufd xmm3, xmm2, 78
0F54D3 andps xmm2, xmm3
660F70DA01 pshufd xmm3, xmm2, 1
660FDBD3 pand xmm2, xmm3
66410F7ED6 movd r14d, xmm2
4181FEFFFFFFFF cmp r14d, 0xFFFFFFFF
7413 je SHORT G_M3332_IG12
488D4C2440 lea rcx, bword ptr [rsp+40H]
E891FAFFFF call MemoryPoolIterator:FindFirstEqualByte(byref):int
8944246C mov dword ptr [rsp+6CH], eax
448B4C246C mov r9d, dword ptr [rsp+6CH]
G_M3332_IG12:
0F28442430 movaps xmm0, xmmword ptr [rsp+30H]
660FEFC9 pxor xmm1, xmm1
0F28D0 movaps xmm2, xmm0
660F76D1 pcmpeqd xmm2, xmm1
660F70DA4E pshufd xmm3, xmm2, 78
0F54D3 andps xmm2, xmm3
660F70DA01 pshufd xmm3, xmm2, 1
660FDBD3 pand xmm2, xmm3
66410F7ED6 movd r14d, xmm2
4181FEFFFFFFFF cmp r14d, 0xFFFFFFFF
7418 je SHORT G_M3332_IG13
44894C246C mov dword ptr [rsp+6CH], r9d
488D4C2430 lea rcx, bword ptr [rsp+30H]
E84AFAFFFF call MemoryPoolIterator:FindFirstEqualByte(byref):int
89442468 mov dword ptr [rsp+68H], eax
448B4C246C mov r9d, dword ptr [rsp+6CH]
G_M3332_IG13:
0F28442420 movaps xmm0, xmmword ptr [rsp+20H]
660FEFC9 pxor xmm1, xmm1
0F28D0 movaps xmm2, xmm0
660F76D1 pcmpeqd xmm2, xmm1
660F70DA4E pshufd xmm3, xmm2, 78
0F54D3 andps xmm2, xmm3
660F70DA01 pshufd xmm3, xmm2, 1
660FDBD3 pand xmm2, xmm3
66410F7ED6 movd r14d, xmm2
4181FEFFFFFFFF cmp r14d, 0xFFFFFFFF
741C je SHORT G_M3332_IG14
44894C246C mov dword ptr [rsp+6CH], r9d
488D4C2420 lea rcx, bword ptr [rsp+20H]
E803FAFFFF call MemoryPoolIterator:FindFirstEqualByte(byref):int
448BF0 mov r14d, eax
4489742464 mov dword ptr [rsp+64H], r14d
448B4C246C mov r9d, dword ptr [rsp+6CH]
G_M3332_IG14:
44894C246C mov dword ptr [rsp+6CH], r9d
4181F9FFFFFF7F cmp r9d, 0x7FFFFFFF
7567 jne SHORT G_M3332_IG16
817C2468FFFFFF7F cmp dword ptr [rsp+68H], 0x7FFFFFFF
755D jne SHORT G_M3332_IG16
817C2464FFFFFF7F cmp dword ptr [rsp+64H], 0x7FFFFFFF
7553 jne SHORT G_M3332_IG16
448B442470 mov r8d, dword ptr [rsp+70H]
4183C0F0 add r8d, -16
4183C410 add r12d, 16
4C3B3E cmp r15, gword ptr [rsi]
0F85C1010000 jne G_M3332_IG29
443B6608 cmp r12d, dword ptr [rsi+8]
0F8EB7010000 jle G_M3332_IG29
488B8C24C0000000 mov rcx, bword ptr [rsp+C0H]
498BD7 mov rdx, r15
E8F4F2B05F call CORINFO_HELP_CHECKED_ASSIGN_REF
8B5608 mov edx, dword ptr [rsi+8]
4C8BB424C0000000 mov r14, bword ptr [rsp+C0H]
41895608 mov dword ptr [r14+8], edx
B8FFFFFFFF mov eax, -1
E9A9010000 jmp G_M3332_IG31
G_M3332_IG15:
448B6C2474 mov r13d, dword ptr [rsp+74H]
E9E1FDFFFF jmp G_M3332_IG04
G_M3332_IG16:
4C8BB424C0000000 mov r14, bword ptr [rsp+C0H]
498BCE mov rcx, r14
498BD7 mov rdx, r15
E8BEF2B05F call CORINFO_HELP_CHECKED_ASSIGN_REF
448B4C246C mov r9d, dword ptr [rsp+6CH]
8B7C2468 mov edi, dword ptr [rsp+68H]
443BCF cmp r9d, edi
7D1C jge SHORT G_M3332_IG18
8B7C2464 mov edi, dword ptr [rsp+64H]
443BCF cmp r9d, edi
7D0B jge SHORT G_M3332_IG17
8B5C2460 mov ebx, dword ptr [rsp+60H]
8BD3 mov edx, ebx
418BF9 mov edi, r9d
EB23 jmp SHORT G_M3332_IG20
G_M3332_IG17:
8B6C2458 mov ebp, dword ptr [rsp+58H]
8BD5 mov edx, ebp
EB1B jmp SHORT G_M3332_IG20
G_M3332_IG18:
448B6C2464 mov r13d, dword ptr [rsp+64H]
413BFD cmp edi, r13d
7D08 jge SHORT G_M3332_IG19
8B5C245C mov ebx, dword ptr [rsp+5CH]
8BD3 mov edx, ebx
EB09 jmp SHORT G_M3332_IG20
G_M3332_IG19:
8B6C2458 mov ebp, dword ptr [rsp+58H]
8BD5 mov edx, ebp
418BFD mov edi, r13d
G_M3332_IG20:
418D0C3C lea ecx, [r12+rdi]
41894E08 mov dword ptr [r14+8], ecx
4C3B3E cmp r15, gword ptr [rsi]
751A jne SHORT G_M3332_IG21
418B4E08 mov ecx, dword ptr [r14+8]
3B4E08 cmp ecx, dword ptr [rsi+8]
7E11 jle SHORT G_M3332_IG21
8B5608 mov edx, dword ptr [rsi+8]
41895608 mov dword ptr [r14+8], edx
B8FFFFFFFF mov eax, -1
E920010000 jmp G_M3332_IG31
G_M3332_IG21:
8BC2 mov eax, edx
E919010000 jmp G_M3332_IG31
G_M3332_IG22:
4D63EC movsxd r13, r12d
4D036F28 add r13, qword ptr [r15+40]
4C3B3E cmp r15, gword ptr [rsi]
740F je SHORT G_M3332_IG23
448B442470 mov r8d, dword ptr [rsp+70H]
4D63C0 movsxd r8, r8d
4D03C5 add r8, r13
EB23 jmp SHORT G_M3332_IG24
G_M3332_IG23:
4C89B424C0000000 mov bword ptr [rsp+C0H], r14
4D8B4728 mov r8, qword ptr [r15+40]
448B7608 mov r14d, dword ptr [rsi+8]
4D63F6 movsxd r14, r14d
4F8D443001 lea r8, [r8+r14+1]
4C8BB424C0000000 mov r14, bword ptr [rsp+C0H]
G_M3332_IG24:
4C89B424C0000000 mov bword ptr [rsp+C0H], r14
G_M3332_IG25:
450FB67500 movzx r14, byte ptr [r13]
8B542460 mov edx, dword ptr [rsp+60H]
443BF2 cmp r14d, edx
7527 jne SHORT G_M3332_IG26
488B8C24C0000000 mov rcx, bword ptr [rsp+C0H]
498BD7 mov rdx, r15
E8ECF1B05F call CORINFO_HELP_CHECKED_ASSIGN_REF
4C8BB424C0000000 mov r14, bword ptr [rsp+C0H]
45896608 mov dword ptr [r14+8], r12d
8B5C2460 mov ebx, dword ptr [rsp+60H]
8BC3 mov eax, ebx
E9A0000000 jmp G_M3332_IG31
G_M3332_IG26:
450FB67500 movzx r14, byte ptr [r13]
8B4C245C mov ecx, dword ptr [rsp+5CH]
443BF1 cmp r14d, ecx
7524 jne SHORT G_M3332_IG27
488B8C24C0000000 mov rcx, bword ptr [rsp+C0H]
498BD7 mov rdx, r15
E8B7F1B05F call CORINFO_HELP_CHECKED_ASSIGN_REF
4C8BB424C0000000 mov r14, bword ptr [rsp+C0H]
45896608 mov dword ptr [r14+8], r12d
8B5C245C mov ebx, dword ptr [rsp+5CH]
8BC3 mov eax, ebx
EB6E jmp SHORT G_M3332_IG31
G_M3332_IG27:
450FB67500 movzx r14, byte ptr [r13]
8B442458 mov eax, dword ptr [rsp+58H]
443BF0 cmp r14d, eax
7524 jne SHORT G_M3332_IG28
488B8C24C0000000 mov rcx, bword ptr [rsp+C0H]
498BD7 mov rdx, r15
E885F1B05F call CORINFO_HELP_CHECKED_ASSIGN_REF
4C8BB424C0000000 mov r14, bword ptr [rsp+C0H]
45896608 mov dword ptr [r14+8], r12d
8B6C2458 mov ebp, dword ptr [rsp+58H]
8BC5 mov eax, ebp
EB3C jmp SHORT G_M3332_IG31
G_M3332_IG28:
49FFC5 inc r13
41FFC4 inc r12d
4D3BE8 cmp r13, r8
0F8558FFFFFF jne G_M3332_IG25
4C8BB424C0000000 mov r14, bword ptr [rsp+C0H]
448B6C2474 mov r13d, dword ptr [rsp+74H]
E960FCFFFF jmp G_M3332_IG04
G_M3332_IG29:
4585C0 test r8d, r8d
448B4C246C mov r9d, dword ptr [rsp+6CH]
4C8BB424C0000000 mov r14, bword ptr [rsp+C0H]
0F8FC5FCFFFF jg G_M3332_IG11
G_M3332_IG30:
E9A7FCFFFF jmp G_M3332_IG09
G_M3332_IG31:
4883C478 add rsp, 120
5B pop rbx
5D pop rbp
5E pop rsi
5F pop rdi
415C pop r12
415D pop r13
415E pop r14
415F pop r15
C3 ret
G_M3332_IG32:
E84F498D5F call CORINFO_HELP_RNGCHKFAIL
CC int3
; Total bytes of code 1138, prolog size 56 for method MemoryPoolIterator:Seek(byref,byref,byref,byref):int:this
; ============================================================
Successfully inlined MemoryPoolIterator:get_IsDefault():bool:this (10 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:get_Block():ref:this (7 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:get_Index():int:this (7 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:get_Index():int:this (7 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolBlock:get_Array():ref:this (12 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined ArraySegment`1:get_Array():ref:this (7 IL bytes) (depth 2) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:get_Block():ref:this (7 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:get_Index():int:this (7 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:get_Index():int:this (7 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:get_Block():ref:this (7 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:get_Index():int:this (7 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:get_Index():int:this (7 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:get_Block():ref:this (7 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:get_Index():int:this (7 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Post:
; Assembly listing for method MemoryPoolIterator:Seek(byref,byref,byref,byref):int:this
; Emitting BLENDED_CODE for X64 CPU with SSE2
; optimized code
; rsp based frame
; fully interruptible
; Final local variable assignments
;
; V00 this [V00,T25] ( 19, 11 ) byref -> [rsp+0xB0] this
; V01 arg1 [V01,T21] ( 4, 514.5) byref -> rdi
; V02 arg2 [V02,T22] ( 4, 514.5) byref -> rbx
; V03 arg3 [V03,T23] ( 4, 514.5) byref -> rbp
; V04 arg4 [V04,T11] ( 11, 1286.5) byref -> rsi
; V05 loc0 [V05,T08] ( 22, 2193 ) ref -> r15
; V06 loc1 [V06,T01] ( 18,11401 ) int -> r12
; V07 loc2 [V07,T28] ( 3, 4.5) bool -> [rsp+0x4C]
; V08 loc3 [V08,T06] ( 10, 3955 ) int -> [rsp+0x48]
; V09 loc4 [V09,T09] ( 4, 1792 ) ref -> r13
; V10 loc5 [V10,T19] ( 6, 642 ) int -> [rsp+0x44]
; V11 loc6 [V11,T20] ( 6, 642 ) int -> [rsp+0x40]
; V12 loc7 [V12,T18] ( 7, 642.5) int -> [rsp+0x3C]
; V13 loc8 [V13,T04] ( 4, 4097.5) ubyte -> [rsp+0x38]
; V14 loc9 [V14,T05] ( 4, 4097.5) ubyte -> [rsp+0x34]
; V15 loc10 [V15,T03] ( 5, 4098 ) ubyte -> [rsp+0x30]
; V16 loc11 [V16,T00] ( 8,25216 ) long -> r13
; V17 loc12 [V17,T02] ( 2, 4608 ) long -> r8
; V18 loc13 [V18 ] ( 6, 768 ) simd16 -> [rsp+0x20] do-not-enreg[XS] must-init addr-exposed ld-addr-op
; V19 loc14 [V19,T12] ( 3, 1152 ) simd16 -> mm1 ld-addr-op
; V20 loc15 [V20,T13] ( 3, 1152 ) simd16 -> mm0 ld-addr-op
; V21 loc16 [V21,T14] ( 3, 1152 ) simd16 -> mm0 ld-addr-op
; V22 loc17 [V22,T29] ( 5, 2.5) int -> rdx
; V23 loc18 [V23,T30] ( 5, 2.5) int -> rdi
; V24 tmp0 [V24,T27] ( 3, 6 ) int -> r8
; V25 tmp1 [V25,T24] ( 2, 512 ) long -> r8
; V26 tmp2 [V26,T17] ( 3, 768 ) long -> r8
; V27 tmp3 [V27,T07] ( 3, 3072 ) simd16 -> mm0
; V28 tmp4 [V28,T10] ( 3, 1536 ) simd16 -> mm6
; V29 tmp5 [V29,T16] ( 2, 1024 ) simd16 -> mm6
; V30 tmp6 [V30,T31] ( 2, 2 ) int -> rcx
; V31 tmp7 [V31,T26] ( 9, 10 ) int -> rax
; V32 tmp8 [V32,T15] ( 2, 1024 ) byref -> r13
; V33 OutArgs [V33 ] ( 1, 1 ) lclBlk (32) [rsp+0x00]
;
; Lcl frame size = 104
G_M3332_IG01:
4157 push r15
4156 push r14
4155 push r13
4154 push r12
57 push rdi
56 push rsi
55 push rbp
53 push rbx
4883EC68 sub rsp, 104
0F29742450 movaps qword ptr [rsp+50H], xmm6
33C0 xor rax, rax
4889442420 mov qword ptr [rsp+20H], rax
4889442428 mov qword ptr [rsp+28H], rax
4C8BF1 mov r14, rcx
488BFA mov rdi, rdx
498BD8 mov rbx, r8
498BE9 mov rbp, r9
488BB424D0000000 mov rsi, bword ptr [rsp+D0H]
G_M3332_IG02:
49833E00 cmp gword ptr [r14], 0
750A jne SHORT G_M3332_IG03
B8FFFFFFFF mov eax, -1
E90A040000 jmp G_M3332_IG31
G_M3332_IG03:
4D8B3E mov r15, gword ptr [r14]
458B6608 mov r12d, dword ptr [r14+8]
49837F1800 cmp gword ptr [r15+24], 0
410F94C5 sete r13b
450FB6ED movzx r13, r13b
418B5734 mov edx, dword ptr [r15+52]
448BC2 mov r8d, edx
452BC4 sub r8d, r12d
41B9FFFFFF7F mov r9d, 0x7FFFFFFF
C7442440FFFFFF7F mov dword ptr [rsp+40H], 0x7FFFFFFF
C744243CFFFFFF7F mov dword ptr [rsp+3CH], 0x7FFFFFFF
0FB617 movzx rdx, byte ptr [rdi]
0FB6D2 movzx rdx, dl
89542438 mov dword ptr [rsp+38H], edx
0FB60B movzx rcx, byte ptr [rbx]
0FB6C9 movzx rcx, cl
894C2434 mov dword ptr [rsp+34H], ecx
0FB64500 movzx rax, byte ptr [rbp]
0FB6C0 movzx rax, al
89442430 mov dword ptr [rsp+30H], eax
4585C0 test r8d, r8d
7513 jne SHORT G_M3332_IG05
G_M3332_IG04:
4C3B3E cmp r15, gword ptr [rsi]
7515 jne SHORT G_M3332_IG06
443B6608 cmp r12d, dword ptr [rsi+8]
410F9FC0 setg r8b
450FB6C0 movzx r8, r8b
EB0A jmp SHORT G_M3332_IG07
G_M3332_IG05:
44896C244C mov dword ptr [rsp+4CH], r13d
EB51 jmp SHORT G_M3332_IG10
G_M3332_IG06:
4533C0 xor r8d, r8d
G_M3332_IG07:
450BC5 or r8d, r13d
4585C0 test r8d, r8d
741C je SHORT G_M3332_IG08
498BCE mov rcx, r14
498BD7 mov rdx, r15
E873EEB25F call CORINFO_HELP_CHECKED_ASSIGN_REF
8B4E08 mov ecx, dword ptr [rsi+8]
41894E08 mov dword ptr [r14+8], ecx
B8FFFFFFFF mov eax, -1
E971030000 jmp G_M3332_IG31
G_M3332_IG08:
4D8B7F18 mov r15, gword ptr [r15+24]
458B6730 mov r12d, dword ptr [r15+48]
49837F1800 cmp gword ptr [r15+24], 0
410F94C5 sete r13b
450FB6ED movzx r13, r13b
44896C244C mov dword ptr [rsp+4CH], r13d
458B4734 mov r8d, dword ptr [r15+52]
452BC4 sub r8d, r12d
G_M3332_IG09:
4585C0 test r8d, r8d
0F849E010000 je G_M3332_IG15
G_M3332_IG10:
458B2F mov r13d, dword ptr [r15]
4D8D6F38 lea r13, bword ptr [r15+56]
4D8B6D00 mov r13, gword ptr [r13]
4585C0 test r8d, r8d
7EE7 jle SHORT G_M3332_IG09
G_M3332_IG11:
4489442448 mov dword ptr [rsp+48H], r8d
4183F810 cmp r8d, 16
0F8C0F020000 jl G_M3332_IG22
4C89B424B0000000 mov bword ptr [rsp+B0H], r14
453B6508 cmp r12d, dword ptr [r13+8]
0F832C030000 jae G_M3332_IG32
458D74240F lea r14d, [r12+15]
453B7508 cmp r14d, dword ptr [r13+8]
0F831D030000 jae G_M3332_IG32
430F10442510 movups xmm0, xmmword ptr [r13+r12+16]
0F100F movups xmm1, xmmword ptr [rdi]
660F74C8 pcmpeqb xmm1, xmm0
0F28F0 movaps xmm6, xmm0
660FEFC0 pxor xmm0, xmm0
0F28D1 movaps xmm2, xmm1
660F76D0 pcmpeqd xmm2, xmm0
660F70DA4E pshufd xmm3, xmm2, 78
0F54D3 andps xmm2, xmm3
660F70DA01 pshufd xmm3, xmm2, 1
660FDBD3 pand xmm2, xmm3
66410F7ED6 movd r14d, xmm2
4181FEFFFFFFFF cmp r14d, 0xFFFFFFFF
7418 je SHORT G_M3332_IG12
0F294C2420 movaps xmmword ptr [rsp+20H], xmm1
488D4C2420 lea rcx, bword ptr [rsp+20H]
E80FEBFFFF call MemoryPoolIterator:FindFirstEqualByte(byref):int
89442444 mov dword ptr [rsp+44H], eax
448B4C2444 mov r9d, dword ptr [rsp+44H]
G_M3332_IG12:
0F1003 movups xmm0, xmmword ptr [rbx]
660F74C6 pcmpeqb xmm0, xmm6
660FEFC9 pxor xmm1, xmm1
0F28D0 movaps xmm2, xmm0
660F76D1 pcmpeqd xmm2, xmm1
660F70DA4E pshufd xmm3, xmm2, 78
0F54D3 andps xmm2, xmm3
660F70DA01 pshufd xmm3, xmm2, 1
660FDBD3 pand xmm2, xmm3
66410F7ED6 movd r14d, xmm2
4181FEFFFFFFFF cmp r14d, 0xFFFFFFFF
741D je SHORT G_M3332_IG13
44894C2444 mov dword ptr [rsp+44H], r9d
0F29442420 movaps xmmword ptr [rsp+20H], xmm0
488D4C2420 lea rcx, bword ptr [rsp+20H]
E8C1EAFFFF call MemoryPoolIterator:FindFirstEqualByte(byref):int
89442440 mov dword ptr [rsp+40H], eax
448B4C2444 mov r9d, dword ptr [rsp+44H]
G_M3332_IG13:
0F104500 movups xmm0, xmmword ptr [rbp]
660F74C6 pcmpeqb xmm0, xmm6
660FEFC9 pxor xmm1, xmm1
0F28D0 movaps xmm2, xmm0
660F76D1 pcmpeqd xmm2, xmm1
660F70DA4E pshufd xmm3, xmm2, 78
0F54D3 andps xmm2, xmm3
660F70DA01 pshufd xmm3, xmm2, 1
660FDBD3 pand xmm2, xmm3
66410F7ED6 movd r14d, xmm2
4181FEFFFFFFFF cmp r14d, 0xFFFFFFFF
7421 je SHORT G_M3332_IG14
44894C2444 mov dword ptr [rsp+44H], r9d
0F29442420 movaps xmmword ptr [rsp+20H], xmm0
488D4C2420 lea rcx, bword ptr [rsp+20H]
E872EAFFFF call MemoryPoolIterator:FindFirstEqualByte(byref):int
448BF0 mov r14d, eax
448974243C mov dword ptr [rsp+3CH], r14d
448B4C2444 mov r9d, dword ptr [rsp+44H]
G_M3332_IG14:
44894C2444 mov dword ptr [rsp+44H], r9d
4181F9FFFFFF7F cmp r9d, 0x7FFFFFFF
7567 jne SHORT G_M3332_IG16
817C2440FFFFFF7F cmp dword ptr [rsp+40H], 0x7FFFFFFF
755D jne SHORT G_M3332_IG16
817C243CFFFFFF7F cmp dword ptr [rsp+3CH], 0x7FFFFFFF
7553 jne SHORT G_M3332_IG16
448B442448 mov r8d, dword ptr [rsp+48H]
4183C0F0 add r8d, -16
4183C410 add r12d, 16
4C3B3E cmp r15, gword ptr [rsi]
0F85C1010000 jne G_M3332_IG29
443B6608 cmp r12d, dword ptr [rsi+8]
0F8EB7010000 jle G_M3332_IG29
488B8C24B0000000 mov rcx, bword ptr [rsp+B0H]
498BD7 mov rdx, r15
E8B3ECB25F call CORINFO_HELP_CHECKED_ASSIGN_REF
8B5608 mov edx, dword ptr [rsi+8]
4C8BB424B0000000 mov r14, bword ptr [rsp+B0H]
41895608 mov dword ptr [r14+8], edx
B8FFFFFFFF mov eax, -1
E9A9010000 jmp G_M3332_IG31
G_M3332_IG15:
448B6C244C mov r13d, dword ptr [rsp+4CH]
E9EDFDFFFF jmp G_M3332_IG04
G_M3332_IG16:
4C8BB424B0000000 mov r14, bword ptr [rsp+B0H]
498BCE mov rcx, r14
498BD7 mov rdx, r15
E87DECB25F call CORINFO_HELP_CHECKED_ASSIGN_REF
448B4C2444 mov r9d, dword ptr [rsp+44H]
8B7C2440 mov edi, dword ptr [rsp+40H]
443BCF cmp r9d, edi
7D1C jge SHORT G_M3332_IG18
8B7C243C mov edi, dword ptr [rsp+3CH]
443BCF cmp r9d, edi
7D0B jge SHORT G_M3332_IG17
8B5C2438 mov ebx, dword ptr [rsp+38H]
8BD3 mov edx, ebx
418BF9 mov edi, r9d
EB23 jmp SHORT G_M3332_IG20
G_M3332_IG17:
8B6C2430 mov ebp, dword ptr [rsp+30H]
8BD5 mov edx, ebp
EB1B jmp SHORT G_M3332_IG20
G_M3332_IG18:
448B6C243C mov r13d, dword ptr [rsp+3CH]
413BFD cmp edi, r13d
7D08 jge SHORT G_M3332_IG19
8B5C2434 mov ebx, dword ptr [rsp+34H]
8BD3 mov edx, ebx
EB09 jmp SHORT G_M3332_IG20
G_M3332_IG19:
8B6C2430 mov ebp, dword ptr [rsp+30H]
8BD5 mov edx, ebp
418BFD mov edi, r13d
G_M3332_IG20:
418D0C3C lea ecx, [r12+rdi]
41894E08 mov dword ptr [r14+8], ecx
4C3B3E cmp r15, gword ptr [rsi]
751A jne SHORT G_M3332_IG21
418B4E08 mov ecx, dword ptr [r14+8]
3B4E08 cmp ecx, dword ptr [rsi+8]
7E11 jle SHORT G_M3332_IG21
8B5608 mov edx, dword ptr [rsi+8]
41895608 mov dword ptr [r14+8], edx
B8FFFFFFFF mov eax, -1
E920010000 jmp G_M3332_IG31
G_M3332_IG21:
8BC2 mov eax, edx
E919010000 jmp G_M3332_IG31
G_M3332_IG22:
4D63EC movsxd r13, r12d
4D036F28 add r13, qword ptr [r15+40]
4C3B3E cmp r15, gword ptr [rsi]
740F je SHORT G_M3332_IG23
448B442448 mov r8d, dword ptr [rsp+48H]
4D63C0 movsxd r8, r8d
4D03C5 add r8, r13
EB23 jmp SHORT G_M3332_IG24
G_M3332_IG23:
4C89B424B0000000 mov bword ptr [rsp+B0H], r14
4D8B4728 mov r8, qword ptr [r15+40]
448B7608 mov r14d, dword ptr [rsi+8]
4D63F6 movsxd r14, r14d
4F8D443001 lea r8, [r8+r14+1]
4C8BB424B0000000 mov r14, bword ptr [rsp+B0H]
G_M3332_IG24:
4C89B424B0000000 mov bword ptr [rsp+B0H], r14
G_M3332_IG25:
450FB67500 movzx r14, byte ptr [r13]
8B542438 mov edx, dword ptr [rsp+38H]
443BF2 cmp r14d, edx
7527 jne SHORT G_M3332_IG26
488B8C24B0000000 mov rcx, bword ptr [rsp+B0H]
498BD7 mov rdx, r15
E8ABEBB25F call CORINFO_HELP_CHECKED_ASSIGN_REF
4C8BB424B0000000 mov r14, bword ptr [rsp+B0H]
45896608 mov dword ptr [r14+8], r12d
8B5C2438 mov ebx, dword ptr [rsp+38H]
8BC3 mov eax, ebx
E9A0000000 jmp G_M3332_IG31
G_M3332_IG26:
450FB67500 movzx r14, byte ptr [r13]
8B4C2434 mov ecx, dword ptr [rsp+34H]
443BF1 cmp r14d, ecx
7524 jne SHORT G_M3332_IG27
488B8C24B0000000 mov rcx, bword ptr [rsp+B0H]
498BD7 mov rdx, r15
E876EBB25F call CORINFO_HELP_CHECKED_ASSIGN_REF
4C8BB424B0000000 mov r14, bword ptr [rsp+B0H]
45896608 mov dword ptr [r14+8], r12d
8B5C2434 mov ebx, dword ptr [rsp+34H]
8BC3 mov eax, ebx
EB6E jmp SHORT G_M3332_IG31
G_M3332_IG27:
450FB67500 movzx r14, byte ptr [r13]
8B442430 mov eax, dword ptr [rsp+30H]
443BF0 cmp r14d, eax
7524 jne SHORT G_M3332_IG28
488B8C24B0000000 mov rcx, bword ptr [rsp+B0H]
498BD7 mov rdx, r15
E844EBB25F call CORINFO_HELP_CHECKED_ASSIGN_REF
4C8BB424B0000000 mov r14, bword ptr [rsp+B0H]
45896608 mov dword ptr [r14+8], r12d
8B6C2430 mov ebp, dword ptr [rsp+30H]
8BC5 mov eax, ebp
EB3C jmp SHORT G_M3332_IG31
G_M3332_IG28:
49FFC5 inc r13
41FFC4 inc r12d
4D3BE8 cmp r13, r8
0F8558FFFFFF jne G_M3332_IG25
4C8BB424B0000000 mov r14, bword ptr [rsp+B0H]
448B6C244C mov r13d, dword ptr [rsp+4CH]
E96CFCFFFF jmp G_M3332_IG04
G_M3332_IG29:
4585C0 test r8d, r8d
448B4C2444 mov r9d, dword ptr [rsp+44H]
4C8BB424B0000000 mov r14, bword ptr [rsp+B0H]
0F8FD1FCFFFF jg G_M3332_IG11
G_M3332_IG30:
E9B3FCFFFF jmp G_M3332_IG09
G_M3332_IG31:
0F28742450 movaps xmm6, qword ptr [rsp+50H]
4883C468 add rsp, 104
5B pop rbx
5D pop rbp
5E pop rsi
5F pop rdi
415C pop r12
415D pop r13
415E pop r14
415F pop r15
C3 ret
G_M3332_IG32:
E809438F5F call CORINFO_HELP_RNGCHKFAIL
CC int3
; Total bytes of code 1128, prolog size 53 for method MemoryPoolIterator:Seek(byref,byref,byref,byref):int:this |
|
Perfect, I can see in the Post JitDump that only V18 (which corresponds to the Vector temp) is now marked as addr-exposed. Indeed we are getting the expected benefit of byte0Equals (V19), byte1Equals (V20), and byte2Equals (V21) staying in registers. |
|
Cool! @halter73 FYI |
|
Cool! Since we've been microbenchmarking Seek/FindFirstEqualByte a lot recently, can we get microbenchmarks for this change too? While it makes sense that making sure byref parameters are always copied between registers instead of from memory would make things faster, it would be nice to get this quantified. We made Vectors ref parameters to begin with because we found copying them to be expensive. Remember we found that our plaintext RPS numbers improved when we initially made Vectors ref arguments. With this change, it looks like all calls to FindFirstEqualByte(ref tmp) first copy a ref Vector into temp. This makes me wonder why FindFirstEqualByte has a ref parameter to begin with. Is copying to a local variable and then calling a method with a ref parameter really more efficient than using a non-ref parameter and copying implicitly? Also, would it be reasonable for RyuJIT to identify when addr-exposed locals are really "constant" statics and optimize that code path? Would more aggressive inlining allow this somehow? |
|
We made Vectors ref parameters to begin with because we found copying them to be expensive. Remember we found that our plaintext RPS numbers improved when we initially made Vectors ref arguments. With this change, it looks like all calls to FindFirstEqualByte(ref tmp) first copy a ref Vector into temp. This makes me wonder why FindFirstEqualByte has a ref parameter to begin with. Is copying to a local variable and then calling a method with a ref parameter really more efficient than using a non-ref parameter and copying implicitly? Note that this code is on the path where h/w acceleration is enabled, so SIMD vector type locals are register-allocated. Hence making a copy would lead to either a reg-to-reg move or a reg-to-mem move (a single instruction). We could pass the param by value instead of by ref to FindFirstEqualByte(). In that case, too, a copy is made. In terms of efficiency, both of them would be almost the same on x64, where RyuJIT uses hardware acceleration. On 32-bit x86, hardware acceleration is still not available, so you would incur the cost of a struct copy on targets where h/w acceleration is not available. Hence it is best to pass the param by ref. Regarding addr-exposed locals, these cannot be treated as constants, as they could be modified by the routine to which they are passed as args. Regarding using the aggressive in-lining attribute on the FindFirstEqualByte() method: currently we are tracking this issue on our side: https://github.com/dotnet/coreclr/issues/7386 In summary, to realize in-lining benefits, we would need to unroll the loop in FindFirstEqualByte(). |
|
FindFirstEqualByte is currently not called in the non hw-accelerated code path, so I would prefer the cleaner code for that. Microbenchmarks would also be good. I'm fairly certain we were measuring hw-accelerated results when we saw a plaintext RPS improvement in making Vectors byref. |
|
Trying it byval, it looks like it pushes it to the stack for the call. Edit: checking I'm updating the right DLL :) |
|
@benaadams That's really unfortunate if the cleaner code that looks like it should do the same thing in terms of copying is really less efficient. So you're saying the explicit copy and byref call simply copies the vector data from one register to another, while the byval call copies from a register to a stack address and then back to a register? |
|
Double checked and actually can't see any difference. Passing byval marks it as […]. Passing byref marks it as […]. So the byval should be the same (for this bit) and inlining (as in #1138) should resolve that. Will push change; the new arrangement in this PR is better as it holds onto them in a less overlapping way, so there is less register pressure. Re: passing the original Vectors by ref: either you .ctor them on the fly, which is pretty terrible (https://github.com/dotnet/coreclr/issues/7459), or it has to do a copy from heap to stack, then pass the ref, which it looks like it then reloads from the stack, rather than just using the heap ref to pass. |
|
@sivarv all that's really changed is The output looks the same? The two functions Inlines into 06000167 MemoryPoolIterator:Seek(byref,byref,byref,byref):int:this
[1 IL=0001 TR=000002 06000159] [below ALWAYS_INLINE size] MemoryPoolIterator:get_IsDefault():bool:this
[2 IL=0098 TR=000076 0600015B] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Block():ref:this
[3 IL=0108 TR=000141 0600015C] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Index():int:this
[4 IL=0132 TR=000129 0600015C] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Index():int:this
[5 IL=0183 TR=000151 06000151] [below ALWAYS_INLINE size] MemoryPoolBlock:get_Array():ref:this
[6 IL=0006 TR=000636 060038EC] [below ALWAYS_INLINE size] ArraySegment`1:get_Array():ref:this
[0 IL=0254 TR=000601 06000168] [FAILED: noinline per IL/cached result] MemoryPoolIterator:FindFirstEqualByte(struct):int
[0 IL=0291 TR=000589 06000168] [FAILED: noinline per IL/cached result] MemoryPoolIterator:FindFirstEqualByte(struct):int
[0 IL=0327 TR=000580 06000168] [FAILED: noinline per IL/cached result] MemoryPoolIterator:FindFirstEqualByte(struct):int
[7 IL=0380 TR=000546 0600015B] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Block():ref:this
[8 IL=0393 TR=000555 0600015C] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Index():int:this
[9 IL=0413 TR=000569 0600015C] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Index():int:this
[10 IL=0501 TR=000450 0600015B] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Block():ref:this
[11 IL=0516 TR=000464 0600015C] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Index():int:this
[12 IL=0526 TR=000477 0600015C] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Index():int:this
[13 IL=0554 TR=000184 0600015B] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Block():ref:this
[14 IL=0575 TR=000194 0600015C] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Index():int:this
Budget: initialTime=2136, finalTime=2152, initialBudget=21360, currentBudget=21360
Budget: initialSize=15908, finalSize=15908
; Assembly listing for method MemoryPoolIterator:Seek(byref,byref,byref,byref):int:this
; Emitting BLENDED_CODE for X64 CPU with SSE2
; optimized code
; rsp based frame
; fully interruptible
; Final local variable assignments
;
; V00 this [V00,T25] ( 19, 11 ) byref -> [rsp+0xB0] this
; V01 arg1 [V01,T21] ( 4, 514.5) byref -> rdi
; V02 arg2 [V02,T22] ( 4, 514.5) byref -> rbx
; V03 arg3 [V03,T23] ( 4, 514.5) byref -> rbp
; V04 arg4 [V04,T11] ( 11, 1286.5) byref -> rsi
; V05 loc0 [V05,T08] ( 22, 2193 ) ref -> r15
; V06 loc1 [V06,T01] ( 18,11401 ) int -> r12
; V07 loc2 [V07,T28] ( 3, 4.5) bool -> [rsp+0x4C]
; V08 loc3 [V08,T06] ( 10, 3955 ) int -> [rsp+0x48]
; V09 loc4 [V09,T09] ( 4, 1792 ) ref -> r13
; V10 loc5 [V10,T19] ( 6, 642 ) int -> [rsp+0x44]
; V11 loc6 [V11,T20] ( 6, 642 ) int -> [rsp+0x40]
; V12 loc7 [V12,T18] ( 7, 642.5) int -> [rsp+0x3C]
; V13 loc8 [V13,T04] ( 4, 4097.5) ubyte -> [rsp+0x38]
; V14 loc9 [V14,T05] ( 4, 4097.5) ubyte -> [rsp+0x34]
; V15 loc10 [V15,T03] ( 5, 4098 ) ubyte -> [rsp+0x30]
; V16 loc11 [V16,T00] ( 8,25216 ) long -> r13
; V17 loc12 [V17,T02] ( 2, 4608 ) long -> r8
; V18 loc13 [V18,T12] ( 3, 1152 ) simd16 -> mm1 ld-addr-op
; V19 loc14 [V19,T13] ( 3, 1152 ) simd16 -> mm0 ld-addr-op
; V20 loc15 [V20,T14] ( 3, 1152 ) simd16 -> mm0 ld-addr-op
; V21 loc16 [V21,T29] ( 5, 2.5) int -> rdx
; V22 loc17 [V22,T30] ( 5, 2.5) int -> rdi
; V23 tmp0 [V23,T27] ( 3, 6 ) int -> r8
; V24 tmp1 [V24,T24] ( 2, 512 ) long -> r8
; V25 tmp2 [V25,T17] ( 3, 768 ) long -> r8
; V26 tmp3 [V26,T07] ( 3, 3072 ) simd16 -> mm0
; V27 tmp4 [V27,T10] ( 3, 1536 ) simd16 -> mm6
; V28 tmp5 [V28,T16] ( 2, 1024 ) simd16 -> mm6
; V29 tmp6 [V29,T31] ( 2, 2 ) int -> rcx
; V30 tmp7 [V30,T26] ( 9, 10 ) int -> rax
; V31 tmp8 [V31,T15] ( 2, 1024 ) byref -> r13
; V32 tmp9 [V32 ] ( 9, 1542 ) simd16 -> [rsp+0x20] do-not-enreg[XSB] addr-exposed
; V33 OutArgs [V33 ] ( 1, 1 ) lclBlk (32) [rsp+0x00]
;
; Lcl frame size = 104
G_M3332_IG01:
4157 push r15
4156 push r14
4155 push r13
4154 push r12
57 push rdi
56 push rsi
55 push rbp
53 push rbx
4883EC68 sub rsp, 104
0F29742450 movaps qword ptr [rsp+50H], xmm6
4C8BF1 mov r14, rcx
488BFA mov rdi, rdx
498BD8 mov rbx, r8
498BE9 mov rbp, r9
488BB424D0000000 mov rsi, bword ptr [rsp+D0H]
G_M3332_IG02:
49833E00 cmp gword ptr [r14], 0
750A jne SHORT G_M3332_IG03
B8FFFFFFFF mov eax, -1
E90A040000 jmp G_M3332_IG31
G_M3332_IG03:
4D8B3E mov r15, gword ptr [r14]
458B6608 mov r12d, dword ptr [r14+8]
49837F1800 cmp gword ptr [r15+24], 0
410F94C5 sete r13b
450FB6ED movzx r13, r13b
418B5734 mov edx, dword ptr [r15+52]
448BC2 mov r8d, edx
452BC4 sub r8d, r12d
41B9FFFFFF7F mov r9d, 0x7FFFFFFF
C7442440FFFFFF7F mov dword ptr [rsp+40H], 0x7FFFFFFF
C744243CFFFFFF7F mov dword ptr [rsp+3CH], 0x7FFFFFFF
0FB617 movzx rdx, byte ptr [rdi]
0FB6D2 movzx rdx, dl
89542438 mov dword ptr [rsp+38H], edx
0FB60B movzx rcx, byte ptr [rbx]
0FB6C9 movzx rcx, cl
894C2434 mov dword ptr [rsp+34H], ecx
0FB64500 movzx rax, byte ptr [rbp]
0FB6C0 movzx rax, al
89442430 mov dword ptr [rsp+30H], eax
4585C0 test r8d, r8d
7513 jne SHORT G_M3332_IG05
G_M3332_IG04:
4C3B3E cmp r15, gword ptr [rsi]
7515 jne SHORT G_M3332_IG06
443B6608 cmp r12d, dword ptr [rsi+8]
410F9FC0 setg r8b
450FB6C0 movzx r8, r8b
EB0A jmp SHORT G_M3332_IG07
G_M3332_IG05:
44896C244C mov dword ptr [rsp+4CH], r13d
EB51 jmp SHORT G_M3332_IG10
G_M3332_IG06:
4533C0 xor r8d, r8d
G_M3332_IG07:
450BC5 or r8d, r13d
4585C0 test r8d, r8d
741C je SHORT G_M3332_IG08
498BCE mov rcx, r14
498BD7 mov rdx, r15
E89FEFB05F call CORINFO_HELP_CHECKED_ASSIGN_REF
8B4E08 mov ecx, dword ptr [rsi+8]
41894E08 mov dword ptr [r14+8], ecx
B8FFFFFFFF mov eax, -1
E971030000 jmp G_M3332_IG31
G_M3332_IG08:
4D8B7F18 mov r15, gword ptr [r15+24]
458B6730 mov r12d, dword ptr [r15+48]
49837F1800 cmp gword ptr [r15+24], 0
410F94C5 sete r13b
450FB6ED movzx r13, r13b
44896C244C mov dword ptr [rsp+4CH], r13d
458B4734 mov r8d, dword ptr [r15+52]
452BC4 sub r8d, r12d
G_M3332_IG09:
4585C0 test r8d, r8d
0F849E010000 je G_M3332_IG15
G_M3332_IG10:
458B2F mov r13d, dword ptr [r15]
4D8D6F38 lea r13, bword ptr [r15+56]
4D8B6D00 mov r13, gword ptr [r13]
4585C0 test r8d, r8d
7EE7 jle SHORT G_M3332_IG09
G_M3332_IG11:
4489442448 mov dword ptr [rsp+48H], r8d
4183F810 cmp r8d, 16
0F8C0F020000 jl G_M3332_IG22
4C89B424B0000000 mov bword ptr [rsp+B0H], r14
453B6508 cmp r12d, dword ptr [r13+8]
0F832C030000 jae G_M3332_IG32
458D74240F lea r14d, [r12+15]
453B7508 cmp r14d, dword ptr [r13+8]
0F831D030000 jae G_M3332_IG32
430F10442510 movups xmm0, xmmword ptr [r13+r12+16]
0F100F movups xmm1, xmmword ptr [rdi]
660F74C8 pcmpeqb xmm1, xmm0
0F28F0 movaps xmm6, xmm0
660FEFC0 pxor xmm0, xmm0
0F28D1 movaps xmm2, xmm1
660F76D0 pcmpeqd xmm2, xmm0
660F70DA4E pshufd xmm3, xmm2, 78
0F54D3 andps xmm2, xmm3
660F70DA01 pshufd xmm3, xmm2, 1
660FDBD3 pand xmm2, xmm3
66410F7ED6 movd r14d, xmm2
4181FEFFFFFFFF cmp r14d, 0xFFFFFFFF
7418 je SHORT G_M3332_IG12
0F294C2420 movaps xmmword ptr [rsp+20H], xmm1
488D4C2420 lea rcx, bword ptr [rsp+20H]
E81BEFFFFF call MemoryPoolIterator:FindFirstEqualByte(struct):int
89442444 mov dword ptr [rsp+44H], eax
448B4C2444 mov r9d, dword ptr [rsp+44H]
G_M3332_IG12:
0F1003 movups xmm0, xmmword ptr [rbx]
660F74C6 pcmpeqb xmm0, xmm6
660FEFC9 pxor xmm1, xmm1
0F28D0 movaps xmm2, xmm0
660F76D1 pcmpeqd xmm2, xmm1
660F70DA4E pshufd xmm3, xmm2, 78
0F54D3 andps xmm2, xmm3
660F70DA01 pshufd xmm3, xmm2, 1
660FDBD3 pand xmm2, xmm3
66410F7ED6 movd r14d, xmm2
4181FEFFFFFFFF cmp r14d, 0xFFFFFFFF
741D je SHORT G_M3332_IG13
44894C2444 mov dword ptr [rsp+44H], r9d
0F29442420 movaps xmmword ptr [rsp+20H], xmm0
488D4C2420 lea rcx, bword ptr [rsp+20H]
E8CDEEFFFF call MemoryPoolIterator:FindFirstEqualByte(struct):int
89442440 mov dword ptr [rsp+40H], eax
448B4C2444 mov r9d, dword ptr [rsp+44H]
G_M3332_IG13:
0F104500 movups xmm0, xmmword ptr [rbp]
660F74C6 pcmpeqb xmm0, xmm6
660FEFC9 pxor xmm1, xmm1
0F28D0 movaps xmm2, xmm0
660F76D1 pcmpeqd xmm2, xmm1
660F70DA4E pshufd xmm3, xmm2, 78
0F54D3 andps xmm2, xmm3
660F70DA01 pshufd xmm3, xmm2, 1
660FDBD3 pand xmm2, xmm3
66410F7ED6 movd r14d, xmm2
4181FEFFFFFFFF cmp r14d, 0xFFFFFFFF
7421 je SHORT G_M3332_IG14
44894C2444 mov dword ptr [rsp+44H], r9d
0F29442420 movaps xmmword ptr [rsp+20H], xmm0
488D4C2420 lea rcx, bword ptr [rsp+20H]
E87EEEFFFF call MemoryPoolIterator:FindFirstEqualByte(struct):int
448BF0 mov r14d, eax
448974243C mov dword ptr [rsp+3CH], r14d
448B4C2444 mov r9d, dword ptr [rsp+44H]
G_M3332_IG14:
44894C2444 mov dword ptr [rsp+44H], r9d
4181F9FFFFFF7F cmp r9d, 0x7FFFFFFF
7567 jne SHORT G_M3332_IG16
817C2440FFFFFF7F cmp dword ptr [rsp+40H], 0x7FFFFFFF
755D jne SHORT G_M3332_IG16
817C243CFFFFFF7F cmp dword ptr [rsp+3CH], 0x7FFFFFFF
7553 jne SHORT G_M3332_IG16
448B442448 mov r8d, dword ptr [rsp+48H]
4183C0F0 add r8d, -16
4183C410 add r12d, 16
4C3B3E cmp r15, gword ptr [rsi]
0F85C1010000 jne G_M3332_IG29
443B6608 cmp r12d, dword ptr [rsi+8]
0F8EB7010000 jle G_M3332_IG29
488B8C24B0000000 mov rcx, bword ptr [rsp+B0H]
498BD7 mov rdx, r15
E8DFEDB05F call CORINFO_HELP_CHECKED_ASSIGN_REF
8B5608 mov edx, dword ptr [rsi+8]
4C8BB424B0000000 mov r14, bword ptr [rsp+B0H]
41895608 mov dword ptr [r14+8], edx
B8FFFFFFFF mov eax, -1
E9A9010000 jmp G_M3332_IG31
G_M3332_IG15:
448B6C244C mov r13d, dword ptr [rsp+4CH]
E9EDFDFFFF jmp G_M3332_IG04
G_M3332_IG16:
4C8BB424B0000000 mov r14, bword ptr [rsp+B0H]
498BCE mov rcx, r14
498BD7 mov rdx, r15
E8A9EDB05F call CORINFO_HELP_CHECKED_ASSIGN_REF
448B4C2444 mov r9d, dword ptr [rsp+44H]
8B7C2440 mov edi, dword ptr [rsp+40H]
443BCF cmp r9d, edi
7D1C jge SHORT G_M3332_IG18
8B7C243C mov edi, dword ptr [rsp+3CH]
443BCF cmp r9d, edi
7D0B jge SHORT G_M3332_IG17
8B5C2438 mov ebx, dword ptr [rsp+38H]
8BD3 mov edx, ebx
418BF9 mov edi, r9d
EB23 jmp SHORT G_M3332_IG20
G_M3332_IG17:
8B6C2430 mov ebp, dword ptr [rsp+30H]
8BD5 mov edx, ebp
EB1B jmp SHORT G_M3332_IG20
G_M3332_IG18:
448B6C243C mov r13d, dword ptr [rsp+3CH]
413BFD cmp edi, r13d
7D08 jge SHORT G_M3332_IG19
8B5C2434 mov ebx, dword ptr [rsp+34H]
8BD3 mov edx, ebx
EB09 jmp SHORT G_M3332_IG20
G_M3332_IG19:
8B6C2430 mov ebp, dword ptr [rsp+30H]
8BD5 mov edx, ebp
418BFD mov edi, r13d
G_M3332_IG20:
418D0C3C lea ecx, [r12+rdi]
41894E08 mov dword ptr [r14+8], ecx
4C3B3E cmp r15, gword ptr [rsi]
751A jne SHORT G_M3332_IG21
418B4E08 mov ecx, dword ptr [r14+8]
3B4E08 cmp ecx, dword ptr [rsi+8]
7E11 jle SHORT G_M3332_IG21
8B5608 mov edx, dword ptr [rsi+8]
41895608 mov dword ptr [r14+8], edx
B8FFFFFFFF mov eax, -1
E920010000 jmp G_M3332_IG31
G_M3332_IG21:
8BC2 mov eax, edx
E919010000 jmp G_M3332_IG31
G_M3332_IG22:
4D63EC movsxd r13, r12d
4D036F28 add r13, qword ptr [r15+40]
4C3B3E cmp r15, gword ptr [rsi]
740F je SHORT G_M3332_IG23
448B442448 mov r8d, dword ptr [rsp+48H]
4D63C0 movsxd r8, r8d
4D03C5 add r8, r13
EB23 jmp SHORT G_M3332_IG24
G_M3332_IG23:
4C89B424B0000000 mov bword ptr [rsp+B0H], r14
4D8B4728 mov r8, qword ptr [r15+40]
448B7608 mov r14d, dword ptr [rsi+8]
4D63F6 movsxd r14, r14d
4F8D443001 lea r8, [r8+r14+1]
4C8BB424B0000000 mov r14, bword ptr [rsp+B0H]
G_M3332_IG24:
4C89B424B0000000 mov bword ptr [rsp+B0H], r14
G_M3332_IG25:
450FB67500 movzx r14, byte ptr [r13]
8B542438 mov edx, dword ptr [rsp+38H]
443BF2 cmp r14d, edx
7527 jne SHORT G_M3332_IG26
488B8C24B0000000 mov rcx, bword ptr [rsp+B0H]
498BD7 mov rdx, r15
E8D7ECB05F call CORINFO_HELP_CHECKED_ASSIGN_REF
4C8BB424B0000000 mov r14, bword ptr [rsp+B0H]
45896608 mov dword ptr [r14+8], r12d
8B5C2438 mov ebx, dword ptr [rsp+38H]
8BC3 mov eax, ebx
E9A0000000 jmp G_M3332_IG31
G_M3332_IG26:
450FB67500 movzx r14, byte ptr [r13]
8B4C2434 mov ecx, dword ptr [rsp+34H]
443BF1 cmp r14d, ecx
7524 jne SHORT G_M3332_IG27
488B8C24B0000000 mov rcx, bword ptr [rsp+B0H]
498BD7 mov rdx, r15
E8A2ECB05F call CORINFO_HELP_CHECKED_ASSIGN_REF
4C8BB424B0000000 mov r14, bword ptr [rsp+B0H]
45896608 mov dword ptr [r14+8], r12d
8B5C2434 mov ebx, dword ptr [rsp+34H]
8BC3 mov eax, ebx
EB6E jmp SHORT G_M3332_IG31
G_M3332_IG27:
450FB67500 movzx r14, byte ptr [r13]
8B442430 mov eax, dword ptr [rsp+30H]
443BF0 cmp r14d, eax
7524 jne SHORT G_M3332_IG28
488B8C24B0000000 mov rcx, bword ptr [rsp+B0H]
498BD7 mov rdx, r15
E870ECB05F call CORINFO_HELP_CHECKED_ASSIGN_REF
4C8BB424B0000000 mov r14, bword ptr [rsp+B0H]
45896608 mov dword ptr [r14+8], r12d
8B6C2430 mov ebp, dword ptr [rsp+30H]
8BC5 mov eax, ebp
EB3C jmp SHORT G_M3332_IG31
G_M3332_IG28:
49FFC5 inc r13
41FFC4 inc r12d
4D3BE8 cmp r13, r8
0F8558FFFFFF jne G_M3332_IG25
4C8BB424B0000000 mov r14, bword ptr [rsp+B0H]
448B6C244C mov r13d, dword ptr [rsp+4CH]
E96CFCFFFF jmp G_M3332_IG04
G_M3332_IG29:
4585C0 test r8d, r8d
448B4C2444 mov r9d, dword ptr [rsp+44H]
4C8BB424B0000000 mov r14, bword ptr [rsp+B0H]
0F8FD1FCFFFF jg G_M3332_IG11
G_M3332_IG30:
E9B3FCFFFF jmp G_M3332_IG09
G_M3332_IG31:
0F28742450 movaps xmm6, qword ptr [rsp+50H]
4883C468 add rsp, 104
5B pop rbx
5D pop rbp
5E pop rsi
5F pop rdi
415C pop r12
415D pop r13
415E pop r14
415F pop r15
C3 ret
G_M3332_IG32:
E835448D5F call CORINFO_HELP_RNGCHKFAIL
CC int3
; Total bytes of code 1116, prolog size 41 for method MemoryPoolIterator:Seek(byref,byref,byref,byref):int:thisInlines into 06000168 MemoryPoolIterator:FindFirstEqualByte(struct):int
[0 IL=0008 TR=000272 06000169] [FAILED: too many il bytes] MemoryPoolIterator:FindFirstEqualByteSlow(struct):int
[1 IL=0015 TR=000015 0600009D] [below ALWAYS_INLINE size] Vector:AsVectorInt64(struct):struct
[0 IL=0171 TR=000264 06002CA9] [FAILED: unprofitable inline] InvalidOperationException:.ctor():this
Budget: initialTime=591, finalTime=591, initialBudget=5910, currentBudget=5910
Budget: initialSize=4166, finalSize=4166
; Assembly listing for method MemoryPoolIterator:FindFirstEqualByte(struct):int
; Emitting BLENDED_CODE for X64 CPU with SSE2
; optimized code
; rsp based frame
; fully interruptible
; Final local variable assignments
;
; V00 arg0 [V00,T02] ( 6, 6 ) byref -> rsi
; V01 loc0 [V01,T05] ( 2, 4.5) simd16 -> mm0 ld-addr-op
; V02 loc1 [V02,T00] ( 7, 21 ) int -> rcx
; V03 loc2 [V03,T01] ( 9, 11.5) long -> rax
; V04 tmp0 [V04,T07] ( 3, 1.5) int -> rcx
; V05 tmp1 [V05,T08] ( 3, 1.5) int -> rcx
; V06 tmp2 [V06,T09] ( 3, 1.5) int -> rcx
; V07 tmp3 [V07,T03] ( 9, 4.5) int -> rcx
; V08 tmp4 [V08,T04] ( 9, 4.5) int -> rdx
; V09 tmp5 [V09,T10] ( 3, 1.5) int -> rcx
; V10 tmp6 [V10,T11] ( 3, 1.5) int -> rcx
; V11 tmp7 [V11,T12] ( 3, 1.5) int -> rcx
; V12 tmp8 [V12,T13] ( 3, 1.5) int -> rcx
; V13 tmp9 [V13,T14] ( 3, 0 ) ref -> rsi
; V14 tmp10 [V14,T06] ( 2, 2 ) simd16 -> mm0
; V15 tmp11 [V15 ] ( 3, 4 ) simd16 -> [rsp+0x30] do-not-enreg[XSB] addr-exposed
; V16 OutArgs [V16 ] ( 1, 1 ) lclBlk (32) [rsp+0x00]
; V17 rat0 [V17 ] ( 1, 1 ) simd16 -> [rsp+0x20] do-not-enreg[XS] must-init addr-exposed
;
; Lcl frame size = 64
G_M48472_IG01:
56 push rsi
4883EC40 sub rsp, 64
33C0 xor rax, rax
4889442420 mov qword ptr [rsp+20H], rax
4889442428 mov qword ptr [rsp+28H], rax
488BF1 mov rsi, rcx
G_M48472_IG02:
48B9303E5C6CFD7F0000 mov rcx, 0x7FFD6C5C3E30
BA03000000 mov edx, 3
E8D821B15F call CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE
0FB60DD393E8FF movzx rcx, byte ptr [reloc classVar[0x6c7862e0]]
85C9 test ecx, ecx
7519 jne SHORT G_M48472_IG04
0F1006 movups xmm0, xmmword ptr [rsi]
0F29442430 movaps xmmword ptr [rsp+30H], xmm0
488D4C2430 lea rcx, bword ptr [rsp+30H]
E81B6CFFFF call MemoryPoolIterator:FindFirstEqualByteSlow(struct):int
90 nop
G_M48472_IG03:
4883C440 add rsp, 64
5E pop rsi
C3 ret
G_M48472_IG04:
0F1006 movups xmm0, xmmword ptr [rsi]
33C9 xor ecx, ecx
G_M48472_IG05:
83F902 cmp ecx, 2
0F83E8000000 jae G_M48472_IG17
0F11442420 movups xmmword ptr [rsp+20H], xmm0
488B44CC20 mov rax, qword ptr [rsp+8*rcx+20H]
4885C0 test rax, rax
0F84A7000000 je G_M48472_IG15
C1E103 shl ecx, 3
BAFFFFFFFF mov edx, 0xFFFFFFFF
4823D0 and rdx, rax
4885D2 test rdx, rdx
7F52 jg SHORT G_M48472_IG09
48BA00000000FFFF0000 mov rdx, 0xFFFF00000000
4823D0 and rdx, rax
4885D2 test rdx, rdx
7F20 jg SHORT G_M48472_IG07
48BA000000000000FF00 mov rdx, 0xFF000000000000
4823C2 and rax, rdx
4885C0 test rax, rax
7F07 jg SHORT G_M48472_IG06
BA07000000 mov edx, 7
EB63 jmp SHORT G_M48472_IG13
G_M48472_IG06:
BA06000000 mov edx, 6
EB5C jmp SHORT G_M48472_IG13
G_M48472_IG07:
48BA00000000FF000000 mov rdx, 0xFF00000000
4823C2 and rax, rdx
4885C0 test rax, rax
7F07 jg SHORT G_M48472_IG08
BA05000000 mov edx, 5
EB43 jmp SHORT G_M48472_IG13
G_M48472_IG08:
BA04000000 mov edx, 4
EB3C jmp SHORT G_M48472_IG13
G_M48472_IG09:
488BD0 mov rdx, rax
4881E2FFFF0000 and rdx, 0xFFFF
4885D2 test rdx, rdx
7F19 jg SHORT G_M48472_IG11
48250000FF00 and rax, 0xFF0000
4885C0 test rax, rax
7F07 jg SHORT G_M48472_IG10
BA03000000 mov edx, 3
EB1B jmp SHORT G_M48472_IG13
G_M48472_IG10:
BA02000000 mov edx, 2
EB14 jmp SHORT G_M48472_IG13
G_M48472_IG11:
4825FF000000 and rax, 255
4885C0 test rax, rax
7F07 jg SHORT G_M48472_IG12
BA01000000 mov edx, 1
EB02 jmp SHORT G_M48472_IG13
G_M48472_IG12:
33D2 xor edx, edx
G_M48472_IG13:
8D0411 lea eax, [rcx+rdx]
G_M48472_IG14:
4883C440 add rsp, 64
5E pop rsi
C3 ret
G_M48472_IG15:
FFC1 inc ecx
83F902 cmp ecx, 2
0F8C32FFFFFF jl G_M48472_IG05
G_M48472_IG16:
48B9E8B01AC5FD7F0000 mov rcx, 0x7FFDC51AB0E8
E8821AB15F call CORINFO_HELP_NEWSFAST
488BF0 mov rsi, rax
488BCE mov rcx, rsi
E8BFF63C58 call InvalidOperationException:.ctor():this
488BCE mov rcx, rsi
E82F548D5F call CORINFO_HELP_THROW
CC int3
G_M48472_IG17:
E8A9568D5F call CORINFO_HELP_RNGCHKFAIL
CC int3
; Total bytes of code 328, prolog size 20 for method MemoryPoolIterator:FindFirstEqualByte(struct):int |
|
@halter73 btw I'm saying the perf improvement looks like it's passing these Vectors by ref https://github.com/aspnet/KestrelHttpServer/blob/dev/src/Microsoft.AspNetCore.Server.Kestrel/Internal/Http/Frame.cs#L39-L45 Doesn't look like it makes much odds once in-stack though... |
|
@benaadams - yes, the Seek() method looks the same as when passing a copy by ref. |
|
@sivarv it looks the same, except it performs an additional load from stack and then copy back to stack to call |
|
Just changing to byval but not rearranging reduces the |
|
Re-arranging would make byte0Equals, byte1Equals and byte2Equals have non-overlapping lifetimes, plus it would avoid these being live across the call to FindFirstEqualByte(). For that reason, they would remain in registers. |
|
@sivarv with the forced inlining and improved code in #1138 I just need to slightly rearrange to get the reduced registers and only register use (seen in second commit a0ecae4) so will add that there as its just a minor change and close this. inlines and asm for reference Successfully inlined MemoryPoolIterator:get_IsDefault():bool:this (10 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:get_Block():ref:this (7 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:get_Index():int:this (7 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:get_Index():int:this (7 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolBlock:get_Array():ref:this (12 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined ArraySegment`1:get_Array():ref:this (7 IL bytes) (depth 2) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:LocateFirstFoundByte(byref):int (64 IL bytes) (depth 1) [aggressive inline attribute]
Successfully inlined Vector:AsVectorInt64(struct):struct (7 IL bytes) (depth 2) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:LocateFirstFoundByte(byref):int (64 IL bytes) (depth 1) [aggressive inline attribute]
Successfully inlined Vector:AsVectorInt64(struct):struct (7 IL bytes) (depth 2) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:LocateFirstFoundByte(byref):int (64 IL bytes) (depth 1) [aggressive inline attribute]
Successfully inlined Vector:AsVectorInt64(struct):struct (7 IL bytes) (depth 2) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:get_Block():ref:this (7 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:get_Index():int:this (7 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:get_Index():int:this (7 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:get_Block():ref:this (7 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:get_Index():int:this (7 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:get_Index():int:this (7 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:get_Block():ref:this (7 IL bytes) (depth 1) [below ALWAYS_INLINE size]
Successfully inlined MemoryPoolIterator:get_Index():int:this (7 IL bytes) (depth 1) [below ALWAYS_INLINE size]
**************** Inline Tree
Inlines into 06000167 MemoryPoolIterator:Seek(byref,byref,byref,byref):int:this
[1 IL=0001 TR=000002 06000159] [below ALWAYS_INLINE size] MemoryPoolIterator:get_IsDefault():bool:this
[2 IL=0098 TR=000076 0600015B] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Block():ref:this
[3 IL=0108 TR=000141 0600015C] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Index():int:this
[4 IL=0132 TR=000129 0600015C] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Index():int:this
[5 IL=0183 TR=000151 06000151] [below ALWAYS_INLINE size] MemoryPoolBlock:get_Array():ref:this
[6 IL=0006 TR=000639 060038EC] [below ALWAYS_INLINE size] ArraySegment`1:get_Array():ref:this
[7 IL=0254 TR=000604 06000168] [aggressive inline attribute] MemoryPoolIterator:LocateFirstFoundByte(byref):int
[8 IL=0006 TR=000657 0600009D] [below ALWAYS_INLINE size] Vector:AsVectorInt64(struct):struct
[9 IL=0291 TR=000591 06000168] [aggressive inline attribute] MemoryPoolIterator:LocateFirstFoundByte(byref):int
[10 IL=0006 TR=000756 0600009D] [below ALWAYS_INLINE size] Vector:AsVectorInt64(struct):struct
[11 IL=0327 TR=000581 06000168] [aggressive inline attribute] MemoryPoolIterator:LocateFirstFoundByte(byref):int
[12 IL=0006 TR=000855 0600009D] [below ALWAYS_INLINE size] Vector:AsVectorInt64(struct):struct
[13 IL=0380 TR=000546 0600015B] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Block():ref:this
[14 IL=0393 TR=000555 0600015C] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Index():int:this
[15 IL=0413 TR=000569 0600015C] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Index():int:this
[16 IL=0501 TR=000450 0600015B] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Block():ref:this
[17 IL=0516 TR=000464 0600015C] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Index():int:this
[18 IL=0526 TR=000477 0600015C] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Index():int:this
[19 IL=0554 TR=000184 0600015B] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Block():ref:this
[20 IL=0575 TR=000194 0600015C] [below ALWAYS_INLINE size] MemoryPoolIterator:get_Index():int:this
Budget: initialTime=2136, finalTime=2494, initialBudget=21360, currentBudget=21702
Budget: increased by 342 because of force inlines
Budget: initialSize=15908, finalSize=15908; Assembly listing for method MemoryPoolIterator:Seek(byref,byref,byref,byref):int:this
; Emitting BLENDED_CODE for X64 CPU with SSE2
; optimized code
; rsp based frame
; fully interruptible
; Final local variable assignments
;
; V00 this [V00,T40] ( 19, 11 ) byref -> [rsp+0x90] this
; V01 arg1 [V01,T30] ( 4, 514.5) byref -> [rsp+0x98]
; V02 arg2 [V02,T31] ( 4, 514.5) byref -> [rsp+0xA0]
; V03 arg3 [V03,T32] ( 4, 514.5) byref -> r9
; V04 arg4 [V04,T17] ( 11, 1286.5) byref -> rsi
; V05 loc0 [V05,T11] ( 22, 2193 ) ref -> rbx
; V06 loc1 [V06,T01] ( 18,11401 ) int -> rbp
; V07 loc2 [V07,T43] ( 3, 4.5) bool -> [rsp+0x44]
; V08 loc3 [V08,T06] ( 10, 3955 ) int -> r15
; V09 loc4 [V09,T12] ( 4, 1792 ) ref -> r14
; V10 loc5 [V10,T25] ( 6, 642 ) int -> r12
; V11 loc6 [V11,T26] ( 6, 642 ) int -> r13
; V12 loc7 [V12,T24] ( 7, 642.5) int -> [rsp+0x40]
; V13 loc8 [V13,T04] ( 4, 4097.5) ubyte -> [rsp+0x3C]
; V14 loc9 [V14,T05] ( 4, 4097.5) ubyte -> [rsp+0x38]
; V15 loc10 [V15,T03] ( 5, 4098 ) ubyte -> [rsp+0x34]
; V16 loc11 [V16,T00] ( 8,25216 ) long -> r14
; V17 loc12 [V17,T02] ( 2, 4608 ) long -> r15
; V18 loc13 [V18,T18] ( 3, 1152 ) simd16 -> mm1 ld-addr-op
; V19 loc14 [V19,T19] ( 3, 1152 ) simd16 -> mm1 ld-addr-op
; V20 loc15 [V20,T20] ( 3, 1152 ) simd16 -> mm0 ld-addr-op
; V21 loc16 [V21,T44] ( 5, 2.5) int -> rcx
; V22 loc17 [V22,T45] ( 5, 2.5) int -> r13
; V23 tmp0 [V23,T42] ( 3, 6 ) int -> rbp
; V24 tmp1 [V24,T33] ( 2, 512 ) long -> r15
; V25 tmp2 [V25,T23] ( 3, 768 ) long -> r15
; V26 tmp3 [V26,T07] ( 3, 3072 ) simd16 -> mm0
; V27 tmp4 [V27,T13] ( 3, 1536 ) simd16 -> mm0
; V28 tmp5 [V28,T22] ( 2, 1024 ) simd16 -> mm0
; V29 tmp6 [V29,T46] ( 2, 2 ) int -> rdx
; V30 tmp7 [V30,T41] ( 9, 10 ) int -> rax
; V31 tmp8 [V31,T21] ( 2, 1024 ) byref -> r14
; V32 tmp9 [V32,T27] ( 2, 640 ) simd16 -> mm1 ld-addr-op
; V33 tmp10 [V33,T08] ( 7, 2816 ) int -> r12
; V34 tmp11 [V34,T14] ( 4, 1376 ) long -> rdi
; V35 tmp12 [V35,T37] ( 2, 256 ) int -> rdx
; V36 tmp13 [V36,T34] ( 2, 512 ) simd16 -> mm1 ld-addr-op
; V37 tmp14 [V37,T28] ( 2, 640 ) simd16 -> mm1 ld-addr-op
; V38 tmp15 [V38,T09] ( 7, 2816 ) int -> r13
; V39 tmp16 [V39,T15] ( 4, 1376 ) long -> rdi
; V40 tmp17 [V40,T38] ( 2, 256 ) int -> r8
; V41 tmp18 [V41,T35] ( 2, 512 ) simd16 -> mm1 ld-addr-op
; V42 tmp19 [V42,T29] ( 2, 640 ) simd16 -> mm0 ld-addr-op
; V43 tmp20 [V43,T10] ( 7, 2816 ) int -> r10
; V44 tmp21 [V44,T16] ( 4, 1376 ) long -> rdi
; V45 tmp22 [V45,T39] ( 2, 256 ) int -> r8
; V46 tmp23 [V46,T36] ( 2, 512 ) simd16 -> mm0 ld-addr-op
; V47 OutArgs [V47 ] ( 1, 1 ) lclBlk (32) [rsp+0x00]
; V48 rat0 [V48 ] ( 1, 1 ) simd16 -> [rsp+0x20] do-not-enreg[XS] must-init addr-exposed
;
; Lcl frame size = 72
G_M3338_IG01:
4157 push r15
4156 push r14
4155 push r13
4154 push r12
57 push rdi
56 push rsi
55 push rbp
53 push rbx
4883EC48 sub rsp, 72
33C0 xor rax, rax
4889442420 mov qword ptr [rsp+20H], rax
4889442428 mov qword ptr [rsp+28H], rax
488BF9 mov rdi, rcx
488BB424B0000000 mov rsi, bword ptr [rsp+B0H]
G_M3338_IG02:
48833F00 cmp gword ptr [rdi], 0
750A jne SHORT G_M3338_IG03
B8FFFFFFFF mov eax, -1
E987040000 jmp G_M3338_IG39
G_M3338_IG03:
488B1F mov rbx, gword ptr [rdi]
8B6F08 mov ebp, dword ptr [rdi+8]
48837B1800 cmp gword ptr [rbx+24], 0
410F94C6 sete r14b
450FB6F6 movzx r14, r14b
8B4B34 mov ecx, dword ptr [rbx+52]
448BF9 mov r15d, ecx
442BFD sub r15d, ebp
41BCFFFFFF7F mov r12d, 0x7FFFFFFF
41BDFFFFFF7F mov r13d, 0x7FFFFFFF
41BAFFFFFF7F mov r10d, 0x7FFFFFFF
0FB60A movzx rcx, byte ptr [rdx]
440FB6D9 movzx r11, cl
44895C243C mov dword ptr [rsp+3CH], r11d
410FB608 movzx rcx, byte ptr [r8]
0FB6C9 movzx rcx, cl
894C2438 mov dword ptr [rsp+38H], ecx
410FB601 movzx rax, byte ptr [r9]
0FB6C0 movzx rax, al
89442434 mov dword ptr [rsp+34H], eax
4585FF test r15d, r15d
7512 jne SHORT G_M3338_IG05
G_M3338_IG04:
483B1E cmp rbx, gword ptr [rsi]
7514 jne SHORT G_M3338_IG06
3B6E08 cmp ebp, dword ptr [rsi+8]
400F9FC5 setg bpl
400FB6ED movzx rbp, bpl
EB09 jmp SHORT G_M3338_IG07
G_M3338_IG05:
4489742444 mov dword ptr [rsp+44H], r14d
EB4D jmp SHORT G_M3338_IG10
G_M3338_IG06:
33ED xor ebp, ebp
G_M3338_IG07:
410BEE or ebp, r14d
85ED test ebp, ebp
741B je SHORT G_M3338_IG08
488BCF mov rcx, rdi
488BD3 mov rdx, rbx
E8F7EFB05F call CORINFO_HELP_CHECKED_ASSIGN_REF
8B5608 mov edx, dword ptr [rsi+8]
895708 mov dword ptr [rdi+8], edx
B8FFFFFFFF mov eax, -1
E9F5030000 jmp G_M3338_IG39
G_M3338_IG08:
488B5B18 mov rbx, gword ptr [rbx+24]
8B6B30 mov ebp, dword ptr [rbx+48]
48837B1800 cmp gword ptr [rbx+24], 0
410F94C6 sete r14b
450FB6F6 movzx r14, r14b
4489742444 mov dword ptr [rsp+44H], r14d
448B7B34 mov r15d, dword ptr [rbx+52]
442BFD sub r15d, ebp
G_M3338_IG09:
4585FF test r15d, r15d
0F841E020000 je G_M3338_IG23
G_M3338_IG10:
448B33 mov r14d, dword ptr [rbx]
4C8D7338 lea r14, bword ptr [rbx+56]
4D8B36 mov r14, gword ptr [r14]
4585FF test r15d, r15d
7EE8 jle SHORT G_M3338_IG09
G_M3338_IG11:
4183FF10 cmp r15d, 16
0F8C93020000 jl G_M3338_IG30
4889BC2490000000 mov bword ptr [rsp+90H], rdi
413B6E08 cmp ebp, dword ptr [r14+8]
0F83B2030000 jae G_M3338_IG40
8D7D0F lea edi, [rbp+15]
413B7E08 cmp edi, dword ptr [r14+8]
0F83A5030000 jae G_M3338_IG40
410F10442E10 movups xmm0, xmmword ptr [r14+rbp+16]
4889942498000000 mov bword ptr [rsp+98H], rdx
0F100A movups xmm1, xmmword ptr [rdx]
660F74C8 pcmpeqb xmm1, xmm0
660FEFD2 pxor xmm2, xmm2
0F28D9 movaps xmm3, xmm1
660F76DA pcmpeqd xmm3, xmm2
660F70E34E pshufd xmm4, xmm3, 78
0F54DC andps xmm3, xmm4
660F70E301 pshufd xmm4, xmm3, 1
660FDBDC pand xmm3, xmm4
660F7EDF movd edi, xmm3
81FFFFFFFFFF cmp edi, 0xFFFFFFFF
7444 je SHORT G_M3338_IG14
4533E4 xor r12d, r12d
G_M3338_IG12:
4183FC02 cmp r12d, 2
0F835B030000 jae G_M3338_IG40
0F114C2420 movups xmmword ptr [rsp+20H], xmm1
4A8B7CE420 mov rdi, qword ptr [rsp+8*r12+20H]
4885FF test rdi, rdi
7509 jne SHORT G_M3338_IG13
41FFC4 inc r12d
4183FC02 cmp r12d, 2
7CDE jl SHORT G_M3338_IG12
G_M3338_IG13:
488BD7 mov rdx, rdi
48F7DA neg rdx
4823D7 and rdx, rdi
48BFE0C0A08060402000 mov rdi, 0x20406080A0C0E0
480FAFD7 imul rdx, rdi
48C1EA3D shr rdx, 61
468D24E2 lea r12d, [rdx+8*r12]
G_M3338_IG14:
4C898424A0000000 mov bword ptr [rsp+A0H], r8
410F1008 movups xmm1, xmmword ptr [r8]
660F74C8 pcmpeqb xmm1, xmm0
660FEFD2 pxor xmm2, xmm2
0F28D9 movaps xmm3, xmm1
660F76DA pcmpeqd xmm3, xmm2
660F70E34E pshufd xmm4, xmm3, 78
0F54DC andps xmm3, xmm4
660F70E301 pshufd xmm4, xmm3, 1
660FDBDC pand xmm3, xmm4
660F7EDF movd edi, xmm3
81FFFFFFFFFF cmp edi, 0xFFFFFFFF
7444 je SHORT G_M3338_IG17
4533ED xor r13d, r13d
G_M3338_IG15:
4183FD02 cmp r13d, 2
0F83DF020000 jae G_M3338_IG40
0F114C2420 movups xmmword ptr [rsp+20H], xmm1
4A8B7CEC20 mov rdi, qword ptr [rsp+8*r13+20H]
4885FF test rdi, rdi
7509 jne SHORT G_M3338_IG16
41FFC5 inc r13d
4183FD02 cmp r13d, 2
7CDE jl SHORT G_M3338_IG15
G_M3338_IG16:
4C8BC7 mov r8, rdi
49F7D8 neg r8
4C23C7 and r8, rdi
48BFE0C0A08060402000 mov rdi, 0x20406080A0C0E0
4C0FAFC7 imul r8, rdi
49C1E83D shr r8, 61
478D2CE8 lea r13d, [r8+8*r13]
G_M3338_IG17:
410F1009 movups xmm1, xmmword ptr [r9]
660F74C1 pcmpeqb xmm0, xmm1
660FEFC9 pxor xmm1, xmm1
0F28D0 movaps xmm2, xmm0
660F76D1 pcmpeqd xmm2, xmm1
660F70DA4E pshufd xmm3, xmm2, 78
0F54D3 andps xmm2, xmm3
660F70DA01 pshufd xmm3, xmm2, 1
660FDBD3 pand xmm2, xmm3
660F7ED7 movd edi, xmm2
81FFFFFFFFFF cmp edi, 0xFFFFFFFF
744A je SHORT G_M3338_IG20
4533D2 xor r10d, r10d
G_M3338_IG18:
4183FA02 cmp r10d, 2
0F836B020000 jae G_M3338_IG40
0F11442420 movups xmmword ptr [rsp+20H], xmm0
4A8B7CD420 mov rdi, qword ptr [rsp+8*r10+20H]
4885FF test rdi, rdi
7509 jne SHORT G_M3338_IG19
41FFC2 inc r10d
4183FA02 cmp r10d, 2
7CDE jl SHORT G_M3338_IG18
G_M3338_IG19:
4C8BC7 mov r8, rdi
49F7D8 neg r8
4C23C7 and r8, rdi
48BFE0C0A08060402000 mov rdi, 0x20406080A0C0E0
4C0FAFC7 imul r8, rdi
49C1E83D shr r8, 61
478D14D0 lea r10d, [r8+8*r10]
418BFA mov edi, r10d
448BD7 mov r10d, edi
G_M3338_IG20:
4181FCFFFFFF7F cmp r12d, 0x7FFFFFFF
755F jne SHORT G_M3338_IG22
4181FDFFFFFF7F cmp r13d, 0x7FFFFFFF
754F jne SHORT G_M3338_IG21
4489542440 mov dword ptr [rsp+40H], r10d
4181FAFFFFFF7F cmp r10d, 0x7FFFFFFF
7559 jne SHORT G_M3338_IG24
4183C7F0 add r15d, -16
83C510 add ebp, 16
483B1E cmp rbx, gword ptr [rsi]
0F85C2010000 jne G_M3338_IG37
3B6E08 cmp ebp, dword ptr [rsi+8]
0F8EB9010000 jle G_M3338_IG37
488B8C2490000000 mov rcx, bword ptr [rsp+90H]
488BD3 mov rdx, rbx
E8C6EDB05F call CORINFO_HELP_CHECKED_ASSIGN_REF
8B5608 mov edx, dword ptr [rsi+8]
488BBC2490000000 mov rdi, bword ptr [rsp+90H]
895708 mov dword ptr [rdi+8], edx
B8FFFFFFFF mov eax, -1
E9BC010000 jmp G_M3338_IG39
G_M3338_IG21:
4489542440 mov dword ptr [rsp+40H], r10d
EB11 jmp SHORT G_M3338_IG24
G_M3338_IG22:
4489542440 mov dword ptr [rsp+40H], r10d
EB0A jmp SHORT G_M3338_IG24
G_M3338_IG23:
448B742444 mov r14d, dword ptr [rsp+44H]
E972FDFFFF jmp G_M3338_IG04
G_M3338_IG24:
488BBC2490000000 mov rdi, bword ptr [rsp+90H]
488BCF mov rcx, rdi
488BD3 mov rdx, rbx
E883EDB05F call CORINFO_HELP_CHECKED_ASSIGN_REF
453BE5 cmp r12d, r13d
7D21 jge SHORT G_M3338_IG26
448B6C2440 mov r13d, dword ptr [rsp+40H]
453BE5 cmp r12d, r13d
7D0D jge SHORT G_M3338_IG25
448B74243C mov r14d, dword ptr [rsp+3CH]
418BCE mov ecx, r14d
458BEC mov r13d, r12d
EB29 jmp SHORT G_M3338_IG28
G_M3338_IG25:
448B642434 mov r12d, dword ptr [rsp+34H]
418BCC mov ecx, r12d
EB1F jmp SHORT G_M3338_IG28
G_M3338_IG26:
448B742440 mov r14d, dword ptr [rsp+40H]
453BEE cmp r13d, r14d
7D0A jge SHORT G_M3338_IG27
448B7C2438 mov r15d, dword ptr [rsp+38H]
418BCF mov ecx, r15d
EB0B jmp SHORT G_M3338_IG28
G_M3338_IG27:
448B642434 mov r12d, dword ptr [rsp+34H]
418BCC mov ecx, r12d
458BEE mov r13d, r14d
G_M3338_IG28:
428D542D00 lea edx, [rbp+r13]
895708 mov dword ptr [rdi+8], edx
483B1E cmp rbx, gword ptr [rsi]
7518 jne SHORT G_M3338_IG29
8B5708 mov edx, dword ptr [rdi+8]
3B5608 cmp edx, dword ptr [rsi+8]
7E10 jle SHORT G_M3338_IG29
8B4E08 mov ecx, dword ptr [rsi+8]
894F08 mov dword ptr [rdi+8], ecx
B8FFFFFFFF mov eax, -1
E927010000 jmp G_M3338_IG39
G_M3338_IG29:
8BC1 mov eax, ecx
E920010000 jmp G_M3338_IG39
G_M3338_IG30:
4C63F5 movsxd r14, ebp
4C037328 add r14, qword ptr [rbx+40]
483B1E cmp rbx, gword ptr [rsi]
740A je SHORT G_M3338_IG31
4D63FF movsxd r15, r15d
4D03FE add r15, r14
EB21 jmp SHORT G_M3338_IG32
G_M3338_IG31:
4889BC2490000000 mov bword ptr [rsp+90H], rdi
4C8B7B28 mov r15, qword ptr [rbx+40]
8B7E08 mov edi, dword ptr [rsi+8]
4863FF movsxd rdi, edi
4D8D7C3F01 lea r15, [r15+rdi+1]
488BBC2490000000 mov rdi, bword ptr [rsp+90H]
G_M3338_IG32:
4889BC2490000000 mov bword ptr [rsp+90H], rdi
G_M3338_IG33:
410FB63E movzx rdi, byte ptr [r14]
448B5C243C mov r11d, dword ptr [rsp+3CH]
413BFB cmp edi, r11d
7528 jne SHORT G_M3338_IG34
488B8C2490000000 mov rcx, bword ptr [rsp+90H]
488BD3 mov rdx, rbx
E8B9ECB05F call CORINFO_HELP_CHECKED_ASSIGN_REF
488BBC2490000000 mov rdi, bword ptr [rsp+90H]
896F08 mov dword ptr [rdi+8], ebp
448B74243C mov r14d, dword ptr [rsp+3CH]
418BC6 mov eax, r14d
E9AD000000 jmp G_M3338_IG39
G_M3338_IG34:
410FB63E movzx rdi, byte ptr [r14]
8B4C2438 mov ecx, dword ptr [rsp+38H]
3BF9 cmp edi, ecx
7525 jne SHORT G_M3338_IG35
488B8C2490000000 mov rcx, bword ptr [rsp+90H]
488BD3 mov rdx, rbx
E885ECB05F call CORINFO_HELP_CHECKED_ASSIGN_REF
488BBC2490000000 mov rdi, bword ptr [rsp+90H]
896F08 mov dword ptr [rdi+8], ebp
448B7C2438 mov r15d, dword ptr [rsp+38H]
418BC7 mov eax, r15d
EB7C jmp SHORT G_M3338_IG39
G_M3338_IG35:
410FB63E movzx rdi, byte ptr [r14]
8B442434 mov eax, dword ptr [rsp+34H]
3BF8 cmp edi, eax
7525 jne SHORT G_M3338_IG36
488B8C2490000000 mov rcx, bword ptr [rsp+90H]
488BD3 mov rdx, rbx
E854ECB05F call CORINFO_HELP_CHECKED_ASSIGN_REF
488BBC2490000000 mov rdi, bword ptr [rsp+90H]
896F08 mov dword ptr [rdi+8], ebp
448B642434 mov r12d, dword ptr [rsp+34H]
418BC4 mov eax, r12d
EB4B jmp SHORT G_M3338_IG39
G_M3338_IG36:
49FFC6 inc r14
FFC5 inc ebp
4D3BF7 cmp r14, r15
0F855AFFFFFF jne G_M3338_IG33
488BBC2490000000 mov rdi, bword ptr [rsp+90H]
448B742444 mov r14d, dword ptr [rsp+44H]
E9FBFBFFFF jmp G_M3338_IG04
G_M3338_IG37:
4585FF test r15d, r15d
488B942498000000 mov rdx, bword ptr [rsp+98H]
488BBC2490000000 mov rdi, bword ptr [rsp+90H]
4C8B8424A0000000 mov r8, bword ptr [rsp+A0H]
448B542440 mov r10d, dword ptr [rsp+40H]
0F8F4AFCFFFF jg G_M3338_IG11
G_M3338_IG38:
E92DFCFFFF jmp G_M3338_IG09
G_M3338_IG39:
4883C448 add rsp, 72
5B pop rbx
5D pop rbp
5E pop rsi
5F pop rdi
415C pop r12
415D pop r13
415E pop r14
415F pop r15
C3 ret
G_M3338_IG40:
E80E448D5F call CORINFO_HELP_RNGCHKFAIL
CC int3
; Total bytes of code 1235, prolog size 39 for method MemoryPoolIterator:Seek(byref,byref,byref,byref):int:this |
|
Sounds good to me. |
Passing a byref vector by ref to another function causes it to spill to the stack. Taking a local copy and passing that copy to the function instead keeps it in a register.
Resolves #1141