diff --git a/benches/ref_from_bytes_dynamic_padding.x86-64 b/benches/ref_from_bytes_dynamic_padding.x86-64 index e844a4608f..5177a4ce95 100644 --- a/benches/ref_from_bytes_dynamic_padding.x86-64 +++ b/benches/ref_from_bytes_dynamic_padding.x86-64 @@ -1,22 +1,24 @@ bench_ref_from_bytes_dynamic_padding: - test dil, 3 - jne .LBB5_3 movabs rax, 9223372036854775804 and rax, rsi cmp rax, 9 - jb .LBB5_3 + setb cl + test dil, 3 + setne dl + or dl, cl + jne .LBB5_1 add rax, -9 movabs rcx, -6148914691236517205 mul rcx shr rdx - lea rax, [rdx + 2*rdx] - or rax, 3 - add rax, 9 - cmp rsi, rax - je .LBB5_4 -.LBB5_3: - xor edi, edi - mov rdx, rsi -.LBB5_4: - mov rax, rdi + lea rcx, [rdx + 2*rdx] + or rcx, 3 + add rcx, 9 + xor eax, eax + cmp rsi, rcx + cmovne rdx, rsi + cmove rax, rdi + ret +.LBB5_1: + xor eax, eax ret diff --git a/benches/ref_from_bytes_dynamic_padding.x86-64.mca b/benches/ref_from_bytes_dynamic_padding.x86-64.mca index 423ed38ba2..25a0d3e961 100644 --- a/benches/ref_from_bytes_dynamic_padding.x86-64.mca +++ b/benches/ref_from_bytes_dynamic_padding.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 1900 -Total Cycles: 645 -Total uOps: 2000 +Instructions: 2200 +Total Cycles: 783 +Total uOps: 2500 Dispatch Width: 4 -uOps Per Cycle: 3.10 -IPC: 2.95 -Block RThroughput: 5.0 +uOps Per Cycle: 3.19 +IPC: 2.81 +Block RThroughput: 6.3 Instruction Info: @@ -18,24 +18,27 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 test dil, 3 - 1 1 1.00 jne .LBB5_3 1 1 0.33 movabs rax, 9223372036854775804 1 1 0.33 and rax, rsi 1 1 0.33 cmp rax, 9 - 1 1 1.00 jb .LBB5_3 + 1 1 0.50 setb cl + 1 1 0.33 test dil, 3 + 1 1 0.50 setne dl + 1 1 0.33 or dl, cl + 1 1 1.00 jne .LBB5_1 1 1 0.33 add rax, -9 1 1 0.33 movabs rcx, -6148914691236517205 2 4 1.00 mul rcx 1 1 0.50 shr rdx - 1 1 0.50 lea rax, [rdx + 2*rdx] - 1 1 0.33 or rax, 3 - 1 1 0.33 add rax, 9 - 1 1 0.33 cmp rsi, rax - 1 1 1.00 je .LBB5_4 - 1 0 0.25 xor edi, edi - 1 1 0.33 mov rdx, rsi - 1 1 0.33 mov rax, rdi + 1 1 0.50 lea rcx, [rdx + 2*rdx] + 1 1 0.33 or rcx, 3 + 1 1 0.33 add rcx, 9 + 1 0 0.25 xor eax, eax + 1 1 0.33 cmp rsi, rcx + 2 2 0.67 cmovne rdx, rsi + 2 2 0.67 cmove rax, rdi + 1 1 1.00 U ret + 1 0 0.25 xor eax, eax 1 1 1.00 U ret @@ -52,26 +55,29 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 6.32 6.33 - 6.35 - - + - - 7.65 7.67 - 7.68 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.64 0.35 - 0.01 - - test dil, 3 - - - - - - 1.00 - - jne .LBB5_3 - - - 0.34 0.65 - 0.01 - - movabs rax, 9223372036854775804 - - - 0.35 0.65 - - - - and rax, rsi - - - 0.33 0.34 - 0.33 - - cmp rax, 9 - - - - - - 1.00 - - jb .LBB5_3 - - - 0.35 - - 0.65 - - add rax, -9 - - - 0.97 0.01 - 0.02 - - movabs rcx, -6148914691236517205 + - - - 0.99 - 0.01 - - movabs rax, 9223372036854775804 + - - 0.04 0.95 - 0.01 - - and rax, rsi + - - 0.09 0.85 - 0.06 - - cmp rax, 9 + - - 0.50 - - 0.50 - - setb cl + - - 0.01 0.95 - 0.04 - - test dil, 3 + - - 0.36 - - 0.64 - - setne dl + - - 0.47 0.12 - 0.41 - - or dl, cl + - - - - - 1.00 - - jne .LBB5_1 + - - - 0.95 - 0.05 - - add rax, -9 + - - - 0.81 - 0.19 - - movabs rcx, -6148914691236517205 - - 1.00 1.00 - - - - mul rcx - - - 0.99 - - 0.01 - - shr rdx - - - 0.33 0.67 - - - - lea rax, [rdx + 2*rdx] - - - 0.34 0.66 - - - - or rax, 3 - - - 0.33 0.66 - 0.01 - - add rax, 9 - - - 0.01 0.99 - - - - cmp rsi, rax - - - - - - 1.00 - - je .LBB5_4 - - - - - - - - - xor edi, edi - - - 0.32 0.01 - 0.67 - - mov rdx, rsi - - - 0.02 0.34 - 0.64 - - mov rax, rdi + - - 0.62 - - 0.38 - - shr rdx + - - 0.62 0.38 - - - - lea rcx, [rdx + 2*rdx] + - - 0.59 0.17 - 0.24 - - or rcx, 3 + - - 0.61 0.19 - 0.20 - - add rcx, 9 + - - - - - - - - xor eax, eax + - - 0.75 0.24 - 0.01 - - cmp rsi, rcx + - - 1.00 0.03 - 0.97 - - cmovne rdx, rsi + - - 0.99 0.04 - 0.97 - - cmove rax, rdi + - - - - - 1.00 - - ret + - - - - - - - - xor eax, eax - - - - - 1.00 - - ret diff --git a/benches/ref_from_bytes_dynamic_size.x86-64 b/benches/ref_from_bytes_dynamic_size.x86-64 index cc905b76c0..2ed6e32b05 100644 --- a/benches/ref_from_bytes_dynamic_size.x86-64 +++ b/benches/ref_from_bytes_dynamic_size.x86-64 @@ -1,20 +1,20 @@ bench_ref_from_bytes_dynamic_size: - mov rdx, rsi cmp rsi, 4 setb al - or al, dil - test al, 1 - je .LBB5_2 + mov ecx, edi + or cl, al + test cl, 1 + jne .LBB5_1 + lea rcx, [rsi - 4] + mov rdx, rcx + shr rdx + and rcx, -2 + add rcx, 4 xor eax, eax + cmp rsi, rcx + cmovne rdx, rsi + cmove rax, rdi ret -.LBB5_2: - lea rcx, [rdx - 4] - mov rsi, rcx - and rsi, -2 - add rsi, 4 - shr rcx +.LBB5_1: xor eax, eax - cmp rdx, rsi - cmove rdx, rcx - cmove rax, rdi ret diff --git a/benches/ref_from_bytes_dynamic_size.x86-64.mca b/benches/ref_from_bytes_dynamic_size.x86-64.mca index 68aea583e4..7c90f65142 100644 --- a/benches/ref_from_bytes_dynamic_size.x86-64.mca +++ b/benches/ref_from_bytes_dynamic_size.x86-64.mca @@ -1,11 +1,11 @@ Iterations: 100 Instructions: 1800 -Total Cycles: 704 +Total Cycles: 606 Total uOps: 2000 Dispatch Width: 4 -uOps Per Cycle: 2.84 -IPC: 2.56 +uOps Per Cycle: 3.30 +IPC: 2.97 Block RThroughput: 5.0 @@ -18,23 +18,23 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 mov rdx, rsi 1 1 0.33 cmp rsi, 4 1 1 0.50 setb al - 1 1 0.33 or al, dil - 1 1 0.33 test al, 1 - 1 1 1.00 je .LBB5_2 + 1 1 0.33 mov ecx, edi + 1 1 0.33 or cl, al + 1 1 0.33 test cl, 1 + 1 1 1.00 jne .LBB5_1 + 1 1 0.50 lea rcx, [rsi - 4] + 1 1 0.33 mov rdx, rcx + 1 1 0.50 shr rdx + 1 1 0.33 and rcx, -2 + 1 1 0.33 add rcx, 4 1 0 0.25 xor eax, eax + 1 1 0.33 cmp rsi, rcx + 2 2 0.67 cmovne rdx, rsi + 2 2 0.67 cmove rax, rdi 1 1 1.00 U ret - 1 1 0.50 lea rcx, [rdx - 4] - 1 1 0.33 mov rsi, rcx - 1 1 0.33 and rsi, -2 - 1 1 0.33 add rsi, 4 - 1 1 0.50 shr rcx 1 0 0.25 xor eax, eax - 1 1 0.33 cmp rdx, rsi - 2 2 0.67 cmove rdx, rcx - 2 2 0.67 cmove rax, rdi 1 1 1.00 U ret @@ -51,25 +51,25 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 5.97 5.98 - 6.05 - - + - - 6.00 6.00 - 6.00 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.97 0.01 - 0.02 - - mov rdx, rsi - - - 0.01 0.02 - 0.97 - - cmp rsi, 4 - - - 0.03 - - 0.97 - - setb al - - - 0.01 0.02 - 0.97 - - or al, dil - - - - 0.98 - 0.02 - - test al, 1 - - - - - - 1.00 - - je .LBB5_2 + - - 0.99 - - 0.01 - - cmp rsi, 4 + - - 1.00 - - - - - setb al + - - 0.98 0.02 - - - - mov ecx, edi + - - 0.98 0.01 - 0.01 - - or cl, al + - - 0.01 0.99 - - - - test cl, 1 + - - - - - 1.00 - - jne .LBB5_1 + - - 0.99 0.01 - - - - lea rcx, [rsi - 4] + - - 0.02 0.98 - - - - mov rdx, rcx + - - - - - 1.00 - - shr rdx + - - 0.99 0.01 - - - - and rcx, -2 + - - - 1.00 - - - - add rcx, 4 - - - - - - - - xor eax, eax + - - 0.02 0.98 - - - - cmp rsi, rcx + - - 0.01 1.00 - 0.99 - - cmovne rdx, rsi + - - 0.01 1.00 - 0.99 - - cmove rax, rdi - - - - - 1.00 - - ret - - - 0.98 0.02 - - - - lea rcx, [rdx - 4] - - - 0.01 0.99 - - - - mov rsi, rcx - - - - 0.98 - 0.02 - - and rsi, -2 - - - 0.98 0.01 - 0.01 - - add rsi, 4 - - - 0.99 - - 0.01 - - shr rcx - - - - - - - - xor eax, eax - - - 0.02 0.97 - 0.01 - - cmp rdx, rsi - - - 0.99 0.99 - 0.02 - - cmove rdx, rcx - - - 0.98 0.99 - 0.03 - - cmove rax, rdi - - - - - 1.00 - - ret diff --git a/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64 b/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64 index d579b3faef..1ab816b4cc 100644 --- a/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64 +++ b/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64 @@ -1,19 +1,17 @@ bench_ref_from_bytes_with_elems_dynamic_padding: - movabs rax, 3074457345618258598 - cmp rdx, rax - seta cl + movabs rcx, 3074457345618258598 + cmp rdx, rcx + ja .LBB5_3 mov rax, rdi test al, 3 - setne dil - or dil, cl - jne .LBB5_2 + jne .LBB5_3 lea rcx, [rdx + 2*rdx] or rcx, 3 add rcx, 9 cmp rsi, rcx - je .LBB5_3 -.LBB5_2: + jne .LBB5_3 + ret +.LBB5_3: xor eax, eax mov rdx, rsi -.LBB5_3: ret diff --git a/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64.mca b/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64.mca index ea2d83dbd1..afb0b4c0b1 100644 --- a/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64.mca +++ b/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 1600 -Total Cycles: 539 -Total uOps: 1700 +Instructions: 1500 +Total Cycles: 505 +Total uOps: 1500 Dispatch Width: 4 -uOps Per Cycle: 3.15 +uOps Per Cycle: 2.97 IPC: 2.97 -Block RThroughput: 4.3 +Block RThroughput: 5.0 Instruction Info: @@ -18,19 +18,18 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 movabs rax, 3074457345618258598 - 1 1 0.33 cmp rdx, rax - 2 2 1.00 seta cl + 1 1 0.33 movabs rcx, 3074457345618258598 + 1 1 0.33 cmp rdx, rcx + 1 1 1.00 ja .LBB5_3 1 1 0.33 mov rax, rdi 1 1 0.33 test al, 3 - 1 1 0.50 setne dil - 1 1 0.33 or dil, cl - 1 1 1.00 jne .LBB5_2 + 1 1 1.00 jne .LBB5_3 1 1 0.50 lea rcx, [rdx + 2*rdx] 1 1 0.33 or rcx, 3 1 1 0.33 add rcx, 9 1 1 0.33 cmp rsi, rcx - 1 1 1.00 je .LBB5_3 + 1 1 1.00 jne .LBB5_3 + 1 1 1.00 U ret 1 0 0.25 xor eax, eax 1 1 0.33 mov rdx, rsi 1 1 1.00 U ret @@ -49,23 +48,22 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 5.33 5.32 - 5.35 - - + - - 4.49 4.49 - 5.02 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.01 0.98 - 0.01 - - movabs rax, 3074457345618258598 - - - - 1.00 - - - - cmp rdx, rax - - - 1.98 - - 0.02 - - seta cl - - - 0.02 0.98 - - - - mov rax, rdi - - - - 0.67 - 0.33 - - test al, 3 - - - 0.67 - - 0.33 - - setne dil - - - 0.99 - - 0.01 - - or dil, cl - - - - - - 1.00 - - jne .LBB5_2 - - - 0.01 0.99 - - - - lea rcx, [rdx + 2*rdx] - - - - 0.01 - 0.99 - - or rcx, 3 - - - 0.65 0.02 - 0.33 - - add rcx, 9 - - - 0.99 0.01 - - - - cmp rsi, rcx - - - - - - 1.00 - - je .LBB5_3 + - - 0.97 0.02 - 0.01 - - movabs rcx, 3074457345618258598 + - - 0.50 0.50 - - - - cmp rdx, rcx + - - - - - 1.00 - - ja .LBB5_3 + - - 0.50 0.50 - - - - mov rax, rdi + - - 0.02 0.97 - 0.01 - - test al, 3 + - - - - - 1.00 - - jne .LBB5_3 + - - 0.97 0.03 - - - - lea rcx, [rdx + 2*rdx] + - - 0.50 0.50 - - - - or rcx, 3 + - - 0.03 0.97 - - - - add rcx, 9 + - - 0.03 0.97 - - - - cmp rsi, rcx + - - - - - 1.00 - - jne .LBB5_3 + - - - - - 1.00 - - ret - - - - - - - - xor eax, eax - - - 0.01 0.66 - 0.33 - - mov rdx, rsi + - - 0.97 0.03 - - - - mov rdx, rsi - - - - - 1.00 - - ret diff --git a/benches/ref_from_bytes_with_elems_dynamic_size.x86-64 b/benches/ref_from_bytes_with_elems_dynamic_size.x86-64 index 3d8d15b7f6..efee25e23f 100644 --- a/benches/ref_from_bytes_with_elems_dynamic_size.x86-64 +++ b/benches/ref_from_bytes_with_elems_dynamic_size.x86-64 @@ -1,13 +1,12 @@ bench_ref_from_bytes_with_elems_dynamic_size: - movabs rax, 4611686018427387901 - cmp rdx, rax - seta cl + movabs rcx, 4611686018427387901 + cmp rdx, rcx + ja .LBB5_2 mov rax, rdi - or dil, cl - test dil, 1 - jne .LBB5_2 lea rcx, [2*rdx + 4] - cmp rsi, rcx + and edi, 1 + xor rcx, rsi + or rcx, rdi je .LBB5_3 .LBB5_2: xor eax, eax diff --git a/benches/ref_from_bytes_with_elems_dynamic_size.x86-64.mca b/benches/ref_from_bytes_with_elems_dynamic_size.x86-64.mca index 602179f3c9..3235e68f2b 100644 --- a/benches/ref_from_bytes_with_elems_dynamic_size.x86-64.mca +++ b/benches/ref_from_bytes_with_elems_dynamic_size.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 1300 -Total Cycles: 439 -Total uOps: 1400 +Instructions: 1200 +Total Cycles: 371 +Total uOps: 1200 Dispatch Width: 4 -uOps Per Cycle: 3.19 -IPC: 2.96 -Block RThroughput: 3.5 +uOps Per Cycle: 3.23 +IPC: 3.23 +Block RThroughput: 3.0 Instruction Info: @@ -18,15 +18,14 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 movabs rax, 4611686018427387901 - 1 1 0.33 cmp rdx, rax - 2 2 1.00 seta cl + 1 1 0.33 movabs rcx, 4611686018427387901 + 1 1 0.33 cmp rdx, rcx + 1 1 1.00 ja .LBB5_2 1 1 0.33 mov rax, rdi - 1 1 0.33 or dil, cl - 1 1 0.33 test dil, 1 - 1 1 1.00 jne .LBB5_2 1 1 0.50 lea rcx, [2*rdx + 4] - 1 1 0.33 cmp rsi, rcx + 1 1 0.33 and edi, 1 + 1 1 0.33 xor rcx, rsi + 1 1 0.33 or rcx, rdi 1 1 1.00 je .LBB5_3 1 0 0.25 xor eax, eax 1 1 0.33 mov rdx, rsi @@ -46,20 +45,19 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 4.32 4.33 - 4.35 - - + - - 3.66 3.66 - 3.68 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - 0.99 - 0.01 - - movabs rax, 4611686018427387901 - - - 0.33 0.67 - - - - cmp rdx, rax - - - 1.98 - - 0.02 - - seta cl - - - 0.01 0.99 - - - - mov rax, rdi - - - 1.00 - - - - - or dil, cl - - - 0.99 0.01 - - - - test dil, 1 - - - - - - 1.00 - - jne .LBB5_2 - - - - 1.00 - - - - lea rcx, [2*rdx + 4] - - - 0.01 - - 0.99 - - cmp rsi, rcx + - - - 0.99 - 0.01 - - movabs rcx, 4611686018427387901 + - - 0.35 0.33 - 0.32 - - cmp rdx, rcx + - - - - - 1.00 - - ja .LBB5_2 + - - 0.63 0.37 - - - - mov rax, rdi + - - 0.35 0.65 - - - - lea rcx, [2*rdx + 4] + - - 0.34 0.65 - 0.01 - - and edi, 1 + - - 0.99 0.01 - - - - xor rcx, rsi + - - 1.00 - - - - - or rcx, rdi - - - - - 1.00 - - je .LBB5_3 - - - - - - - - xor eax, eax - - - - 0.67 - 0.33 - - mov rdx, rsi + - - - 0.66 - 0.34 - - mov rdx, rsi - - - - - 1.00 - - ret diff --git a/benches/ref_from_prefix_dynamic_padding.x86-64 b/benches/ref_from_prefix_dynamic_padding.x86-64 index a58592a245..01228fbcbc 100644 --- a/benches/ref_from_prefix_dynamic_padding.x86-64 +++ b/benches/ref_from_prefix_dynamic_padding.x86-64 @@ -1,22 +1,24 @@ bench_ref_from_prefix_dynamic_padding: - xor edx, edx - mov eax, 0 - test dil, 3 - je .LBB5_1 - ret -.LBB5_1: movabs rax, 9223372036854775804 - and rsi, rax - cmp rsi, 9 - jae .LBB5_3 + and rax, rsi + cmp rax, 9 + jae .LBB5_2 mov edx, 1 - xor eax, eax + xor ecx, ecx + mov rax, rcx + ret +.LBB5_2: + xor edx, edx + mov ecx, 0 + test dil, 3 + je .LBB5_3 + mov rax, rcx ret .LBB5_3: - add rsi, -9 + add rax, -9 movabs rcx, -6148914691236517205 - mov rax, rsi mul rcx shr rdx - mov rax, rdi + mov rcx, rdi + mov rax, rcx ret diff --git a/benches/ref_from_prefix_dynamic_padding.x86-64.mca b/benches/ref_from_prefix_dynamic_padding.x86-64.mca index 62ea4babaf..6e50e96210 100644 --- a/benches/ref_from_prefix_dynamic_padding.x86-64.mca +++ b/benches/ref_from_prefix_dynamic_padding.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 1900 -Total Cycles: 608 -Total uOps: 2000 +Instructions: 2100 +Total Cycles: 673 +Total uOps: 2200 Dispatch Width: 4 -uOps Per Cycle: 3.29 -IPC: 3.13 -Block RThroughput: 5.0 +uOps Per Cycle: 3.27 +IPC: 3.12 +Block RThroughput: 5.5 Instruction Info: @@ -18,24 +18,26 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 0 0.25 xor edx, edx - 1 1 0.33 mov eax, 0 - 1 1 0.33 test dil, 3 - 1 1 1.00 je .LBB5_1 - 1 1 1.00 U ret 1 1 0.33 movabs rax, 9223372036854775804 - 1 1 0.33 and rsi, rax - 1 1 0.33 cmp rsi, 9 - 1 1 1.00 jae .LBB5_3 + 1 1 0.33 and rax, rsi + 1 1 0.33 cmp rax, 9 + 1 1 1.00 jae .LBB5_2 1 1 0.33 mov edx, 1 - 1 0 0.25 xor eax, eax + 1 0 0.25 xor ecx, ecx + 1 1 0.33 mov rax, rcx + 1 1 1.00 U ret + 1 0 0.25 xor edx, edx + 1 1 0.33 mov ecx, 0 + 1 1 0.33 test dil, 3 + 1 1 1.00 je .LBB5_3 + 1 1 0.33 mov rax, rcx 1 1 1.00 U ret - 1 1 0.33 add rsi, -9 + 1 1 0.33 add rax, -9 1 1 0.33 movabs rcx, -6148914691236517205 - 1 1 0.33 mov rax, rsi 2 4 1.00 mul rcx 1 1 0.50 shr rdx - 1 1 0.33 mov rax, rdi + 1 1 0.33 mov rcx, rdi + 1 1 0.33 mov rax, rcx 1 1 1.00 U ret @@ -52,26 +54,28 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 6.00 6.00 - 6.00 - - + - - 6.67 6.66 - 6.67 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - - - - - - xor edx, edx - - - 0.01 0.98 - 0.01 - - mov eax, 0 - - - 0.98 0.01 - 0.01 - - test dil, 3 - - - - - - 1.00 - - je .LBB5_1 + - - 0.66 0.33 - 0.01 - - movabs rax, 9223372036854775804 + - - 0.33 0.67 - - - - and rax, rsi + - - - 1.00 - - - - cmp rax, 9 + - - - - - 1.00 - - jae .LBB5_2 + - - 0.67 - - 0.33 - - mov edx, 1 + - - - - - - - - xor ecx, ecx + - - 0.66 - - 0.34 - - mov rax, rcx - - - - - 1.00 - - ret - - - 0.01 0.99 - - - - movabs rax, 9223372036854775804 - - - - 1.00 - - - - and rsi, rax - - - - 1.00 - - - - cmp rsi, 9 - - - - - - 1.00 - - jae .LBB5_3 - - - 1.00 - - - - - mov edx, 1 - - - - - - - - - xor eax, eax + - - - - - - - - xor edx, edx + - - 0.67 0.33 - - - - mov ecx, 0 + - - - 0.67 - 0.33 - - test dil, 3 + - - - - - 1.00 - - je .LBB5_3 + - - 0.34 0.66 - - - - mov rax, rcx - - - - - 1.00 - - ret - - - 0.02 0.02 - 0.96 - - add rsi, -9 - - - 0.99 0.01 - - - - movabs rcx, -6148914691236517205 - - - 0.01 0.99 - - - - mov rax, rsi + - - - 1.00 - - - - add rax, -9 + - - 1.00 - - - - - movabs rcx, -6148914691236517205 - - 1.00 1.00 - - - - mul rcx - - 1.00 - - - - - shr rdx - - - 0.98 - - 0.02 - - mov rax, rdi + - - 0.33 0.34 - 0.33 - - mov rcx, rdi + - - 0.01 0.66 - 0.33 - - mov rax, rcx - - - - - 1.00 - - ret diff --git a/benches/ref_from_prefix_dynamic_size.x86-64 b/benches/ref_from_prefix_dynamic_size.x86-64 index fe6332c910..e402765c33 100644 --- a/benches/ref_from_prefix_dynamic_size.x86-64 +++ b/benches/ref_from_prefix_dynamic_size.x86-64 @@ -1,14 +1,14 @@ bench_ref_from_prefix_dynamic_size: - xor edx, edx - mov eax, 0 - test dil, 1 - jne .LBB5_4 cmp rsi, 4 - jae .LBB5_3 + jae .LBB5_2 mov edx, 1 xor eax, eax ret -.LBB5_3: +.LBB5_2: + xor edx, edx + mov eax, 0 + test dil, 1 + jne .LBB5_4 add rsi, -4 shr rsi mov rdx, rsi diff --git a/benches/ref_from_prefix_dynamic_size.x86-64.mca b/benches/ref_from_prefix_dynamic_size.x86-64.mca index 3900a59461..ce71749bc4 100644 --- a/benches/ref_from_prefix_dynamic_size.x86-64.mca +++ b/benches/ref_from_prefix_dynamic_size.x86-64.mca @@ -1,11 +1,11 @@ Iterations: 100 Instructions: 1400 -Total Cycles: 405 +Total Cycles: 404 Total uOps: 1400 Dispatch Width: 4 -uOps Per Cycle: 3.46 -IPC: 3.46 +uOps Per Cycle: 3.47 +IPC: 3.47 Block RThroughput: 4.0 @@ -18,15 +18,15 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 0 0.25 xor edx, edx - 1 1 0.33 mov eax, 0 - 1 1 0.33 test dil, 1 - 1 1 1.00 jne .LBB5_4 1 1 0.33 cmp rsi, 4 - 1 1 1.00 jae .LBB5_3 + 1 1 1.00 jae .LBB5_2 1 1 0.33 mov edx, 1 1 0 0.25 xor eax, eax 1 1 1.00 U ret + 1 0 0.25 xor edx, edx + 1 1 0.33 mov eax, 0 + 1 1 0.33 test dil, 1 + 1 1 1.00 jne .LBB5_4 1 1 0.33 add rsi, -4 1 1 0.50 shr rsi 1 1 0.33 mov rdx, rsi @@ -47,21 +47,21 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 3.99 3.99 - 4.02 - - + - - 3.99 4.00 - 4.01 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - - - - - - xor edx, edx - - - 0.01 0.98 - 0.01 - - mov eax, 0 - - - 0.98 0.02 - - - - test dil, 1 - - - - - - 1.00 - - jne .LBB5_4 - - - 0.02 0.98 - - - - cmp rsi, 4 - - - - - - 1.00 - - jae .LBB5_3 - - - 0.98 0.01 - 0.01 - - mov edx, 1 + - - 0.99 - - 0.01 - - cmp rsi, 4 + - - - - - 1.00 - - jae .LBB5_2 + - - - 1.00 - - - - mov edx, 1 - - - - - - - - xor eax, eax - - - - - 1.00 - - ret - - - 0.01 0.99 - - - - add rsi, -4 + - - - - - - - - xor edx, edx + - - 1.00 - - - - - mov eax, 0 + - - - 1.00 - - - - test dil, 1 + - - - - - 1.00 - - jne .LBB5_4 + - - 1.00 - - - - - add rsi, -4 - - 1.00 - - - - - shr rsi - - - 1.00 - - - - mov rdx, rsi - - - 0.99 0.01 - - - - mov rax, rdi + - - - 1.00 - - - - mov rax, rdi - - - - - 1.00 - - ret diff --git a/benches/ref_from_prefix_static_size.x86-64 b/benches/ref_from_prefix_static_size.x86-64 index 7c1bf45bb6..0328ae9719 100644 --- a/benches/ref_from_prefix_static_size.x86-64 +++ b/benches/ref_from_prefix_static_size.x86-64 @@ -1,8 +1,7 @@ bench_ref_from_prefix_static_size: xor eax, eax - cmp rsi, 6 - mov rcx, rdi - cmovb rcx, rax test dil, 1 - cmove rax, rcx + cmovne rdi, rax + cmp rsi, 6 + cmovae rax, rdi ret diff --git a/benches/ref_from_prefix_static_size.x86-64.mca b/benches/ref_from_prefix_static_size.x86-64.mca index 9691b88fe0..d4355bc6e8 100644 --- a/benches/ref_from_prefix_static_size.x86-64.mca +++ b/benches/ref_from_prefix_static_size.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 700 -Total Cycles: 274 -Total uOps: 900 +Instructions: 600 +Total Cycles: 305 +Total uOps: 800 Dispatch Width: 4 -uOps Per Cycle: 3.28 -IPC: 2.55 -Block RThroughput: 2.3 +uOps Per Cycle: 2.62 +IPC: 1.97 +Block RThroughput: 2.0 Instruction Info: @@ -19,11 +19,10 @@ Instruction Info: [1] [2] [3] [4] [5] [6] Instructions: 1 0 0.25 xor eax, eax - 1 1 0.33 cmp rsi, 6 - 1 1 0.33 mov rcx, rdi - 2 2 0.67 cmovb rcx, rax 1 1 0.33 test dil, 1 - 2 2 0.67 cmove rax, rcx + 2 2 0.67 cmovne rdi, rax + 1 1 0.33 cmp rsi, 6 + 2 2 0.67 cmovae rax, rdi 1 1 1.00 U ret @@ -40,14 +39,13 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 2.66 2.67 - 2.67 - - + - - 1.95 2.28 - 2.77 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - - - - - xor eax, eax - - - - 0.01 - 0.99 - - cmp rsi, 6 - - - 0.01 0.67 - 0.32 - - mov rcx, rdi - - - 1.00 0.99 - 0.01 - - cmovb rcx, rax - - - 0.66 0.01 - 0.33 - - test dil, 1 - - - 0.99 0.99 - 0.02 - - cmove rax, rcx + - - 0.05 0.06 - 0.89 - - test dil, 1 + - - 0.95 0.94 - 0.11 - - cmovne rdi, rax + - - - 0.34 - 0.66 - - cmp rsi, 6 + - - 0.95 0.94 - 0.11 - - cmovae rax, rdi - - - - - 1.00 - - ret diff --git a/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64 b/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64 index 5b31277bde..2552d72393 100644 --- a/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64 +++ b/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64 @@ -1,26 +1,21 @@ bench_ref_from_prefix_with_elems_dynamic_padding: - movabs rax, 3074457345618258598 - cmp rdx, rax - ja .LBB5_1 - xor ecx, ecx - mov eax, 0 - test dil, 3 - je .LBB5_3 - mov rdx, rcx - ret -.LBB5_1: + mov rcx, rdx mov edx, 1 - xor eax, eax - ret -.LBB5_3: - lea rax, [rdx + 2*rdx] + movabs rax, 3074457345618258598 + cmp rcx, rax + ja .LBB5_3 + lea rax, [rcx + 2*rcx] or rax, 3 add rax, 9 - xor r8d, r8d cmp rax, rsi - mov ecx, 1 - cmovbe rcx, rdx - cmova rdi, r8 - mov rax, rdi + jbe .LBB5_4 +.LBB5_3: + xor eax, eax + ret +.LBB5_4: + xor eax, eax + test dil, 3 + cmovne rcx, rax + cmove rax, rdi mov rdx, rcx ret diff --git a/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64.mca b/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64.mca index 2f212ec6d0..d69beeedc4 100644 --- a/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64.mca +++ b/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 2300 -Total Cycles: 807 -Total uOps: 2700 +Instructions: 1800 +Total Cycles: 605 +Total uOps: 2000 Dispatch Width: 4 -uOps Per Cycle: 3.35 -IPC: 2.85 -Block RThroughput: 6.8 +uOps Per Cycle: 3.31 +IPC: 2.98 +Block RThroughput: 5.0 Instruction Info: @@ -18,27 +18,22 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 movabs rax, 3074457345618258598 - 1 1 0.33 cmp rdx, rax - 1 1 1.00 ja .LBB5_1 - 1 0 0.25 xor ecx, ecx - 1 1 0.33 mov eax, 0 - 1 1 0.33 test dil, 3 - 1 1 1.00 je .LBB5_3 - 1 1 0.33 mov rdx, rcx - 1 1 1.00 U ret + 1 1 0.33 mov rcx, rdx 1 1 0.33 mov edx, 1 - 1 0 0.25 xor eax, eax - 1 1 1.00 U ret - 1 1 0.50 lea rax, [rdx + 2*rdx] + 1 1 0.33 movabs rax, 3074457345618258598 + 1 1 0.33 cmp rcx, rax + 1 1 1.00 ja .LBB5_3 + 1 1 0.50 lea rax, [rcx + 2*rcx] 1 1 0.33 or rax, 3 1 1 0.33 add rax, 9 - 1 0 0.25 xor r8d, r8d 1 1 0.33 cmp rax, rsi - 1 1 0.33 mov ecx, 1 - 3 3 1.00 cmovbe rcx, rdx - 3 3 1.00 cmova rdi, r8 - 1 1 0.33 mov rax, rdi + 1 1 1.00 jbe .LBB5_4 + 1 0 0.25 xor eax, eax + 1 1 1.00 U ret + 1 0 0.25 xor eax, eax + 1 1 0.33 test dil, 3 + 2 2 0.67 cmovne rcx, rax + 2 2 0.67 cmove rax, rdi 1 1 0.33 mov rdx, rcx 1 1 1.00 U ret @@ -56,30 +51,25 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 7.99 7.99 - 8.02 - - + - - 5.99 5.99 - 6.02 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.47 0.52 - 0.01 - - movabs rax, 3074457345618258598 - - - 0.94 0.01 - 0.05 - - cmp rdx, rax - - - - - - 1.00 - - ja .LBB5_1 - - - - - - - - - xor ecx, ecx - - - 0.03 0.97 - - - - mov eax, 0 - - - 0.01 0.52 - 0.47 - - test dil, 3 - - - - - - 1.00 - - je .LBB5_3 - - - 0.03 0.51 - 0.46 - - mov rdx, rcx - - - - - - 1.00 - - ret - - - 0.04 0.96 - - - - mov edx, 1 + - - 0.98 0.01 - 0.01 - - mov rcx, rdx + - - 0.01 0.99 - - - - mov edx, 1 + - - 0.02 0.98 - - - - movabs rax, 3074457345618258598 + - - 0.98 0.01 - 0.01 - - cmp rcx, rax + - - - - - 1.00 - - ja .LBB5_3 + - - 0.01 0.99 - - - - lea rax, [rcx + 2*rcx] + - - 0.99 0.01 - - - - or rax, 3 + - - 0.99 0.01 - - - - add rax, 9 + - - 0.99 0.01 - - - - cmp rax, rsi + - - - - - 1.00 - - jbe .LBB5_4 - - - - - - - - xor eax, eax - - - - - 1.00 - - ret - - - 0.01 0.99 - - - - lea rax, [rdx + 2*rdx] - - - 0.52 0.48 - - - - or rax, 3 - - - 0.51 0.49 - - - - add rax, 9 - - - - - - - - - xor r8d, r8d - - - 0.97 0.03 - - - - cmp rax, rsi - - - 0.01 0.99 - - - - mov ecx, 1 - - - 1.04 0.97 - 0.99 - - cmovbe rcx, rdx - - - 1.44 0.54 - 1.02 - - cmova rdi, r8 - - - 0.97 0.01 - 0.02 - - mov rax, rdi - - - 1.00 - - - - - mov rdx, rcx + - - - - - - - - xor eax, eax + - - 0.01 0.98 - 0.01 - - test dil, 3 + - - 0.01 0.99 - 1.00 - - cmovne rcx, rax + - - 0.01 1.00 - 0.99 - - cmove rax, rdi + - - 0.99 0.01 - - - - mov rdx, rcx - - - - - 1.00 - - ret diff --git a/benches/ref_from_prefix_with_elems_dynamic_size.x86-64 b/benches/ref_from_prefix_with_elems_dynamic_size.x86-64 index 069fd4859c..1d6a8e334b 100644 --- a/benches/ref_from_prefix_with_elems_dynamic_size.x86-64 +++ b/benches/ref_from_prefix_with_elems_dynamic_size.x86-64 @@ -1,22 +1,19 @@ bench_ref_from_prefix_with_elems_dynamic_size: - movabs rax, 4611686018427387901 - cmp rdx, rax - ja .LBB5_1 mov rcx, rdx - xor edx, edx - mov eax, 0 - test dil, 1 - jne .LBB5_4 + mov edx, 1 + movabs rax, 4611686018427387901 + cmp rcx, rax + ja .LBB5_3 lea rax, [2*rcx + 4] - xor r8d, r8d cmp rax, rsi - mov edx, 1 - cmovbe rdx, rcx - cmova rdi, r8 - mov rax, rdi -.LBB5_4: + jbe .LBB5_4 +.LBB5_3: + xor eax, eax ret -.LBB5_1: - mov edx, 1 +.LBB5_4: xor eax, eax + test dil, 1 + cmovne rcx, rax + cmove rax, rdi + mov rdx, rcx ret diff --git a/benches/ref_from_prefix_with_elems_dynamic_size.x86-64.mca b/benches/ref_from_prefix_with_elems_dynamic_size.x86-64.mca index 6f22726406..da9883ddde 100644 --- a/benches/ref_from_prefix_with_elems_dynamic_size.x86-64.mca +++ b/benches/ref_from_prefix_with_elems_dynamic_size.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 1900 -Total Cycles: 672 -Total uOps: 2300 +Instructions: 1600 +Total Cycles: 603 +Total uOps: 1800 Dispatch Width: 4 -uOps Per Cycle: 3.42 -IPC: 2.83 -Block RThroughput: 5.8 +uOps Per Cycle: 2.99 +IPC: 2.65 +Block RThroughput: 4.5 Instruction Info: @@ -18,24 +18,21 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 movabs rax, 4611686018427387901 - 1 1 0.33 cmp rdx, rax - 1 1 1.00 ja .LBB5_1 1 1 0.33 mov rcx, rdx - 1 0 0.25 xor edx, edx - 1 1 0.33 mov eax, 0 - 1 1 0.33 test dil, 1 - 1 1 1.00 jne .LBB5_4 + 1 1 0.33 mov edx, 1 + 1 1 0.33 movabs rax, 4611686018427387901 + 1 1 0.33 cmp rcx, rax + 1 1 1.00 ja .LBB5_3 1 1 0.50 lea rax, [2*rcx + 4] - 1 0 0.25 xor r8d, r8d 1 1 0.33 cmp rax, rsi - 1 1 0.33 mov edx, 1 - 3 3 1.00 cmovbe rdx, rcx - 3 3 1.00 cmova rdi, r8 - 1 1 0.33 mov rax, rdi + 1 1 1.00 jbe .LBB5_4 + 1 0 0.25 xor eax, eax 1 1 1.00 U ret - 1 1 0.33 mov edx, 1 1 0 0.25 xor eax, eax + 1 1 0.33 test dil, 1 + 2 2 0.67 cmovne rcx, rax + 2 2 0.67 cmove rax, rdi + 1 1 0.33 mov rdx, rcx 1 1 1.00 U ret @@ -52,26 +49,23 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 6.66 6.66 - 6.68 - - + - - 5.33 5.33 - 5.34 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - 0.99 - 0.01 - - movabs rax, 4611686018427387901 - - - 0.37 0.63 - - - - cmp rdx, rax - - - - - - 1.00 - - ja .LBB5_1 - - - 0.63 0.37 - - - - mov rcx, rdx - - - - - - - - - xor edx, edx - - - 0.01 0.98 - 0.01 - - mov eax, 0 - - - 0.98 0.02 - - - - test dil, 1 - - - - - - 1.00 - - jne .LBB5_4 - - - 0.01 0.99 - - - - lea rax, [2*rcx + 4] - - - - - - - - - xor r8d, r8d - - - 1.00 - - - - - cmp rax, rsi - - - - 0.67 - 0.33 - - mov edx, 1 - - - 0.73 0.98 - 1.29 - - cmovbe rdx, rcx - - - 1.60 0.36 - 1.04 - - cmova rdi, r8 - - - 0.99 0.01 - - - - mov rax, rdi + - - 0.48 0.45 - 0.07 - - mov rcx, rdx + - - 0.45 0.49 - 0.06 - - mov edx, 1 + - - 0.18 0.25 - 0.57 - - movabs rax, 4611686018427387901 + - - 0.24 0.51 - 0.25 - - cmp rcx, rax + - - - - - 1.00 - - ja .LBB5_3 + - - 0.52 0.48 - - - - lea rax, [2*rcx + 4] + - - 0.47 0.53 - - - - cmp rax, rsi + - - - - - 1.00 - - jbe .LBB5_4 + - - - - - - - - xor eax, eax - - - - - 1.00 - - ret - - - 0.34 0.66 - - - - mov edx, 1 - - - - - - - - xor eax, eax + - - 0.47 0.50 - 0.03 - - test dil, 1 + - - 1.00 1.00 - - - - cmovne rcx, rax + - - 0.99 0.66 - 0.35 - - cmove rax, rdi + - - 0.53 0.46 - 0.01 - - mov rdx, rcx - - - - - 1.00 - - ret diff --git a/benches/ref_from_suffix_dynamic_padding.x86-64 b/benches/ref_from_suffix_dynamic_padding.x86-64 index 3e05f6023f..9da52dcae0 100644 --- a/benches/ref_from_suffix_dynamic_padding.x86-64 +++ b/benches/ref_from_suffix_dynamic_padding.x86-64 @@ -1,11 +1,11 @@ bench_ref_from_suffix_dynamic_padding: - lea eax, [rsi + rdi] - test al, 3 - jne .LBB5_1 movabs rax, 9223372036854775804 and rax, rsi cmp rax, 9 - jae .LBB5_3 + jb .LBB5_1 + lea ecx, [rsi + rdi] + test cl, 3 + je .LBB5_3 .LBB5_1: xor eax, eax ret diff --git a/benches/ref_from_suffix_dynamic_padding.x86-64.mca b/benches/ref_from_suffix_dynamic_padding.x86-64.mca index 73599d5b6a..929873f5e7 100644 --- a/benches/ref_from_suffix_dynamic_padding.x86-64.mca +++ b/benches/ref_from_suffix_dynamic_padding.x86-64.mca @@ -1,10 +1,10 @@ Iterations: 100 Instructions: 2000 -Total Cycles: 682 +Total Cycles: 683 Total uOps: 2100 Dispatch Width: 4 -uOps Per Cycle: 3.08 +uOps Per Cycle: 3.07 IPC: 2.93 Block RThroughput: 5.3 @@ -18,13 +18,13 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.50 lea eax, [rsi + rdi] - 1 1 0.33 test al, 3 - 1 1 1.00 jne .LBB5_1 1 1 0.33 movabs rax, 9223372036854775804 1 1 0.33 and rax, rsi 1 1 0.33 cmp rax, 9 - 1 1 1.00 jae .LBB5_3 + 1 1 1.00 jb .LBB5_1 + 1 1 0.50 lea ecx, [rsi + rdi] + 1 1 0.33 test cl, 3 + 1 1 1.00 je .LBB5_3 1 0 0.25 xor eax, eax 1 1 1.00 U ret 1 1 0.33 add rax, -9 @@ -53,27 +53,27 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 6.65 6.67 - 6.68 - - + - - 6.67 6.65 - 6.68 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.90 0.10 - - - - lea eax, [rsi + rdi] - - - 0.93 - - 0.07 - - test al, 3 - - - - - - 1.00 - - jne .LBB5_1 - - - 0.51 0.47 - 0.02 - - movabs rax, 9223372036854775804 - - - - - - 1.00 - - and rax, rsi - - - - 0.09 - 0.91 - - cmp rax, 9 - - - - - - 1.00 - - jae .LBB5_3 + - - 0.05 0.32 - 0.63 - - movabs rax, 9223372036854775804 + - - 0.63 0.03 - 0.34 - - and rax, rsi + - - 0.94 0.03 - 0.03 - - cmp rax, 9 + - - - - - 1.00 - - jb .LBB5_1 + - - 0.05 0.95 - - - - lea ecx, [rsi + rdi] + - - 0.03 0.97 - - - - test cl, 3 + - - - - - 1.00 - - je .LBB5_3 - - - - - - - - xor eax, eax - - - - - 1.00 - - ret - - - 0.43 0.47 - 0.10 - - add rax, -9 - - - 0.42 0.39 - 0.19 - - movabs rcx, -6148914691236517205 + - - 0.35 0.35 - 0.30 - - add rax, -9 + - - 0.95 0.04 - 0.01 - - movabs rcx, -6148914691236517205 - - 1.00 1.00 - - - - mul rcx - - 0.69 - - 0.31 - - shr rdx - - - 0.54 0.46 - - - - lea rax, [rdx + 2*rdx] - - - 0.07 0.91 - 0.02 - - sub rsi, rax - - - 0.91 0.05 - 0.04 - - or rax, -4 - - - 0.08 0.90 - 0.02 - - add rsi, rdi - - - 0.09 0.91 - - - - add rax, rsi - - - 0.08 0.92 - - - - add rax, -8 + - - 0.65 0.35 - - - - lea rax, [rdx + 2*rdx] + - - 0.30 0.35 - 0.35 - - sub rsi, rax + - - 0.66 0.02 - 0.32 - - or rax, -4 + - - 0.02 0.64 - 0.34 - - add rsi, rdi + - - 0.33 0.65 - 0.02 - - add rax, rsi + - - 0.02 0.95 - 0.03 - - add rax, -8 - - - - - 1.00 - - ret diff --git a/benches/ref_from_suffix_dynamic_size.x86-64 b/benches/ref_from_suffix_dynamic_size.x86-64 index bd4ace8983..13fdcf8624 100644 --- a/benches/ref_from_suffix_dynamic_size.x86-64 +++ b/benches/ref_from_suffix_dynamic_size.x86-64 @@ -1,13 +1,15 @@ bench_ref_from_suffix_dynamic_size: - mov rdx, rsi + cmp rsi, 4 + jb .LBB5_1 + mov rax, rdi lea ecx, [rsi + rdi] - mov eax, edx - and eax, 1 - add rax, rdi - xor esi, esi - sub rdx, 4 - cmovb rax, rsi - shr rdx test cl, 1 - cmovne rax, rsi + jne .LBB5_1 + lea rdx, [rsi - 4] + shr rdx + and esi, 1 + add rax, rsi + ret +.LBB5_1: + xor eax, eax ret diff --git a/benches/ref_from_suffix_dynamic_size.x86-64.mca b/benches/ref_from_suffix_dynamic_size.x86-64.mca index 1398bcfe27..949b83310c 100644 --- a/benches/ref_from_suffix_dynamic_size.x86-64.mca +++ b/benches/ref_from_suffix_dynamic_size.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 1200 -Total Cycles: 439 -Total uOps: 1400 +Instructions: 1300 +Total Cycles: 405 +Total uOps: 1300 Dispatch Width: 4 -uOps Per Cycle: 3.19 -IPC: 2.73 -Block RThroughput: 3.5 +uOps Per Cycle: 3.21 +IPC: 3.21 +Block RThroughput: 4.0 Instruction Info: @@ -18,17 +18,18 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 mov rdx, rsi + 1 1 0.33 cmp rsi, 4 + 1 1 1.00 jb .LBB5_1 + 1 1 0.33 mov rax, rdi 1 1 0.50 lea ecx, [rsi + rdi] - 1 1 0.33 mov eax, edx - 1 1 0.33 and eax, 1 - 1 1 0.33 add rax, rdi - 1 0 0.25 xor esi, esi - 1 1 0.33 sub rdx, 4 - 2 2 0.67 cmovb rax, rsi - 1 1 0.50 shr rdx 1 1 0.33 test cl, 1 - 2 2 0.67 cmovne rax, rsi + 1 1 1.00 jne .LBB5_1 + 1 1 0.50 lea rdx, [rsi - 4] + 1 1 0.50 shr rdx + 1 1 0.33 and esi, 1 + 1 1 0.33 add rax, rsi + 1 1 1.00 U ret + 1 0 0.25 xor eax, eax 1 1 1.00 U ret @@ -45,19 +46,20 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 4.33 4.33 - 4.34 - - + - - 3.99 3.99 - 4.02 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.02 0.32 - 0.66 - - mov rdx, rsi - - - 0.32 0.68 - - - - lea ecx, [rsi + rdi] - - - 0.66 - - 0.34 - - mov eax, edx - - - 0.02 0.33 - 0.65 - - and eax, 1 - - - - 0.99 - 0.01 - - add rax, rdi - - - - - - - - - xor esi, esi - - - 0.65 - - 0.35 - - sub rdx, 4 - - - 1.00 1.00 - - - - cmovb rax, rsi - - - 0.66 - - 0.34 - - shr rdx - - - - 0.01 - 0.99 - - test cl, 1 - - - 1.00 1.00 - - - - cmovne rax, rsi + - - 0.02 0.97 - 0.01 - - cmp rsi, 4 + - - - - - 1.00 - - jb .LBB5_1 + - - 0.97 0.03 - - - - mov rax, rdi + - - 0.01 0.99 - - - - lea ecx, [rsi + rdi] + - - 0.98 0.02 - - - - test cl, 1 + - - - - - 1.00 - - jne .LBB5_1 + - - 0.97 0.03 - - - - lea rdx, [rsi - 4] + - - 1.00 - - - - - shr rdx + - - 0.02 0.98 - - - - and esi, 1 + - - 0.02 0.97 - 0.01 - - add rax, rsi + - - - - - 1.00 - - ret + - - - - - - - - xor eax, eax - - - - - 1.00 - - ret diff --git a/benches/ref_from_suffix_static_size.x86-64 b/benches/ref_from_suffix_static_size.x86-64 index 9e90b9e254..4f003e061d 100644 --- a/benches/ref_from_suffix_static_size.x86-64 +++ b/benches/ref_from_suffix_static_size.x86-64 @@ -1,13 +1,12 @@ bench_ref_from_suffix_static_size: - lea eax, [rsi + rdi] cmp rsi, 6 - setb cl - or cl, al - test cl, 1 - je .LBB5_2 - xor eax, eax - ret -.LBB5_2: + jb .LBB5_1 + lea eax, [rsi + rdi] + test al, 1 + jne .LBB5_1 lea rax, [rdi + rsi] add rax, -6 ret +.LBB5_1: + xor eax, eax + ret diff --git a/benches/ref_from_suffix_static_size.x86-64.mca b/benches/ref_from_suffix_static_size.x86-64.mca index ef5892647b..70da98d6db 100644 --- a/benches/ref_from_suffix_static_size.x86-64.mca +++ b/benches/ref_from_suffix_static_size.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 1100 -Total Cycles: 338 -Total uOps: 1100 +Instructions: 1000 +Total Cycles: 404 +Total uOps: 1000 Dispatch Width: 4 -uOps Per Cycle: 3.25 -IPC: 3.25 -Block RThroughput: 3.0 +uOps Per Cycle: 2.48 +IPC: 2.48 +Block RThroughput: 4.0 Instruction Info: @@ -18,17 +18,16 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.50 lea eax, [rsi + rdi] 1 1 0.33 cmp rsi, 6 - 1 1 0.50 setb cl - 1 1 0.33 or cl, al - 1 1 0.33 test cl, 1 - 1 1 1.00 je .LBB5_2 - 1 0 0.25 xor eax, eax - 1 1 1.00 U ret + 1 1 1.00 jb .LBB5_1 + 1 1 0.50 lea eax, [rsi + rdi] + 1 1 0.33 test al, 1 + 1 1 1.00 jne .LBB5_1 1 1 0.50 lea rax, [rdi + rsi] 1 1 0.33 add rax, -6 1 1 1.00 U ret + 1 0 0.25 xor eax, eax + 1 1 1.00 U ret Resources: @@ -44,18 +43,17 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 3.32 3.33 - 3.35 - - + - - 2.49 2.50 - 4.01 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.97 0.03 - - - - lea eax, [rsi + rdi] - - - 0.33 0.32 - 0.35 - - cmp rsi, 6 - - - 1.00 - - - - - setb cl - - - - 1.00 - - - - or cl, al - - - - 1.00 - - - - test cl, 1 - - - - - - 1.00 - - je .LBB5_2 - - - - - - - - - xor eax, eax - - - - - - 1.00 - - ret + - - 0.49 0.50 - 0.01 - - cmp rsi, 6 + - - - - - 1.00 - - jb .LBB5_1 + - - 0.50 0.50 - - - - lea eax, [rsi + rdi] + - - 0.66 0.34 - - - - test al, 1 + - - - - - 1.00 - - jne .LBB5_1 - - 0.34 0.66 - - - - lea rax, [rdi + rsi] - - - 0.68 0.32 - - - - add rax, -6 + - - 0.50 0.50 - - - - add rax, -6 + - - - - - 1.00 - - ret + - - - - - - - - xor eax, eax - - - - - 1.00 - - ret diff --git a/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64 b/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64 index c3d10b5fc6..e1844f6b1e 100644 --- a/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64 +++ b/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64 @@ -1,27 +1,26 @@ bench_ref_from_suffix_with_elems_dynamic_padding: + mov rcx, rdx + mov edx, 1 movabs rax, 3074457345618258598 - cmp rdx, rax - ja .LBB5_1 - lea r8d, [rsi + rdi] - xor ecx, ecx - mov eax, 0 - test r8b, 3 - je .LBB5_3 - mov rdx, rcx - ret -.LBB5_3: - lea rax, [rdx + 2*rdx] + cmp rcx, rax + ja .LBB5_4 + lea rax, [rcx + 2*rcx] or rax, 3 add rax, 9 - sub rsi, rax - jae .LBB5_4 -.LBB5_1: + mov r8, rsi + sub r8, rax + jae .LBB5_2 +.LBB5_4: xor eax, eax - mov edx, 1 +.LBB5_5: ret -.LBB5_4: - add rdi, rsi - mov rcx, rdx - mov rax, rdi +.LBB5_2: + add esi, edi + xor edx, edx + mov eax, 0 + test sil, 3 + jne .LBB5_5 + add rdi, r8 mov rdx, rcx + mov rax, rdi ret diff --git a/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64.mca b/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64.mca index 92e6280bb4..6cde05d596 100644 --- a/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64.mca +++ b/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 2300 -Total Cycles: 706 -Total uOps: 2300 +Instructions: 2200 +Total Cycles: 671 +Total uOps: 2200 Dispatch Width: 4 -uOps Per Cycle: 3.26 -IPC: 3.26 -Block RThroughput: 6.0 +uOps Per Cycle: 3.28 +IPC: 3.28 +Block RThroughput: 5.5 Instruction Info: @@ -18,28 +18,27 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: + 1 1 0.33 mov rcx, rdx + 1 1 0.33 mov edx, 1 1 1 0.33 movabs rax, 3074457345618258598 - 1 1 0.33 cmp rdx, rax - 1 1 1.00 ja .LBB5_1 - 1 1 0.50 lea r8d, [rsi + rdi] - 1 0 0.25 xor ecx, ecx - 1 1 0.33 mov eax, 0 - 1 1 0.33 test r8b, 3 - 1 1 1.00 je .LBB5_3 - 1 1 0.33 mov rdx, rcx - 1 1 1.00 U ret - 1 1 0.50 lea rax, [rdx + 2*rdx] + 1 1 0.33 cmp rcx, rax + 1 1 1.00 ja .LBB5_4 + 1 1 0.50 lea rax, [rcx + 2*rcx] 1 1 0.33 or rax, 3 1 1 0.33 add rax, 9 - 1 1 0.33 sub rsi, rax - 1 1 1.00 jae .LBB5_4 + 1 1 0.33 mov r8, rsi + 1 1 0.33 sub r8, rax + 1 1 1.00 jae .LBB5_2 1 0 0.25 xor eax, eax - 1 1 0.33 mov edx, 1 1 1 1.00 U ret - 1 1 0.33 add rdi, rsi - 1 1 0.33 mov rcx, rdx - 1 1 0.33 mov rax, rdi + 1 1 0.33 add esi, edi + 1 0 0.25 xor edx, edx + 1 1 0.33 mov eax, 0 + 1 1 0.33 test sil, 3 + 1 1 1.00 jne .LBB5_5 + 1 1 0.33 add rdi, r8 1 1 0.33 mov rdx, rcx + 1 1 0.33 mov rax, rdi 1 1 1.00 U ret @@ -56,30 +55,29 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 6.99 7.00 - 7.01 - - + - - 6.66 6.66 - 6.68 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - 0.99 - 0.01 - - movabs rax, 3074457345618258598 - - - 0.01 0.50 - 0.49 - - cmp rdx, rax - - - - - - 1.00 - - ja .LBB5_1 - - - - 1.00 - - - - lea r8d, [rsi + rdi] - - - - - - - - - xor ecx, ecx - - - 0.50 0.49 - 0.01 - - mov eax, 0 - - - 0.49 0.51 - - - - test r8b, 3 - - - - - - 1.00 - - je .LBB5_3 - - - 0.51 0.49 - - - - mov rdx, rcx - - - - - - 1.00 - - ret - - - 0.50 0.50 - - - - lea rax, [rdx + 2*rdx] - - - 1.00 - - - - - or rax, 3 + - - 0.66 0.33 - 0.01 - - mov rcx, rdx + - - 0.33 0.67 - - - - mov edx, 1 + - - 0.67 0.33 - - - - movabs rax, 3074457345618258598 + - - 0.99 - - 0.01 - - cmp rcx, rax + - - - - - 1.00 - - ja .LBB5_4 + - - - 1.00 - - - - lea rax, [rcx + 2*rcx] + - - 0.34 - - 0.66 - - or rax, 3 - - 1.00 - - - - - add rax, 9 - - - 0.99 0.01 - - - - sub rsi, rax - - - - - - 1.00 - - jae .LBB5_4 + - - - 0.34 - 0.66 - - mov r8, rsi + - - 1.00 - - - - - sub r8, rax + - - - - - 1.00 - - jae .LBB5_2 - - - - - - - - xor eax, eax - - - - 1.00 - - - - mov edx, 1 - - - - - 1.00 - - ret - - - 1.00 - - - - - add rdi, rsi - - - - 1.00 - - - - mov rcx, rdx - - - 0.99 0.01 - - - - mov rax, rdi - - - - 0.50 - 0.50 - - mov rdx, rcx + - - - 1.00 - - - - add esi, edi + - - - - - - - - xor edx, edx + - - - 0.99 - 0.01 - - mov eax, 0 + - - 0.33 0.34 - 0.33 - - test sil, 3 + - - - - - 1.00 - - jne .LBB5_5 + - - 0.67 0.33 - - - - add rdi, r8 + - - 0.33 0.67 - - - - mov rdx, rcx + - - 0.34 0.66 - - - - mov rax, rdi - - - - - 1.00 - - ret diff --git a/benches/ref_from_suffix_with_elems_dynamic_size.x86-64 b/benches/ref_from_suffix_with_elems_dynamic_size.x86-64 index bdca571924..b9414b2d4c 100644 --- a/benches/ref_from_suffix_with_elems_dynamic_size.x86-64 +++ b/benches/ref_from_suffix_with_elems_dynamic_size.x86-64 @@ -1,23 +1,24 @@ bench_ref_from_suffix_with_elems_dynamic_size: + mov rcx, rdx + mov edx, 1 movabs rax, 4611686018427387901 - cmp rdx, rax - ja .LBB5_1 - lea r8d, [rsi + rdi] - xor ecx, ecx - mov eax, 0 - test r8b, 1 - jne .LBB5_5 - lea rax, [2*rdx + 4] - sub rsi, rax + cmp rcx, rax + ja .LBB5_3 + lea rax, [2*rcx + 4] + mov r8, rsi + sub r8, rax jae .LBB5_4 -.LBB5_1: +.LBB5_3: xor eax, eax - mov edx, 1 ret .LBB5_4: - add rdi, rsi - mov rcx, rdx - mov rax, rdi -.LBB5_5: + add esi, edi + xor edx, edx + mov eax, 0 + test sil, 1 + jne .LBB5_6 + add rdi, r8 mov rdx, rcx + mov rax, rdi +.LBB5_6: ret diff --git a/benches/ref_from_suffix_with_elems_dynamic_size.x86-64.mca b/benches/ref_from_suffix_with_elems_dynamic_size.x86-64.mca index 6d9de0b3eb..46ce6b7d5e 100644 --- a/benches/ref_from_suffix_with_elems_dynamic_size.x86-64.mca +++ b/benches/ref_from_suffix_with_elems_dynamic_size.x86-64.mca @@ -1,11 +1,11 @@ Iterations: 100 -Instructions: 1900 -Total Cycles: 571 -Total uOps: 1900 +Instructions: 2000 +Total Cycles: 604 +Total uOps: 2000 Dispatch Width: 4 -uOps Per Cycle: 3.33 -IPC: 3.33 +uOps Per Cycle: 3.31 +IPC: 3.31 Block RThroughput: 5.0 @@ -18,24 +18,25 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: + 1 1 0.33 mov rcx, rdx + 1 1 0.33 mov edx, 1 1 1 0.33 movabs rax, 4611686018427387901 - 1 1 0.33 cmp rdx, rax - 1 1 1.00 ja .LBB5_1 - 1 1 0.50 lea r8d, [rsi + rdi] - 1 0 0.25 xor ecx, ecx - 1 1 0.33 mov eax, 0 - 1 1 0.33 test r8b, 1 - 1 1 1.00 jne .LBB5_5 - 1 1 0.50 lea rax, [2*rdx + 4] - 1 1 0.33 sub rsi, rax + 1 1 0.33 cmp rcx, rax + 1 1 1.00 ja .LBB5_3 + 1 1 0.50 lea rax, [2*rcx + 4] + 1 1 0.33 mov r8, rsi + 1 1 0.33 sub r8, rax 1 1 1.00 jae .LBB5_4 1 0 0.25 xor eax, eax - 1 1 0.33 mov edx, 1 1 1 1.00 U ret - 1 1 0.33 add rdi, rsi - 1 1 0.33 mov rcx, rdx - 1 1 0.33 mov rax, rdi + 1 1 0.33 add esi, edi + 1 0 0.25 xor edx, edx + 1 1 0.33 mov eax, 0 + 1 1 0.33 test sil, 1 + 1 1 1.00 jne .LBB5_6 + 1 1 0.33 add rdi, r8 1 1 0.33 mov rdx, rcx + 1 1 0.33 mov rax, rdi 1 1 1.00 U ret @@ -52,26 +53,27 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 5.66 5.66 - 5.68 - - + - - 5.99 6.00 - 6.01 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.66 0.33 - 0.01 - - movabs rax, 4611686018427387901 - - - 0.01 0.99 - - - - cmp rdx, rax - - - - - - 1.00 - - ja .LBB5_1 - - - 0.99 0.01 - - - - lea r8d, [rsi + rdi] - - - - - - - - - xor ecx, ecx - - - 0.33 0.33 - 0.34 - - mov eax, 0 - - - 0.33 0.34 - 0.33 - - test r8b, 1 - - - - - - 1.00 - - jne .LBB5_5 - - - 0.34 0.66 - - - - lea rax, [2*rdx + 4] - - - - 1.00 - - - - sub rsi, rax + - - 0.04 0.95 - 0.01 - - mov rcx, rdx + - - - 1.00 - - - - mov edx, 1 + - - 1.00 - - - - - movabs rax, 4611686018427387901 + - - - - - 1.00 - - cmp rcx, rax + - - - - - 1.00 - - ja .LBB5_3 + - - - 1.00 - - - - lea rax, [2*rcx + 4] + - - 1.00 - - - - - mov r8, rsi + - - - 1.00 - - - - sub r8, rax - - - - - 1.00 - - jae .LBB5_4 - - - - - - - - xor eax, eax - - - 1.00 - - - - - mov edx, 1 - - - - - 1.00 - - ret - - - - 1.00 - - - - add rdi, rsi - - - 1.00 - - - - - mov rcx, rdx - - - 0.32 0.68 - - - - mov rax, rdi - - - 0.68 0.32 - - - - mov rdx, rcx + - - 1.00 - - - - - add esi, edi + - - - - - - - - xor edx, edx + - - - 1.00 - - - - mov eax, 0 + - - 1.00 - - - - - test sil, 1 + - - - - - 1.00 - - jne .LBB5_6 + - - - 1.00 - - - - add rdi, r8 + - - 1.00 - - - - - mov rdx, rcx + - - 0.95 0.05 - - - - mov rax, rdi - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_bytes_dynamic_padding.x86-64 b/benches/try_ref_from_bytes_dynamic_padding.x86-64 index 217c5fc617..5b6bc5b189 100644 --- a/benches/try_ref_from_bytes_dynamic_padding.x86-64 +++ b/benches/try_ref_from_bytes_dynamic_padding.x86-64 @@ -1,24 +1,27 @@ bench_try_ref_from_bytes_dynamic_padding: - test dil, 3 - jne .LBB5_4 movabs rax, 9223372036854775804 and rax, rsi cmp rax, 9 - jb .LBB5_4 + jb .LBB5_1 add rax, -9 movabs rcx, -6148914691236517205 mul rcx + test dil, 3 + jne .LBB5_1 shr rdx lea rax, [rdx + 2*rdx] or rax, 3 add rax, 9 cmp rsi, rax - jne .LBB5_4 - cmp word ptr [rdi], -16192 - je .LBB5_5 -.LBB5_4: - xor edi, edi + jne .LBB5_1 + movzx ecx, word ptr [rdi] + xor eax, eax + cmp ecx, 49344 + cmove rsi, rdx + cmove rax, rdi + mov rdx, rsi + ret +.LBB5_1: + xor eax, eax mov rdx, rsi -.LBB5_5: - mov rax, rdi ret diff --git a/benches/try_ref_from_bytes_dynamic_padding.x86-64.mca b/benches/try_ref_from_bytes_dynamic_padding.x86-64.mca index 95b993c7e0..ccc679bdcf 100644 --- a/benches/try_ref_from_bytes_dynamic_padding.x86-64.mca +++ b/benches/try_ref_from_bytes_dynamic_padding.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 2100 -Total Cycles: 709 -Total uOps: 2300 +Instructions: 2500 +Total Cycles: 1008 +Total uOps: 2800 Dispatch Width: 4 -uOps Per Cycle: 3.24 -IPC: 2.96 -Block RThroughput: 5.8 +uOps Per Cycle: 2.78 +IPC: 2.48 +Block RThroughput: 7.0 Instruction Info: @@ -18,26 +18,30 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 test dil, 3 - 1 1 1.00 jne .LBB5_4 1 1 0.33 movabs rax, 9223372036854775804 1 1 0.33 and rax, rsi 1 1 0.33 cmp rax, 9 - 1 1 1.00 jb .LBB5_4 + 1 1 1.00 jb .LBB5_1 1 1 0.33 add rax, -9 1 1 0.33 movabs rcx, -6148914691236517205 2 4 1.00 mul rcx + 1 1 0.33 test dil, 3 + 1 1 1.00 jne .LBB5_1 1 1 0.50 shr rdx 1 1 0.50 lea rax, [rdx + 2*rdx] 1 1 0.33 or rax, 3 1 1 0.33 add rax, 9 1 1 0.33 cmp rsi, rax - 1 1 1.00 jne .LBB5_4 - 2 6 0.50 * cmp word ptr [rdi], -16192 - 1 1 1.00 je .LBB5_5 - 1 0 0.25 xor edi, edi + 1 1 1.00 jne .LBB5_1 + 1 5 0.50 * movzx ecx, word ptr [rdi] + 1 0 0.25 xor eax, eax + 1 1 0.33 cmp ecx, 49344 + 2 2 0.67 cmove rsi, rdx + 2 2 0.67 cmove rax, rdi + 1 1 0.33 mov rdx, rsi + 1 1 1.00 U ret + 1 0 0.25 xor eax, eax 1 1 0.33 mov rdx, rsi - 1 1 0.33 mov rax, rdi 1 1 1.00 U ret @@ -54,28 +58,32 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 6.98 6.99 - 7.03 0.50 0.50 + - - 8.02 8.01 - 8.97 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.48 0.51 - 0.01 - - test dil, 3 - - - - - - 1.00 - - jne .LBB5_4 - - - 0.51 0.49 - - - - movabs rax, 9223372036854775804 - - - 0.01 0.99 - - - - and rax, rsi - - - 0.51 0.49 - - - - cmp rax, 9 - - - - - - 1.00 - - jb .LBB5_4 - - - 0.98 - - 0.02 - - add rax, -9 - - - 0.98 0.02 - - - - movabs rcx, -6148914691236517205 + - - 0.07 0.04 - 0.89 - - movabs rax, 9223372036854775804 + - - 0.97 0.01 - 0.02 - - and rax, rsi + - - - 0.99 - 0.01 - - cmp rax, 9 + - - - - - 1.00 - - jb .LBB5_1 + - - 0.99 0.01 - - - - add rax, -9 + - - 0.02 0.06 - 0.92 - - movabs rcx, -6148914691236517205 - - 1.00 1.00 - - - - mul rcx - - - 0.99 - - 0.01 - - shr rdx - - - - 1.00 - - - - lea rax, [rdx + 2*rdx] - - - - 0.51 - 0.49 - - or rax, 3 - - - 0.01 0.49 - 0.50 - - add rax, 9 - - - - 0.02 - 0.98 - - cmp rsi, rax - - - - - - 1.00 - - jne .LBB5_4 - - - 0.51 0.49 - - 0.50 0.50 cmp word ptr [rdi], -16192 - - - - - - 1.00 - - je .LBB5_5 - - - - - - - - - xor edi, edi - - - 0.50 0.50 - - - - mov rdx, rsi - - - 0.50 0.48 - 0.02 - - mov rax, rdi + - - 0.07 0.91 - 0.02 - - test dil, 3 + - - - - - 1.00 - - jne .LBB5_1 + - - 0.97 - - 0.03 - - shr rdx + - - 0.03 0.97 - - - - lea rax, [rdx + 2*rdx] + - - 0.02 0.95 - 0.03 - - or rax, 3 + - - 0.03 0.96 - 0.01 - - add rax, 9 + - - 0.01 0.98 - 0.01 - - cmp rsi, rax + - - - - - 1.00 - - jne .LBB5_1 + - - - - - - 0.50 0.50 movzx ecx, word ptr [rdi] + - - - - - - - - xor eax, eax + - - 0.91 0.06 - 0.03 - - cmp ecx, 49344 + - - 0.97 0.04 - 0.99 - - cmove rsi, rdx + - - 0.99 0.99 - 0.02 - - cmove rax, rdi + - - 0.96 0.03 - 0.01 - - mov rdx, rsi + - - - - - 1.00 - - ret + - - - - - - - - xor eax, eax + - - 0.01 0.01 - 0.98 - - mov rdx, rsi - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_bytes_dynamic_size.x86-64 b/benches/try_ref_from_bytes_dynamic_size.x86-64 index cf67afd31c..15d08c143c 100644 --- a/benches/try_ref_from_bytes_dynamic_size.x86-64 +++ b/benches/try_ref_from_bytes_dynamic_size.x86-64 @@ -1,22 +1,22 @@ bench_try_ref_from_bytes_dynamic_size: - mov rdx, rsi - mov rax, rdi cmp rsi, 4 - setb cl - or cl, al - test cl, 1 - jne .LBB5_4 - lea rcx, [rdx - 4] - mov rsi, rcx - and rsi, -2 - add rsi, 4 - cmp rdx, rsi - jne .LBB5_4 - cmp word ptr [rax], -16192 - jne .LBB5_4 + jb .LBB5_1 + test dil, 1 + jne .LBB5_1 + mov rdx, rsi + lea rcx, [rsi - 4] + mov rax, rcx + and rax, -2 + add rax, 4 + cmp rsi, rax + jne .LBB5_1 shr rcx - mov rdx, rcx + movzx esi, word ptr [rdi] + xor eax, eax + cmp esi, 49344 + cmove rdx, rcx + cmove rax, rdi ret -.LBB5_4: +.LBB5_1: xor eax, eax ret diff --git a/benches/try_ref_from_bytes_dynamic_size.x86-64.mca b/benches/try_ref_from_bytes_dynamic_size.x86-64.mca index ecd7a18f6d..99a6a1d9f3 100644 --- a/benches/try_ref_from_bytes_dynamic_size.x86-64.mca +++ b/benches/try_ref_from_bytes_dynamic_size.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 Instructions: 2000 -Total Cycles: 639 -Total uOps: 2100 +Total Cycles: 641 +Total uOps: 2200 Dispatch Width: 4 -uOps Per Cycle: 3.29 -IPC: 3.13 -Block RThroughput: 5.3 +uOps Per Cycle: 3.43 +IPC: 3.12 +Block RThroughput: 5.5 Instruction Info: @@ -18,23 +18,23 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 mov rdx, rsi - 1 1 0.33 mov rax, rdi 1 1 0.33 cmp rsi, 4 - 1 1 0.50 setb cl - 1 1 0.33 or cl, al - 1 1 0.33 test cl, 1 - 1 1 1.00 jne .LBB5_4 - 1 1 0.50 lea rcx, [rdx - 4] - 1 1 0.33 mov rsi, rcx - 1 1 0.33 and rsi, -2 - 1 1 0.33 add rsi, 4 - 1 1 0.33 cmp rdx, rsi - 1 1 1.00 jne .LBB5_4 - 2 6 0.50 * cmp word ptr [rax], -16192 - 1 1 1.00 jne .LBB5_4 + 1 1 1.00 jb .LBB5_1 + 1 1 0.33 test dil, 1 + 1 1 1.00 jne .LBB5_1 + 1 1 0.33 mov rdx, rsi + 1 1 0.50 lea rcx, [rsi - 4] + 1 1 0.33 mov rax, rcx + 1 1 0.33 and rax, -2 + 1 1 0.33 add rax, 4 + 1 1 0.33 cmp rsi, rax + 1 1 1.00 jne .LBB5_1 1 1 0.50 shr rcx - 1 1 0.33 mov rdx, rcx + 1 5 0.50 * movzx esi, word ptr [rdi] + 1 0 0.25 xor eax, eax + 1 1 0.33 cmp esi, 49344 + 2 2 0.67 cmove rdx, rcx + 2 2 0.67 cmove rax, rdi 1 1 1.00 U ret 1 0 0.25 xor eax, eax 1 1 1.00 U ret @@ -53,27 +53,27 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 6.32 6.32 - 6.36 0.50 0.50 + - - 6.31 6.32 - 6.37 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.33 0.66 - 0.01 - - mov rdx, rsi - - - 0.66 0.34 - - - - mov rax, rdi - - - 0.34 0.66 - - - - cmp rsi, 4 - - - 0.99 - - 0.01 - - setb cl - - - 0.01 0.99 - - - - or cl, al - - - - 1.00 - - - - test cl, 1 - - - - - - 1.00 - - jne .LBB5_4 - - - 0.66 0.34 - - - - lea rcx, [rdx - 4] - - - 0.33 0.66 - 0.01 - - mov rsi, rcx - - - 1.00 - - - - - and rsi, -2 - - - 0.66 0.34 - - - - add rsi, 4 - - - - 1.00 - - - - cmp rdx, rsi - - - - - - 1.00 - - jne .LBB5_4 - - - - - - 1.00 0.50 0.50 cmp word ptr [rax], -16192 - - - - - - 1.00 - - jne .LBB5_4 - - - 0.67 - - 0.33 - - shr rcx - - - 0.67 0.33 - - - - mov rdx, rcx + - - 0.95 0.03 - 0.02 - - cmp rsi, 4 + - - - - - 1.00 - - jb .LBB5_1 + - - 0.37 0.63 - - - - test dil, 1 + - - - - - 1.00 - - jne .LBB5_1 + - - 0.66 0.33 - 0.01 - - mov rdx, rsi + - - 0.33 0.67 - - - - lea rcx, [rsi - 4] + - - 0.01 0.99 - - - - mov rax, rcx + - - - 1.00 - - - - and rax, -2 + - - - 0.99 - 0.01 - - add rax, 4 + - - 0.01 0.99 - - - - cmp rsi, rax + - - - - - 1.00 - - jne .LBB5_1 + - - 1.00 - - - - - shr rcx + - - - - - - 0.50 0.50 movzx esi, word ptr [rdi] + - - - - - - - - xor eax, eax + - - 0.99 0.01 - - - - cmp esi, 49344 + - - 0.99 0.02 - 0.99 - - cmove rdx, rcx + - - 1.00 0.66 - 0.34 - - cmove rax, rdi - - - - - 1.00 - - ret - - - - - - - - xor eax, eax - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64 b/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64 index 3ef8d1448a..2ea5118fa3 100644 --- a/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64 +++ b/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64 @@ -1,21 +1,19 @@ bench_try_ref_from_bytes_with_elems_dynamic_padding: - movabs rax, 3074457345618258598 - cmp rdx, rax - seta cl + movabs rcx, 3074457345618258598 + cmp rdx, rcx + ja .LBB5_4 mov rax, rdi test al, 3 - setne dil - or dil, cl - jne .LBB5_3 + jne .LBB5_4 lea rcx, [rdx + 2*rdx] or rcx, 3 add rcx, 9 cmp rsi, rcx - jne .LBB5_3 + jne .LBB5_4 cmp word ptr [rax], -16192 - je .LBB5_4 -.LBB5_3: + je .LBB5_5 +.LBB5_4: xor eax, eax mov rdx, rsi -.LBB5_4: +.LBB5_5: ret diff --git a/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64.mca b/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64.mca index 8131f3bd54..c5d4a2b0d5 100644 --- a/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64.mca +++ b/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64.mca @@ -1,11 +1,11 @@ Iterations: 100 -Instructions: 1800 -Total Cycles: 607 -Total uOps: 2000 +Instructions: 1600 +Total Cycles: 507 +Total uOps: 1700 Dispatch Width: 4 -uOps Per Cycle: 3.29 -IPC: 2.97 +uOps Per Cycle: 3.35 +IPC: 3.16 Block RThroughput: 5.0 @@ -18,21 +18,19 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 movabs rax, 3074457345618258598 - 1 1 0.33 cmp rdx, rax - 2 2 1.00 seta cl + 1 1 0.33 movabs rcx, 3074457345618258598 + 1 1 0.33 cmp rdx, rcx + 1 1 1.00 ja .LBB5_4 1 1 0.33 mov rax, rdi 1 1 0.33 test al, 3 - 1 1 0.50 setne dil - 1 1 0.33 or dil, cl - 1 1 1.00 jne .LBB5_3 + 1 1 1.00 jne .LBB5_4 1 1 0.50 lea rcx, [rdx + 2*rdx] 1 1 0.33 or rcx, 3 1 1 0.33 add rcx, 9 1 1 0.33 cmp rsi, rcx - 1 1 1.00 jne .LBB5_3 + 1 1 1.00 jne .LBB5_4 2 6 0.50 * cmp word ptr [rax], -16192 - 1 1 1.00 je .LBB5_4 + 1 1 1.00 je .LBB5_5 1 0 0.25 xor eax, eax 1 1 0.33 mov rdx, rsi 1 1 1.00 U ret @@ -51,25 +49,23 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 5.99 5.99 - 6.02 0.50 0.50 + - - 4.98 4.99 - 5.03 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - 0.99 - 0.01 - - movabs rax, 3074457345618258598 - - - - 1.00 - - - - cmp rdx, rax - - - - - - 2.00 - - seta cl - - - 1.00 - - - - - mov rax, rdi - - - 0.99 0.01 - - - - test al, 3 - - - 1.00 - - - - - setne dil - - - - 0.99 - 0.01 - - or dil, cl - - - - - - 1.00 - - jne .LBB5_3 - - - 0.01 0.99 - - - - lea rcx, [rdx + 2*rdx] - - - - 1.00 - - - - or rcx, 3 - - - 0.99 0.01 - - - - add rcx, 9 - - - - 1.00 - - - - cmp rsi, rcx - - - - - - 1.00 - - jne .LBB5_3 + - - 0.98 0.01 - 0.01 - - movabs rcx, 3074457345618258598 + - - 0.01 0.99 - - - - cmp rdx, rcx + - - - - - 1.00 - - ja .LBB5_4 + - - 0.99 0.01 - - - - mov rax, rdi + - - 0.01 0.98 - 0.01 - - test al, 3 + - - - - - 1.00 - - jne .LBB5_4 + - - 0.98 0.02 - - - - lea rcx, [rdx + 2*rdx] + - - 0.01 0.99 - - - - or rcx, 3 + - - - 1.00 - - - - add rcx, 9 + - - - 0.99 - 0.01 - - cmp rsi, rcx + - - - - - 1.00 - - jne .LBB5_4 - - 1.00 - - - 0.50 0.50 cmp word ptr [rax], -16192 - - - - - - 1.00 - - je .LBB5_4 + - - - - - 1.00 - - je .LBB5_5 - - - - - - - - xor eax, eax - - 1.00 - - - - - mov rdx, rsi - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64 b/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64 index ba34b1855b..9054d9c7a1 100644 --- a/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64 +++ b/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64 @@ -1,13 +1,13 @@ bench_try_ref_from_bytes_with_elems_dynamic_size: - movabs rax, 4611686018427387901 - cmp rdx, rax - seta cl + movabs rcx, 4611686018427387901 + cmp rdx, rcx + ja .LBB5_3 mov rax, rdi - or dil, cl - test dil, 1 - jne .LBB5_3 lea rcx, [2*rdx + 4] cmp rsi, rcx + setne cl + or cl, al + test cl, 1 jne .LBB5_3 cmp word ptr [rax], -16192 je .LBB5_4 diff --git a/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64.mca b/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64.mca index ae049c03df..66d1b87267 100644 --- a/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64.mca +++ b/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 Instructions: 1500 -Total Cycles: 507 -Total uOps: 1700 +Total Cycles: 474 +Total uOps: 1600 Dispatch Width: 4 -uOps Per Cycle: 3.35 -IPC: 2.96 -Block RThroughput: 4.3 +uOps Per Cycle: 3.38 +IPC: 3.16 +Block RThroughput: 4.0 Instruction Info: @@ -18,15 +18,15 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 movabs rax, 4611686018427387901 - 1 1 0.33 cmp rdx, rax - 2 2 1.00 seta cl + 1 1 0.33 movabs rcx, 4611686018427387901 + 1 1 0.33 cmp rdx, rcx + 1 1 1.00 ja .LBB5_3 1 1 0.33 mov rax, rdi - 1 1 0.33 or dil, cl - 1 1 0.33 test dil, 1 - 1 1 1.00 jne .LBB5_3 1 1 0.50 lea rcx, [2*rdx + 4] 1 1 0.33 cmp rsi, rcx + 1 1 0.50 setne cl + 1 1 0.33 or cl, al + 1 1 0.33 test cl, 1 1 1 1.00 jne .LBB5_3 2 6 0.50 * cmp word ptr [rax], -16192 1 1 1.00 je .LBB5_4 @@ -48,22 +48,22 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 4.98 4.99 - 5.03 0.50 0.50 + - - 4.66 4.66 - 4.68 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - 0.99 - 0.01 - - movabs rax, 4611686018427387901 - - - 0.50 0.50 - - - - cmp rdx, rax - - - 1.96 - - 0.04 - - seta cl - - - 0.01 0.99 - - - - mov rax, rdi - - - 1.00 - - - - - or dil, cl - - - 0.99 0.01 - - - - test dil, 1 - - - - - - 1.00 - - jne .LBB5_3 - - - 0.01 0.99 - - - - lea rcx, [2*rdx + 4] - - - 0.02 0.49 - 0.49 - - cmp rsi, rcx + - - 0.33 0.66 - 0.01 - - movabs rcx, 4611686018427387901 + - - 1.00 - - - - - cmp rdx, rcx + - - - - - 1.00 - - ja .LBB5_3 + - - 0.66 0.01 - 0.33 - - mov rax, rdi + - - 0.33 0.67 - - - - lea rcx, [2*rdx + 4] + - - 0.01 0.99 - - - - cmp rsi, rcx + - - 0.66 - - 0.34 - - setne cl + - - - 1.00 - - - - or cl, al + - - 0.01 0.99 - - - - test cl, 1 - - - - - 1.00 - - jne .LBB5_3 - - - - 0.51 - 0.49 0.50 0.50 cmp word ptr [rax], -16192 + - - 0.99 0.01 - - 0.50 0.50 cmp word ptr [rax], -16192 - - - - - 1.00 - - je .LBB5_4 - - - - - - - - xor eax, eax - - - 0.49 0.51 - - - - mov rdx, rsi + - - 0.67 0.33 - - - - mov rdx, rsi - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_prefix_dynamic_padding.x86-64 b/benches/try_ref_from_prefix_dynamic_padding.x86-64 index d832cb7ecf..3cb4e6b574 100644 --- a/benches/try_ref_from_prefix_dynamic_padding.x86-64 +++ b/benches/try_ref_from_prefix_dynamic_padding.x86-64 @@ -1,29 +1,31 @@ bench_try_ref_from_prefix_dynamic_padding: - xor edx, edx - mov eax, 0 - test dil, 3 - je .LBB5_1 - ret -.LBB5_1: movabs rax, 9223372036854775804 - and rsi, rax - cmp rsi, 9 - jae .LBB5_3 + and rax, rsi + cmp rax, 9 + jae .LBB5_2 mov edx, 1 - xor eax, eax + xor ecx, ecx + mov rax, rcx + ret +.LBB5_2: + xor edx, edx + mov ecx, 0 + test dil, 3 + je .LBB5_3 + mov rax, rcx ret .LBB5_3: - add rsi, -9 + add rax, -9 movabs rcx, -6148914691236517205 - mov rax, rsi mul rcx mov rax, rdx shr rax - movzx ecx, word ptr [rdi] - cmp cx, -16192 + movzx esi, word ptr [rdi] + cmp si, -16192 mov edx, 2 cmove rdx, rax - xor eax, eax - cmp ecx, 49344 - cmove rax, rdi + xor ecx, ecx + cmp esi, 49344 + cmove rcx, rdi + mov rax, rcx ret diff --git a/benches/try_ref_from_prefix_dynamic_padding.x86-64.mca b/benches/try_ref_from_prefix_dynamic_padding.x86-64.mca index 482112a39b..ef17cbfa30 100644 --- a/benches/try_ref_from_prefix_dynamic_padding.x86-64.mca +++ b/benches/try_ref_from_prefix_dynamic_padding.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 2600 -Total Cycles: 843 -Total uOps: 2900 +Instructions: 2800 +Total Cycles: 910 +Total uOps: 3100 Dispatch Width: 4 -uOps Per Cycle: 3.44 +uOps Per Cycle: 3.41 IPC: 3.08 -Block RThroughput: 7.3 +Block RThroughput: 7.8 Instruction Info: @@ -18,31 +18,33 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 0 0.25 xor edx, edx - 1 1 0.33 mov eax, 0 - 1 1 0.33 test dil, 3 - 1 1 1.00 je .LBB5_1 - 1 1 1.00 U ret 1 1 0.33 movabs rax, 9223372036854775804 - 1 1 0.33 and rsi, rax - 1 1 0.33 cmp rsi, 9 - 1 1 1.00 jae .LBB5_3 + 1 1 0.33 and rax, rsi + 1 1 0.33 cmp rax, 9 + 1 1 1.00 jae .LBB5_2 1 1 0.33 mov edx, 1 - 1 0 0.25 xor eax, eax + 1 0 0.25 xor ecx, ecx + 1 1 0.33 mov rax, rcx + 1 1 1.00 U ret + 1 0 0.25 xor edx, edx + 1 1 0.33 mov ecx, 0 + 1 1 0.33 test dil, 3 + 1 1 1.00 je .LBB5_3 + 1 1 0.33 mov rax, rcx 1 1 1.00 U ret - 1 1 0.33 add rsi, -9 + 1 1 0.33 add rax, -9 1 1 0.33 movabs rcx, -6148914691236517205 - 1 1 0.33 mov rax, rsi 2 4 1.00 mul rcx 1 1 0.33 mov rax, rdx 1 1 0.50 shr rax - 1 5 0.50 * movzx ecx, word ptr [rdi] - 1 1 0.33 cmp cx, -16192 + 1 5 0.50 * movzx esi, word ptr [rdi] + 1 1 0.33 cmp si, -16192 1 1 0.33 mov edx, 2 2 2 0.67 cmove rdx, rax - 1 0 0.25 xor eax, eax - 1 1 0.33 cmp ecx, 49344 - 2 2 0.67 cmove rax, rdi + 1 0 0.25 xor ecx, ecx + 1 1 0.33 cmp esi, 49344 + 2 2 0.67 cmove rcx, rdi + 1 1 0.33 mov rax, rcx 1 1 1.00 U ret @@ -59,33 +61,35 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 8.33 8.33 - 8.34 0.50 0.50 + - - 9.00 9.00 - 9.00 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - - - - - - xor edx, edx - - - 0.32 0.34 - 0.34 - - mov eax, 0 - - - 0.34 0.33 - 0.33 - - test dil, 3 - - - - - - 1.00 - - je .LBB5_1 + - - 0.03 0.96 - 0.01 - - movabs rax, 9223372036854775804 + - - 0.96 0.01 - 0.03 - - and rax, rsi + - - 0.96 0.02 - 0.02 - - cmp rax, 9 + - - - - - 1.00 - - jae .LBB5_2 + - - 0.99 - - 0.01 - - mov edx, 1 + - - - - - - - - xor ecx, ecx + - - 0.01 - - 0.99 - - mov rax, rcx - - - - - 1.00 - - ret - - - 0.35 0.65 - - - - movabs rax, 9223372036854775804 - - - 0.96 0.03 - 0.01 - - and rsi, rax - - - 0.01 0.97 - 0.02 - - cmp rsi, 9 - - - - - - 1.00 - - jae .LBB5_3 - - - 0.67 0.01 - 0.32 - - mov edx, 1 - - - - - - - - - xor eax, eax + - - - - - - - - xor edx, edx + - - 0.01 0.03 - 0.96 - - mov ecx, 0 + - - 0.02 0.97 - 0.01 - - test dil, 3 + - - - - - 1.00 - - je .LBB5_3 + - - 0.02 0.98 - - - - mov rax, rcx - - - - - 1.00 - - ret - - - 0.02 0.34 - 0.64 - - add rsi, -9 - - - 0.33 0.66 - 0.01 - - movabs rcx, -6148914691236517205 - - - 0.66 0.34 - - - - mov rax, rsi + - - - 1.00 - - - - add rax, -9 + - - 1.00 - - - - - movabs rcx, -6148914691236517205 - - 1.00 1.00 - - - - mul rcx - - 0.01 0.99 - - - - mov rax, rdx - - 0.99 - - 0.01 - - shr rax - - - - - - - 0.50 0.50 movzx ecx, word ptr [rdi] - - - 0.33 0.03 - 0.64 - - cmp cx, -16192 - - - 0.01 0.31 - 0.68 - - mov edx, 2 + - - - - - - 0.50 0.50 movzx esi, word ptr [rdi] + - - 0.97 0.02 - 0.01 - - cmp si, -16192 + - - 0.01 0.02 - 0.97 - - mov edx, 2 - - 1.00 1.00 - - - - cmove rdx, rax - - - - - - - - - xor eax, eax - - - 0.33 0.33 - 0.34 - - cmp ecx, 49344 - - - 1.00 1.00 - - - - cmove rax, rdi + - - - - - - - - xor ecx, ecx + - - 0.01 0.01 - 0.98 - - cmp esi, 49344 + - - 1.00 1.00 - - - - cmove rcx, rdi + - - 0.01 0.99 - - - - mov rax, rcx - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_prefix_dynamic_size.x86-64 b/benches/try_ref_from_prefix_dynamic_size.x86-64 index be7f34b9f8..bca29f5523 100644 --- a/benches/try_ref_from_prefix_dynamic_size.x86-64 +++ b/benches/try_ref_from_prefix_dynamic_size.x86-64 @@ -1,14 +1,14 @@ bench_try_ref_from_prefix_dynamic_size: - xor edx, edx - mov eax, 0 - test dil, 1 - jne .LBB5_4 cmp rsi, 4 - jae .LBB5_3 + jae .LBB5_2 mov edx, 1 xor eax, eax ret -.LBB5_3: +.LBB5_2: + xor edx, edx + mov eax, 0 + test dil, 1 + jne .LBB5_4 add rsi, -4 shr rsi movzx ecx, word ptr [rdi] diff --git a/benches/try_ref_from_prefix_dynamic_size.x86-64.mca b/benches/try_ref_from_prefix_dynamic_size.x86-64.mca index 11706defe1..bdc62c5367 100644 --- a/benches/try_ref_from_prefix_dynamic_size.x86-64.mca +++ b/benches/try_ref_from_prefix_dynamic_size.x86-64.mca @@ -18,15 +18,15 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 0 0.25 xor edx, edx - 1 1 0.33 mov eax, 0 - 1 1 0.33 test dil, 1 - 1 1 1.00 jne .LBB5_4 1 1 0.33 cmp rsi, 4 - 1 1 1.00 jae .LBB5_3 + 1 1 1.00 jae .LBB5_2 1 1 0.33 mov edx, 1 1 0 0.25 xor eax, eax 1 1 1.00 U ret + 1 0 0.25 xor edx, edx + 1 1 0.33 mov eax, 0 + 1 1 0.33 test dil, 1 + 1 1 1.00 jne .LBB5_4 1 1 0.33 add rsi, -4 1 1 0.50 shr rsi 1 5 0.50 * movzx ecx, word ptr [rdi] @@ -56,22 +56,22 @@ Resource pressure per iteration: Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - - - - - - xor edx, edx - - - 0.30 0.37 - 0.33 - - mov eax, 0 - - - 0.35 0.32 - 0.33 - - test dil, 1 - - - - - - 1.00 - - jne .LBB5_4 - - - 0.32 0.33 - 0.35 - - cmp rsi, 4 - - - - - - 1.00 - - jae .LBB5_3 - - - 0.33 0.35 - 0.32 - - mov edx, 1 + - - - 0.35 - 0.65 - - cmp rsi, 4 + - - - - - 1.00 - - jae .LBB5_2 + - - 0.34 0.66 - - - - mov edx, 1 - - - - - - - - xor eax, eax - - - - - 1.00 - - ret - - - 0.34 0.64 - 0.02 - - add rsi, -4 + - - - - - - - - xor edx, edx + - - 0.64 0.34 - 0.02 - - mov eax, 0 + - - 0.33 0.64 - 0.03 - - test dil, 1 + - - - - - 1.00 - - jne .LBB5_4 + - - 0.64 0.34 - 0.02 - - add rsi, -4 - - 1.00 - - - - - shr rsi - - - - - - 0.50 0.50 movzx ecx, word ptr [rdi] - - - 0.60 0.40 - - - - cmp ecx, 49344 - - - 0.05 0.95 - - - - mov edx, 2 + - - 0.32 0.38 - 0.30 - - cmp ecx, 49344 + - - 0.03 0.95 - 0.02 - - mov edx, 2 - - 1.00 1.00 - - - - cmove rdx, rsi - - - - - - - - xor eax, eax - - - 0.37 0.31 - 0.32 - - cmp cx, -16192 + - - 0.36 0.01 - 0.63 - - cmp cx, -16192 - - 1.00 1.00 - - - - cmove rax, rdi - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_prefix_static_size.x86-64 b/benches/try_ref_from_prefix_static_size.x86-64 index 83212f776e..804d65c8d5 100644 --- a/benches/try_ref_from_prefix_static_size.x86-64 +++ b/benches/try_ref_from_prefix_static_size.x86-64 @@ -1,8 +1,9 @@ bench_try_ref_from_prefix_static_size: cmp rsi, 6 setb al - or al, dil - test al, 1 + mov ecx, edi + or cl, al + test cl, 1 jne .LBB5_2 movzx eax, word ptr [rdi] cmp eax, 49344 diff --git a/benches/try_ref_from_prefix_static_size.x86-64.mca b/benches/try_ref_from_prefix_static_size.x86-64.mca index 5d02b863a7..27fa1930fe 100644 --- a/benches/try_ref_from_prefix_static_size.x86-64.mca +++ b/benches/try_ref_from_prefix_static_size.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 1200 -Total Cycles: 374 -Total uOps: 1300 +Instructions: 1300 +Total Cycles: 407 +Total uOps: 1400 Dispatch Width: 4 -uOps Per Cycle: 3.48 -IPC: 3.21 -Block RThroughput: 3.3 +uOps Per Cycle: 3.44 +IPC: 3.19 +Block RThroughput: 3.5 Instruction Info: @@ -20,8 +20,9 @@ Instruction Info: [1] [2] [3] [4] [5] [6] Instructions: 1 1 0.33 cmp rsi, 6 1 1 0.50 setb al - 1 1 0.33 or al, dil - 1 1 0.33 test al, 1 + 1 1 0.33 mov ecx, edi + 1 1 0.33 or cl, al + 1 1 0.33 test cl, 1 1 1 1.00 jne .LBB5_2 1 5 0.50 * movzx eax, word ptr [rdi] 1 1 0.33 cmp eax, 49344 @@ -45,18 +46,19 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 3.66 3.65 - 3.69 0.50 0.50 + - - 3.99 3.99 - 4.02 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.35 0.64 - 0.01 - - cmp rsi, 6 + - - 0.05 0.94 - 0.01 - - cmp rsi, 6 - - 1.00 - - - - - setb al - - - 0.02 0.66 - 0.32 - - or al, dil - - - 0.03 0.65 - 0.32 - - test al, 1 + - - 0.93 0.07 - - - - mov ecx, edi + - - 0.03 0.96 - 0.01 - - or cl, al + - - 0.03 0.02 - 0.95 - - test cl, 1 - - - - - 1.00 - - jne .LBB5_2 - - - - - - 0.50 0.50 movzx eax, word ptr [rdi] - - - 0.92 0.07 - 0.01 - - cmp eax, 49344 - - - 0.37 0.63 - - - - mov eax, 2 + - - 0.02 0.97 - 0.01 - - cmp eax, 49344 + - - 0.96 0.03 - 0.01 - - mov eax, 2 - - 0.97 1.00 - 0.03 - - cmove rax, rdi - - - - - 1.00 - - je .LBB5_3 - - - - - - - - xor eax, eax diff --git a/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64 b/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64 index 80e66ba160..15273eeb08 100644 --- a/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64 +++ b/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64 @@ -1,30 +1,28 @@ bench_try_ref_from_prefix_with_elems_dynamic_padding: + mov rcx, rdx + mov edx, 1 movabs rax, 3074457345618258598 - cmp rdx, rax - ja .LBB5_1 - xor ecx, ecx - mov eax, 0 - test dil, 3 - je .LBB5_3 - mov rdx, rcx - ret -.LBB5_3: - lea rax, [rdx + 2*rdx] + cmp rcx, rax + ja .LBB5_4 + lea rax, [rcx + 2*rcx] or rax, 3 add rax, 9 cmp rax, rsi - jbe .LBB5_4 -.LBB5_1: + jbe .LBB5_2 +.LBB5_4: xor eax, eax - mov edx, 1 +.LBB5_5: ret -.LBB5_4: +.LBB5_2: + xor edx, edx + mov eax, 0 + test dil, 3 + jne .LBB5_5 movzx esi, word ptr [rdi] cmp si, -16192 - mov ecx, 2 - cmove rcx, rdx + mov edx, 2 + cmove rdx, rcx xor eax, eax cmp esi, 49344 cmove rax, rdi - mov rdx, rcx ret diff --git a/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64.mca b/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64.mca index 512e8ce643..4fc4306581 100644 --- a/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64.mca +++ b/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 2600 -Total Cycles: 806 -Total uOps: 2800 +Instructions: 2400 +Total Cycles: 741 +Total uOps: 2600 Dispatch Width: 4 -uOps Per Cycle: 3.47 -IPC: 3.23 -Block RThroughput: 7.0 +uOps Per Cycle: 3.51 +IPC: 3.24 +Block RThroughput: 6.5 Instruction Info: @@ -18,31 +18,29 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: + 1 1 0.33 mov rcx, rdx + 1 1 0.33 mov edx, 1 1 1 0.33 movabs rax, 3074457345618258598 - 1 1 0.33 cmp rdx, rax - 1 1 1.00 ja .LBB5_1 - 1 0 0.25 xor ecx, ecx - 1 1 0.33 mov eax, 0 - 1 1 0.33 test dil, 3 - 1 1 1.00 je .LBB5_3 - 1 1 0.33 mov rdx, rcx - 1 1 1.00 U ret - 1 1 0.50 lea rax, [rdx + 2*rdx] + 1 1 0.33 cmp rcx, rax + 1 1 1.00 ja .LBB5_4 + 1 1 0.50 lea rax, [rcx + 2*rcx] 1 1 0.33 or rax, 3 1 1 0.33 add rax, 9 1 1 0.33 cmp rax, rsi - 1 1 1.00 jbe .LBB5_4 + 1 1 1.00 jbe .LBB5_2 1 0 0.25 xor eax, eax - 1 1 0.33 mov edx, 1 1 1 1.00 U ret + 1 0 0.25 xor edx, edx + 1 1 0.33 mov eax, 0 + 1 1 0.33 test dil, 3 + 1 1 1.00 jne .LBB5_5 1 5 0.50 * movzx esi, word ptr [rdi] 1 1 0.33 cmp si, -16192 - 1 1 0.33 mov ecx, 2 - 2 2 0.67 cmove rcx, rdx + 1 1 0.33 mov edx, 2 + 2 2 0.67 cmove rdx, rcx 1 0 0.25 xor eax, eax 1 1 0.33 cmp esi, 49344 2 2 0.67 cmove rax, rdi - 1 1 0.33 mov rdx, rcx 1 1 1.00 U ret @@ -59,33 +57,31 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 7.98 7.99 - 8.03 0.50 0.50 + - - 7.32 7.33 - 7.35 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.98 - - 0.02 - - movabs rax, 3074457345618258598 - - - - 1.00 - - - - cmp rdx, rax - - - - - - 1.00 - - ja .LBB5_1 - - - - - - - - - xor ecx, ecx - - - 0.99 0.01 - - - - mov eax, 0 - - - 0.01 0.96 - 0.03 - - test dil, 3 - - - - - - 1.00 - - je .LBB5_3 - - - 0.97 0.01 - 0.02 - - mov rdx, rcx - - - - - - 1.00 - - ret - - - 0.03 0.97 - - - - lea rax, [rdx + 2*rdx] - - - 0.03 0.97 - - - - or rax, 3 - - - 0.01 0.99 - - - - add rax, 9 + - - - 0.99 - 0.01 - - mov rcx, rdx + - - 0.66 0.02 - 0.32 - - mov edx, 1 + - - 0.35 0.32 - 0.33 - - movabs rax, 3074457345618258598 + - - - 0.99 - 0.01 - - cmp rcx, rax + - - - - - 1.00 - - ja .LBB5_4 + - - 0.99 0.01 - - - - lea rax, [rcx + 2*rcx] + - - 0.33 0.67 - - - - or rax, 3 + - - - 1.00 - - - - add rax, 9 - - - 1.00 - - - - cmp rax, rsi - - - - - - 1.00 - - jbe .LBB5_4 + - - - - - 1.00 - - jbe .LBB5_2 - - - - - - - - xor eax, eax - - - 0.98 0.01 - 0.01 - - mov edx, 1 - - - - - 1.00 - - ret + - - - - - - - - xor edx, edx + - - 0.34 - - 0.66 - - mov eax, 0 + - - 0.99 - - 0.01 - - test dil, 3 + - - - - - 1.00 - - jne .LBB5_5 - - - - - - 0.50 0.50 movzx esi, word ptr [rdi] - - - 0.97 0.03 - - - - cmp si, -16192 - - - 0.98 0.01 - 0.01 - - mov ecx, 2 - - - 1.00 0.03 - 0.97 - - cmove rcx, rdx + - - 0.66 0.01 - 0.33 - - cmp si, -16192 + - - 0.67 0.32 - 0.01 - - mov edx, 2 + - - 1.00 0.99 - 0.01 - - cmove rdx, rcx - - - - - - - - xor eax, eax - - - 0.03 0.97 - - - - cmp esi, 49344 - - - 1.00 1.00 - - - - cmove rax, rdi - - - - 0.03 - 0.97 - - mov rdx, rcx + - - 0.33 0.33 - 0.34 - - cmp esi, 49344 + - - 1.00 0.68 - 0.32 - - cmove rax, rdi - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64 b/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64 index c12e87c137..c1b444fde9 100644 --- a/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64 +++ b/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64 @@ -1,20 +1,20 @@ bench_try_ref_from_prefix_with_elems_dynamic_size: - movabs rax, 4611686018427387901 - cmp rdx, rax - ja .LBB5_1 mov rcx, rdx - xor edx, edx - mov eax, 0 - test dil, 1 - jne .LBB5_5 + mov edx, 1 + movabs rax, 4611686018427387901 + cmp rcx, rax + ja .LBB5_3 lea rax, [2*rcx + 4] cmp rax, rsi jbe .LBB5_4 -.LBB5_1: +.LBB5_3: xor eax, eax - mov edx, 1 ret .LBB5_4: + xor edx, edx + mov eax, 0 + test dil, 1 + jne .LBB5_6 movzx esi, word ptr [rdi] cmp si, -16192 mov edx, 2 @@ -22,5 +22,5 @@ bench_try_ref_from_prefix_with_elems_dynamic_size: xor eax, eax cmp esi, 49344 cmove rax, rdi -.LBB5_5: +.LBB5_6: ret diff --git a/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64.mca b/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64.mca index 6c3f1a1ec9..c7bcc8ae1d 100644 --- a/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64.mca +++ b/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64.mca @@ -18,20 +18,20 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 movabs rax, 4611686018427387901 - 1 1 0.33 cmp rdx, rax - 1 1 1.00 ja .LBB5_1 1 1 0.33 mov rcx, rdx - 1 0 0.25 xor edx, edx - 1 1 0.33 mov eax, 0 - 1 1 0.33 test dil, 1 - 1 1 1.00 jne .LBB5_5 + 1 1 0.33 mov edx, 1 + 1 1 0.33 movabs rax, 4611686018427387901 + 1 1 0.33 cmp rcx, rax + 1 1 1.00 ja .LBB5_3 1 1 0.50 lea rax, [2*rcx + 4] 1 1 0.33 cmp rax, rsi 1 1 1.00 jbe .LBB5_4 1 0 0.25 xor eax, eax - 1 1 0.33 mov edx, 1 1 1 1.00 U ret + 1 0 0.25 xor edx, edx + 1 1 0.33 mov eax, 0 + 1 1 0.33 test dil, 1 + 1 1 1.00 jne .LBB5_6 1 5 0.50 * movzx esi, word ptr [rdi] 1 1 0.33 cmp si, -16192 1 1 0.33 mov edx, 2 @@ -55,29 +55,29 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 6.65 6.66 - 6.69 0.50 0.50 + - - 6.66 6.66 - 6.68 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.66 0.33 - 0.01 - - movabs rax, 4611686018427387901 - - - 0.02 0.66 - 0.32 - - cmp rdx, rax - - - - - - 1.00 - - ja .LBB5_1 - - - 0.66 0.33 - 0.01 - - mov rcx, rdx - - - - - - - - - xor edx, edx - - - 0.33 0.01 - 0.66 - - mov eax, 0 - - - 0.34 0.65 - 0.01 - - test dil, 1 - - - - - - 1.00 - - jne .LBB5_5 - - - 0.65 0.35 - - - - lea rax, [2*rcx + 4] - - - - 1.00 - - - - cmp rax, rsi + - - 0.01 0.98 - 0.01 - - mov rcx, rdx + - - 0.67 0.01 - 0.32 - - mov edx, 1 + - - 0.33 0.33 - 0.34 - - movabs rax, 4611686018427387901 + - - - 0.99 - 0.01 - - cmp rcx, rax + - - - - - 1.00 - - ja .LBB5_3 + - - 0.99 0.01 - - - - lea rax, [2*rcx + 4] + - - 0.33 0.67 - - - - cmp rax, rsi - - - - - 1.00 - - jbe .LBB5_4 - - - - - - - - xor eax, eax - - - 0.34 0.01 - 0.65 - - mov edx, 1 - - - - - 1.00 - - ret + - - - - - - - - xor edx, edx + - - 0.34 0.02 - 0.64 - - mov eax, 0 + - - 0.33 0.66 - 0.01 - - test dil, 1 + - - - - - 1.00 - - jne .LBB5_6 - - - - - - 0.50 0.50 movzx esi, word ptr [rdi] - - - 0.65 0.34 - 0.01 - - cmp si, -16192 - - - 0.66 0.34 - - - - mov edx, 2 + - - 0.66 0.34 - - - - cmp si, -16192 + - - 0.33 0.67 - - - - mov edx, 2 - - 1.00 0.99 - 0.01 - - cmove rdx, rcx - - - - - - - - xor eax, eax - - - 0.34 0.66 - - - - cmp esi, 49344 - - - 1.00 0.99 - 0.01 - - cmove rax, rdi + - - 0.67 0.32 - 0.01 - - cmp esi, 49344 + - - 1.00 0.67 - 0.33 - - cmove rax, rdi - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_suffix_dynamic_padding.x86-64 b/benches/try_ref_from_suffix_dynamic_padding.x86-64 index b3e9244428..b265188697 100644 --- a/benches/try_ref_from_suffix_dynamic_padding.x86-64 +++ b/benches/try_ref_from_suffix_dynamic_padding.x86-64 @@ -1,11 +1,11 @@ bench_try_ref_from_suffix_dynamic_padding: - lea eax, [rsi + rdi] - test al, 3 - jne .LBB5_1 movabs rax, 9223372036854775804 and rax, rsi cmp rax, 9 - jae .LBB5_3 + jb .LBB5_1 + lea ecx, [rsi + rdi] + test cl, 3 + je .LBB5_3 .LBB5_1: xor eax, eax ret diff --git a/benches/try_ref_from_suffix_dynamic_padding.x86-64.mca b/benches/try_ref_from_suffix_dynamic_padding.x86-64.mca index d56ae56d85..ad9399513b 100644 --- a/benches/try_ref_from_suffix_dynamic_padding.x86-64.mca +++ b/benches/try_ref_from_suffix_dynamic_padding.x86-64.mca @@ -1,11 +1,11 @@ Iterations: 100 Instructions: 2300 -Total Cycles: 791 +Total Cycles: 797 Total uOps: 2600 Dispatch Width: 4 -uOps Per Cycle: 3.29 -IPC: 2.91 +uOps Per Cycle: 3.26 +IPC: 2.89 Block RThroughput: 6.5 @@ -18,13 +18,13 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.50 lea eax, [rsi + rdi] - 1 1 0.33 test al, 3 - 1 1 1.00 jne .LBB5_1 1 1 0.33 movabs rax, 9223372036854775804 1 1 0.33 and rax, rsi 1 1 0.33 cmp rax, 9 - 1 1 1.00 jae .LBB5_3 + 1 1 1.00 jb .LBB5_1 + 1 1 0.50 lea ecx, [rsi + rdi] + 1 1 0.33 test cl, 3 + 1 1 1.00 je .LBB5_3 1 0 0.25 xor eax, eax 1 1 1.00 U ret 1 1 0.33 add rax, -9 @@ -56,30 +56,30 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 7.70 7.58 - 7.72 0.50 0.50 + - - 7.67 7.62 - 7.71 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.26 0.74 - - - - lea eax, [rsi + rdi] - - - 0.19 0.28 - 0.53 - - test al, 3 - - - - - - 1.00 - - jne .LBB5_1 - - - 0.93 0.06 - 0.01 - - movabs rax, 9223372036854775804 - - - 0.81 0.14 - 0.05 - - and rax, rsi - - - 0.55 0.43 - 0.02 - - cmp rax, 9 - - - - - - 1.00 - - jae .LBB5_3 + - - 0.60 0.24 - 0.16 - - movabs rax, 9223372036854775804 + - - 0.58 0.17 - 0.25 - - and rax, rsi + - - 0.33 0.60 - 0.07 - - cmp rax, 9 + - - - - - 1.00 - - jb .LBB5_1 + - - 0.30 0.70 - - - - lea ecx, [rsi + rdi] + - - 0.13 0.57 - 0.30 - - test cl, 3 + - - - - - 1.00 - - je .LBB5_3 - - - - - - - - xor eax, eax - - - - - 1.00 - - ret - - - 0.42 0.56 - 0.02 - - add rax, -9 - - - 0.67 0.30 - 0.03 - - movabs rcx, -6148914691236517205 + - - 0.72 0.21 - 0.07 - - add rax, -9 + - - 0.69 0.23 - 0.08 - - movabs rcx, -6148914691236517205 - - 1.00 1.00 - - - - mul rcx - - - 0.71 - - 0.29 - - shr rdx - - - 0.32 0.68 - - - - lea rcx, [rdx + 2*rdx] - - - 0.57 0.04 - 0.39 - - sub rsi, rcx - - - 0.28 0.67 - 0.05 - - or rcx, -4 - - - 0.29 0.29 - 0.42 - - add rsi, rdi - - - 0.02 0.98 - - - - lea rdi, [rcx + rsi] - - - 0.02 0.41 - 0.57 - - add rdi, -8 + - - 0.60 - - 0.40 - - shr rdx + - - 0.50 0.50 - - - - lea rcx, [rdx + 2*rdx] + - - 0.44 0.25 - 0.31 - - sub rsi, rcx + - - 0.52 0.34 - 0.14 - - or rcx, -4 + - - 0.28 0.46 - 0.26 - - add rsi, rdi + - - 0.06 0.94 - - - - lea rdi, [rcx + rsi] + - - - 0.37 - 0.63 - - add rdi, -8 - - - - - - - - xor eax, eax - - - 0.57 0.01 - 0.42 0.50 0.50 cmp word ptr [rcx + rsi - 8], -16192 - - - 0.09 0.99 - 0.92 - - cmove rax, rdi + - - 0.58 0.06 - 0.36 0.50 0.50 cmp word ptr [rcx + rsi - 8], -16192 + - - 0.34 0.98 - 0.68 - - cmove rax, rdi - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_suffix_dynamic_size.x86-64 b/benches/try_ref_from_suffix_dynamic_size.x86-64 index d51f7817e5..f175802bae 100644 --- a/benches/try_ref_from_suffix_dynamic_size.x86-64 +++ b/benches/try_ref_from_suffix_dynamic_size.x86-64 @@ -1,13 +1,9 @@ bench_try_ref_from_suffix_dynamic_size: - lea eax, [rsi + rdi] cmp rsi, 4 - setb cl - or cl, al - test cl, 1 - je .LBB5_2 - xor eax, eax - ret -.LBB5_2: + jb .LBB5_1 + lea eax, [rsi + rdi] + test al, 1 + jne .LBB5_1 lea rdx, [rsi - 4] shr rdx and esi, 1 @@ -16,3 +12,6 @@ bench_try_ref_from_suffix_dynamic_size: cmp word ptr [rdi + rsi], -16192 cmove rax, rcx ret +.LBB5_1: + xor eax, eax + ret diff --git a/benches/try_ref_from_suffix_dynamic_size.x86-64.mca b/benches/try_ref_from_suffix_dynamic_size.x86-64.mca index 6cf7f8e493..37b19a1fab 100644 --- a/benches/try_ref_from_suffix_dynamic_size.x86-64.mca +++ b/benches/try_ref_from_suffix_dynamic_size.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 1600 -Total Cycles: 510 -Total uOps: 1800 +Instructions: 1500 +Total Cycles: 476 +Total uOps: 1700 Dispatch Width: 4 -uOps Per Cycle: 3.53 -IPC: 3.14 -Block RThroughput: 4.5 +uOps Per Cycle: 3.57 +IPC: 3.15 +Block RThroughput: 4.3 Instruction Info: @@ -18,14 +18,11 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.50 lea eax, [rsi + rdi] 1 1 0.33 cmp rsi, 4 - 1 1 0.50 setb cl - 1 1 0.33 or cl, al - 1 1 0.33 test cl, 1 - 1 1 1.00 je .LBB5_2 - 1 0 0.25 xor eax, eax - 1 1 1.00 U ret + 1 1 1.00 jb .LBB5_1 + 1 1 0.50 lea eax, [rsi + rdi] + 1 1 0.33 test al, 1 + 1 1 1.00 jne .LBB5_1 1 1 0.50 lea rdx, [rsi - 4] 1 1 0.50 shr rdx 1 1 0.33 and esi, 1 @@ -34,6 +31,8 @@ Instruction Info: 2 6 0.50 * cmp word ptr [rdi + rsi], -16192 2 2 0.67 cmove rax, rcx 1 1 1.00 U ret + 1 0 0.25 xor eax, eax + 1 1 1.00 U ret Resources: @@ -49,23 +48,22 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 4.99 5.00 - 5.01 0.50 0.50 + - - 4.66 4.66 - 4.68 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.98 0.02 - - - - lea eax, [rsi + rdi] - - - - 0.98 - 0.02 - - cmp rsi, 4 - - - 1.00 - - - - - setb cl - - - 0.01 0.99 - - - - or cl, al - - - 0.01 0.07 - 0.92 - - test cl, 1 - - - - - - 1.00 - - je .LBB5_2 + - - 0.58 0.41 - 0.01 - - cmp rsi, 4 + - - - - - 1.00 - - jb .LBB5_1 + - - 0.41 0.59 - - - - lea eax, [rsi + rdi] + - - 0.28 0.72 - - - - test al, 1 + - - - - - 1.00 - - jne .LBB5_1 + - - 0.70 0.30 - - - - lea rdx, [rsi - 4] + - - 0.68 - - 0.32 - - shr rdx + - - 0.61 0.32 - 0.07 - - and esi, 1 + - - 0.28 0.72 - - - - lea rcx, [rdi + rsi] - - - - - - - - xor eax, eax + - - 0.12 0.60 - 0.28 0.50 0.50 cmp word ptr [rdi + rsi], -16192 + - - 1.00 1.00 - - - - cmove rax, rcx - - - - - 1.00 - - ret - - - 0.93 0.07 - - - - lea rdx, [rsi - 4] - - - 0.93 - - 0.07 - - shr rdx - - - 0.06 0.93 - 0.01 - - and esi, 1 - - - 0.07 0.93 - - - - lea rcx, [rdi + rsi] - - - - - - - - xor eax, eax - - - - 0.01 - 0.99 0.50 0.50 cmp word ptr [rdi + rsi], -16192 - - - 1.00 1.00 - - - - cmove rax, rcx - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_suffix_static_size.x86-64 b/benches/try_ref_from_suffix_static_size.x86-64 index cd39f70931..e917f89bbb 100644 --- a/benches/try_ref_from_suffix_static_size.x86-64 +++ b/benches/try_ref_from_suffix_static_size.x86-64 @@ -1,16 +1,15 @@ bench_try_ref_from_suffix_static_size: - lea eax, [rsi + rdi] cmp rsi, 6 - setb cl - or cl, al - test cl, 1 - je .LBB5_2 - xor eax, eax - ret -.LBB5_2: + jb .LBB5_1 + lea eax, [rsi + rdi] + test al, 1 + jne .LBB5_1 lea rcx, [rdi + rsi] add rcx, -6 xor eax, eax cmp word ptr [rdi + rsi - 6], -16192 cmove rax, rcx ret +.LBB5_1: + xor eax, eax + ret diff --git a/benches/try_ref_from_suffix_static_size.x86-64.mca b/benches/try_ref_from_suffix_static_size.x86-64.mca index 087d1e7ed9..1227e4103d 100644 --- a/benches/try_ref_from_suffix_static_size.x86-64.mca +++ b/benches/try_ref_from_suffix_static_size.x86-64.mca @@ -1,11 +1,11 @@ Iterations: 100 -Instructions: 1400 -Total Cycles: 443 -Total uOps: 1600 +Instructions: 1300 +Total Cycles: 410 +Total uOps: 1500 Dispatch Width: 4 -uOps Per Cycle: 3.61 -IPC: 3.16 +uOps Per Cycle: 3.66 +IPC: 3.17 Block RThroughput: 4.0 @@ -18,20 +18,19 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.50 lea eax, [rsi + rdi] 1 1 0.33 cmp rsi, 6 - 1 1 0.50 setb cl - 1 1 0.33 or cl, al - 1 1 0.33 test cl, 1 - 1 1 1.00 je .LBB5_2 - 1 0 0.25 xor eax, eax - 1 1 1.00 U ret + 1 1 1.00 jb .LBB5_1 + 1 1 0.50 lea eax, [rsi + rdi] + 1 1 0.33 test al, 1 + 1 1 1.00 jne .LBB5_1 1 1 0.50 lea rcx, [rdi + rsi] 1 1 0.33 add rcx, -6 1 0 0.25 xor eax, eax 2 6 0.50 * cmp word ptr [rdi + rsi - 6], -16192 2 2 0.67 cmove rax, rcx 1 1 1.00 U ret + 1 0 0.25 xor eax, eax + 1 1 1.00 U ret Resources: @@ -47,21 +46,20 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 4.33 4.33 - 4.34 0.50 0.50 + - - 3.98 3.98 - 4.04 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.32 0.68 - - - - lea eax, [rsi + rdi] - - - 0.05 0.94 - 0.01 - - cmp rsi, 6 - - - 1.00 - - - - - setb cl - - - 0.95 0.05 - - - - or cl, al - - - 0.95 0.02 - 0.03 - - test cl, 1 - - - - - - 1.00 - - je .LBB5_2 + - - 0.03 0.96 - 0.01 - - cmp rsi, 6 + - - - - - 1.00 - - jb .LBB5_1 + - - 0.95 0.05 - - - - lea eax, [rsi + rdi] + - - 0.06 0.94 - - - - test al, 1 + - - - - - 1.00 - - jne .LBB5_1 + - - 0.94 0.06 - - - - lea rcx, [rdi + rsi] + - - 0.05 0.95 - - - - add rcx, -6 - - - - - - - - xor eax, eax + - - 0.95 0.04 - 0.01 0.50 0.50 cmp word ptr [rdi + rsi - 6], -16192 + - - 1.00 0.98 - 0.02 - - cmove rax, rcx - - - - - 1.00 - - ret - - - 0.04 0.96 - - - - lea rcx, [rdi + rsi] - - - 0.02 0.97 - 0.01 - - add rcx, -6 - - - - - - - - xor eax, eax - - - 0.03 0.66 - 0.31 0.50 0.50 cmp word ptr [rdi + rsi - 6], -16192 - - - 0.97 0.05 - 0.98 - - cmove rax, rcx - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64 b/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64 index c7530d8b68..91dc7251d3 100644 --- a/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64 +++ b/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64 @@ -1,32 +1,31 @@ bench_try_ref_from_suffix_with_elems_dynamic_padding: + mov rcx, rdx + mov edx, 1 movabs rax, 3074457345618258598 - cmp rdx, rax - ja .LBB5_1 - lea r8d, [rsi + rdi] - xor ecx, ecx - mov eax, 0 - test r8b, 3 - je .LBB5_3 - mov rdx, rcx - ret -.LBB5_3: - lea rax, [rdx + 2*rdx] + cmp rcx, rax + ja .LBB5_4 + lea rax, [rcx + 2*rcx] or rax, 3 add rax, 9 - sub rsi, rax - jae .LBB5_4 -.LBB5_1: + mov r8, rsi + sub r8, rax + jae .LBB5_2 +.LBB5_4: xor eax, eax - mov edx, 1 +.LBB5_5: ret -.LBB5_4: - lea r8, [rdi + rsi] - movzx esi, word ptr [rdi + rsi] - cmp si, -16192 - mov ecx, 2 - cmove rcx, rdx +.LBB5_2: + add esi, edi + xor edx, edx + mov eax, 0 + test sil, 3 + jne .LBB5_5 + lea rsi, [rdi + r8] + movzx edi, word ptr [rdi + r8] + cmp di, -16192 + mov edx, 2 + cmove rdx, rcx xor eax, eax - cmp esi, 49344 - cmove rax, r8 - mov rdx, rcx + cmp edi, 49344 + cmove rax, rsi ret diff --git a/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64.mca b/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64.mca index be736c00c2..198346b5fb 100644 --- a/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64.mca +++ b/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 2800 -Total Cycles: 878 -Total uOps: 3000 +Instructions: 2700 +Total Cycles: 1304 +Total uOps: 2900 Dispatch Width: 4 -uOps Per Cycle: 3.42 -IPC: 3.19 -Block RThroughput: 7.5 +uOps Per Cycle: 2.22 +IPC: 2.07 +Block RThroughput: 7.3 Instruction Info: @@ -18,33 +18,32 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: + 1 1 0.33 mov rcx, rdx + 1 1 0.33 mov edx, 1 1 1 0.33 movabs rax, 3074457345618258598 - 1 1 0.33 cmp rdx, rax - 1 1 1.00 ja .LBB5_1 - 1 1 0.50 lea r8d, [rsi + rdi] - 1 0 0.25 xor ecx, ecx - 1 1 0.33 mov eax, 0 - 1 1 0.33 test r8b, 3 - 1 1 1.00 je .LBB5_3 - 1 1 0.33 mov rdx, rcx - 1 1 1.00 U ret - 1 1 0.50 lea rax, [rdx + 2*rdx] + 1 1 0.33 cmp rcx, rax + 1 1 1.00 ja .LBB5_4 + 1 1 0.50 lea rax, [rcx + 2*rcx] 1 1 0.33 or rax, 3 1 1 0.33 add rax, 9 - 1 1 0.33 sub rsi, rax - 1 1 1.00 jae .LBB5_4 + 1 1 0.33 mov r8, rsi + 1 1 0.33 sub r8, rax + 1 1 1.00 jae .LBB5_2 1 0 0.25 xor eax, eax - 1 1 0.33 mov edx, 1 1 1 1.00 U ret - 1 1 0.50 lea r8, [rdi + rsi] - 1 5 0.50 * movzx esi, word ptr [rdi + rsi] - 1 1 0.33 cmp si, -16192 - 1 1 0.33 mov ecx, 2 - 2 2 0.67 cmove rcx, rdx + 1 1 0.33 add esi, edi + 1 0 0.25 xor edx, edx + 1 1 0.33 mov eax, 0 + 1 1 0.33 test sil, 3 + 1 1 1.00 jne .LBB5_5 + 1 1 0.50 lea rsi, [rdi + r8] + 1 5 0.50 * movzx edi, word ptr [rdi + r8] + 1 1 0.33 cmp di, -16192 + 1 1 0.33 mov edx, 2 + 2 2 0.67 cmove rdx, rcx 1 0 0.25 xor eax, eax - 1 1 0.33 cmp esi, 49344 - 2 2 0.67 cmove rax, r8 - 1 1 0.33 mov rdx, rcx + 1 1 0.33 cmp edi, 49344 + 2 2 0.67 cmove rax, rsi 1 1 1.00 U ret @@ -61,35 +60,34 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 8.65 8.65 - 8.70 0.50 0.50 + - - 8.01 8.49 - 8.50 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.67 0.30 - 0.03 - - movabs rax, 3074457345618258598 - - - 0.01 0.99 - - - - cmp rdx, rax - - - - - - 1.00 - - ja .LBB5_1 - - - 0.99 0.01 - - - - lea r8d, [rsi + rdi] - - - - - - - - - xor ecx, ecx - - - 0.35 0.62 - 0.03 - - mov eax, 0 - - - 0.99 0.01 - - - - test r8b, 3 - - - - - - 1.00 - - je .LBB5_3 - - - 0.68 0.30 - 0.02 - - mov rdx, rcx - - - - - - 1.00 - - ret - - - 0.07 0.93 - - - - lea rax, [rdx + 2*rdx] - - - 0.06 0.35 - 0.59 - - or rax, 3 - - - 0.02 0.07 - 0.91 - - add rax, 9 - - - 0.01 0.04 - 0.95 - - sub rsi, rax - - - - - - 1.00 - - jae .LBB5_4 + - - 0.48 0.50 - 0.02 - - mov rcx, rdx + - - 0.02 0.52 - 0.46 - - mov edx, 1 + - - 0.49 0.51 - - - - movabs rax, 3074457345618258598 + - - 0.51 0.48 - 0.01 - - cmp rcx, rax + - - - - - 1.00 - - ja .LBB5_4 + - - 0.48 0.52 - - - - lea rax, [rcx + 2*rcx] + - - 0.52 0.48 - - - - or rax, 3 + - - 0.52 0.47 - 0.01 - - add rax, 9 + - - 0.48 0.52 - - - - mov r8, rsi + - - 0.51 0.01 - 0.48 - - sub r8, rax + - - - - - 1.00 - - jae .LBB5_2 - - - - - - - - xor eax, eax - - - 0.92 0.01 - 0.07 - - mov edx, 1 - - - - - 1.00 - - ret - - - - 1.00 - - - - lea r8, [rdi + rsi] - - - - - - - 0.50 0.50 movzx esi, word ptr [rdi + rsi] - - - 0.01 0.99 - - - - cmp si, -16192 - - - 0.88 0.04 - 0.08 - - mov ecx, 2 - - - 1.00 0.99 - 0.01 - - cmove rcx, rdx + - - 0.01 0.50 - 0.49 - - add esi, edi + - - - - - - - - xor edx, edx + - - 0.04 0.95 - 0.01 - - mov eax, 0 + - - 0.01 0.50 - 0.49 - - test sil, 3 + - - - - - 1.00 - - jne .LBB5_5 + - - 0.50 0.50 - - - - lea rsi, [rdi + r8] + - - - - - - 0.50 0.50 movzx edi, word ptr [rdi + r8] + - - 0.97 0.02 - 0.01 - - cmp di, -16192 + - - 0.48 0.51 - 0.01 - - mov edx, 2 + - - 0.99 0.51 - 0.50 - - cmove rdx, rcx - - - - - - - - xor eax, eax - - - 0.99 0.01 - - - - cmp esi, 49344 - - - 1.00 1.00 - - - - cmove rax, r8 - - - - 0.99 - 0.01 - - mov rdx, rcx + - - 0.02 0.48 - 0.50 - - cmp edi, 49344 + - - 0.98 0.51 - 0.51 - - cmove rax, rsi - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64 b/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64 index 952eb12de8..ee0c7db854 100644 --- a/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64 +++ b/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64 @@ -1,28 +1,29 @@ bench_try_ref_from_suffix_with_elems_dynamic_size: + mov rcx, rdx + mov edx, 1 movabs rax, 4611686018427387901 - cmp rdx, rax - ja .LBB5_1 - lea r8d, [rsi + rdi] - xor ecx, ecx - mov eax, 0 - test r8b, 1 - jne .LBB5_5 - lea rax, [2*rdx + 4] - sub rsi, rax + cmp rcx, rax + ja .LBB5_3 + lea rax, [2*rcx + 4] + mov r8, rsi + sub r8, rax jae .LBB5_4 -.LBB5_1: +.LBB5_3: xor eax, eax - mov edx, 1 ret .LBB5_4: - lea r8, [rdi + rsi] - movzx esi, word ptr [rdi + rsi] - cmp si, -16192 - mov ecx, 2 - cmove rcx, rdx + add esi, edi + xor edx, edx + mov eax, 0 + test sil, 1 + jne .LBB5_6 + lea rsi, [rdi + r8] + movzx edi, word ptr [rdi + r8] + cmp di, -16192 + mov edx, 2 + cmove rdx, rcx xor eax, eax - cmp esi, 49344 - cmove rax, r8 -.LBB5_5: - mov rdx, rcx + cmp edi, 49344 + cmove rax, rsi +.LBB5_6: ret diff --git a/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64.mca b/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64.mca index d4f78f67a2..7eb924c596 100644 --- a/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64.mca +++ b/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 2400 -Total Cycles: 1107 -Total uOps: 2600 +Instructions: 2500 +Total Cycles: 1105 +Total uOps: 2700 Dispatch Width: 4 -uOps Per Cycle: 2.35 -IPC: 2.17 -Block RThroughput: 6.5 +uOps Per Cycle: 2.44 +IPC: 2.26 +Block RThroughput: 6.8 Instruction Info: @@ -18,29 +18,30 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: + 1 1 0.33 mov rcx, rdx + 1 1 0.33 mov edx, 1 1 1 0.33 movabs rax, 4611686018427387901 - 1 1 0.33 cmp rdx, rax - 1 1 1.00 ja .LBB5_1 - 1 1 0.50 lea r8d, [rsi + rdi] - 1 0 0.25 xor ecx, ecx - 1 1 0.33 mov eax, 0 - 1 1 0.33 test r8b, 1 - 1 1 1.00 jne .LBB5_5 - 1 1 0.50 lea rax, [2*rdx + 4] - 1 1 0.33 sub rsi, rax + 1 1 0.33 cmp rcx, rax + 1 1 1.00 ja .LBB5_3 + 1 1 0.50 lea rax, [2*rcx + 4] + 1 1 0.33 mov r8, rsi + 1 1 0.33 sub r8, rax 1 1 1.00 jae .LBB5_4 1 0 0.25 xor eax, eax - 1 1 0.33 mov edx, 1 1 1 1.00 U ret - 1 1 0.50 lea r8, [rdi + rsi] - 1 5 0.50 * movzx esi, word ptr [rdi + rsi] - 1 1 0.33 cmp si, -16192 - 1 1 0.33 mov ecx, 2 - 2 2 0.67 cmove rcx, rdx + 1 1 0.33 add esi, edi + 1 0 0.25 xor edx, edx + 1 1 0.33 mov eax, 0 + 1 1 0.33 test sil, 1 + 1 1 1.00 jne .LBB5_6 + 1 1 0.50 lea rsi, [rdi + r8] + 1 5 0.50 * movzx edi, word ptr [rdi + r8] + 1 1 0.33 cmp di, -16192 + 1 1 0.33 mov edx, 2 + 2 2 0.67 cmove rdx, rcx 1 0 0.25 xor eax, eax - 1 1 0.33 cmp esi, 49344 - 2 2 0.67 cmove rax, r8 - 1 1 0.33 mov rdx, rcx + 1 1 0.33 cmp edi, 49344 + 2 2 0.67 cmove rax, rsi 1 1 1.00 U ret @@ -57,31 +58,32 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 6.99 7.00 - 8.01 0.50 0.50 + - - 7.50 7.52 - 7.98 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.02 0.95 - 0.03 - - movabs rax, 4611686018427387901 - - - 0.93 0.04 - 0.03 - - cmp rdx, rax - - - - - - 1.00 - - ja .LBB5_1 - - - 0.96 0.04 - - - - lea r8d, [rsi + rdi] - - - - - - - - - xor ecx, ecx - - - 0.95 0.02 - 0.03 - - mov eax, 0 - - - 0.95 0.05 - - - - test r8b, 1 - - - - - - 1.00 - - jne .LBB5_5 - - - 0.06 0.94 - - - - lea rax, [2*rdx + 4] - - - 0.93 0.07 - - - - sub rsi, rax + - - 0.47 0.52 - 0.01 - - mov rcx, rdx + - - 0.50 0.49 - 0.01 - - mov edx, 1 + - - 0.49 0.49 - 0.02 - - movabs rax, 4611686018427387901 + - - 0.48 0.51 - 0.01 - - cmp rcx, rax + - - - - - 1.00 - - ja .LBB5_3 + - - 0.51 0.49 - - - - lea rax, [2*rcx + 4] + - - 0.49 0.51 - - - - mov r8, rsi + - - 0.48 0.52 - - - - sub r8, rax - - - - - 1.00 - - jae .LBB5_4 - - - - - - - - xor eax, eax - - - 0.03 0.95 - 0.02 - - mov edx, 1 - - - - - 1.00 - - ret - - - 0.97 0.03 - - - - lea r8, [rdi + rsi] - - - - - - - 0.50 0.50 movzx esi, word ptr [rdi + rsi] - - - 0.03 0.97 - - - - cmp si, -16192 - - - 0.05 0.94 - 0.01 - - mov ecx, 2 - - - 0.06 0.98 - 0.96 - - cmove rcx, rdx + - - 0.47 0.47 - 0.06 - - add esi, edi + - - - - - - - - xor edx, edx + - - 0.51 0.49 - - - - mov eax, 0 + - - 0.47 0.47 - 0.06 - - test sil, 1 + - - - - - 1.00 - - jne .LBB5_6 + - - 0.52 0.48 - - - - lea rsi, [rdi + r8] + - - - - - - 0.50 0.50 movzx edi, word ptr [rdi + r8] + - - 0.50 0.04 - 0.46 - - cmp di, -16192 + - - 0.49 0.50 - 0.01 - - mov edx, 2 + - - 0.54 0.52 - 0.94 - - cmove rdx, rcx - - - - - - - - xor eax, eax - - - 0.97 0.03 - - - - cmp esi, 49344 - - - 0.06 0.96 - 0.98 - - cmove rax, r8 - - - 0.02 0.03 - 0.95 - - mov rdx, rcx + - - 0.04 0.49 - 0.47 - - cmp edi, 49344 + - - 0.54 0.53 - 0.93 - - cmove rax, rsi - - - - - 1.00 - - ret diff --git a/src/layout.rs b/src/layout.rs index 19ad5ca85f..e2b322e8a6 100644 --- a/src/layout.rs +++ b/src/layout.rs @@ -638,37 +638,7 @@ impl DstLayout { addr.checked_add(bytes_len).is_some(), "`addr` + `bytes_len` > usize::MAX" ); - - // Alignment checks go in their own block to avoid introducing variables - // into the top-level scope. - { - // We check alignment for `addr` (for prefix casts) or `addr + - // bytes_len` (for suffix casts). For a prefix cast, the correctness - // of this check is trivial - `addr` is the address the object will - // live at. - // - // For a suffix cast, we know that all valid sizes for the type are - // a multiple of the alignment (and by safety precondition, we know - // `DstLayout` may only describe valid Rust types). Thus, a - // validly-sized instance which lives at a validly-aligned address - // must also end at a validly-aligned address. Thus, if the end - // address for a suffix cast (`addr + bytes_len`) is not aligned, - // then no valid start address will be aligned either. - let offset = match cast_type { - CastType::Prefix => 0, - CastType::Suffix => bytes_len, - }; - - // Addition is guaranteed not to overflow because `offset <= - // bytes_len`, and `addr + bytes_len <= usize::MAX` is a - // precondition of this method. Modulus is guaranteed not to divide - // by 0 because `align` is non-zero. - #[allow(clippy::arithmetic_side_effects)] - if (addr + offset) % self.align.get() != 0 { - return Err(MetadataCastError::Alignment); - } - } - + let (elems, self_bytes) = match size_info { SizeInfo::Sized { size } => { if size > bytes_len { @@ -682,7 +652,7 @@ impl DstLayout { // multiple of the alignment, or will be larger than // `bytes_len`. let max_total_bytes = - util::round_down_to_next_multiple_of_alignment(bytes_len, self.align); + util::round_down_to_next_multiple_of_alignment(bytes_len, self.align); // Calculate the maximum number of bytes that could be consumed // by the trailing slice. // @@ -693,7 +663,7 @@ impl DstLayout { // `bytes_len` too small even for 0 trailing slice elements. None => return Err(MetadataCastError::Size), }; - + // Calculate the number of elements that fit in // `max_slice_and_padding_bytes`; any remaining bytes will be // considered padding. @@ -728,10 +698,40 @@ impl DstLayout { // `self_bytes` up to `max_total_bytes`. #[allow(clippy::arithmetic_side_effects)] let self_bytes = - without_padding + util::padding_needed_for(without_padding, self.align); + without_padding + util::padding_needed_for(without_padding, self.align); (elems, self_bytes) } }; + + // Alignment checks go in their own block to avoid introducing variables + // into the top-level scope. + { + // We check alignment for `addr` (for prefix casts) or `addr + + // bytes_len` (for suffix casts). For a prefix cast, the correctness + // of this check is trivial - `addr` is the address the object will + // live at. + // + // For a suffix cast, we know that all valid sizes for the type are + // a multiple of the alignment (and by safety precondition, we know + // `DstLayout` may only describe valid Rust types). Thus, a + // validly-sized instance which lives at a validly-aligned address + // must also end at a validly-aligned address. Thus, if the end + // address for a suffix cast (`addr + bytes_len`) is not aligned, + // then no valid start address will be aligned either. + let offset = match cast_type { + CastType::Prefix => 0, + CastType::Suffix => bytes_len, + }; + + // Addition is guaranteed not to overflow because `offset <= + // bytes_len`, and `addr + bytes_len <= usize::MAX` is a + // precondition of this method. Modulus is guaranteed not to divide + // by 0 because `align` is non-zero. + #[allow(clippy::arithmetic_side_effects)] + if (addr + offset) % self.align.get() != 0 { + return Err(MetadataCastError::Alignment); + } + } __const_debug_assert!(self_bytes <= bytes_len);