diff --git a/cranelift/assembler-x64/meta/src/instructions/add.rs b/cranelift/assembler-x64/meta/src/instructions/add.rs index df802670ce17..f7c22e6887db 100644 --- a/cranelift/assembler-x64/meta/src/instructions/add.rs +++ b/cranelift/assembler-x64/meta/src/instructions/add.rs @@ -21,6 +21,9 @@ pub fn list() -> Vec { inst("addw", fmt("RM", [rw(r16), r(rm16)]), rex([0x66, 0x3]).r(), _64b | compat), inst("addl", fmt("RM", [rw(r32), r(rm32)]), rex(0x3).r(), _64b | compat), inst("addq", fmt("RM", [rw(r64), r(rm64)]), rex(0x3).w().r(), _64b), + // SSE vector instructions + inst("addps", fmt("A", [rw(xmm), r(rm128)]), rex([0x0F, 0x58]).r(), _64b), + inst("addpd", fmt("A", [rw(xmm), r(rm128)]), rex([0x66, 0x0F, 0x58]).r(), _64b), // Add with carry. inst("adcb", fmt("I", [rw(al), r(imm8)]), rex(0x14).ib(), _64b | compat), inst("adcw", fmt("I", [rw(ax), r(imm16)]), rex([0x66, 0x15]).iw(), _64b | compat), diff --git a/cranelift/assembler-x64/meta/src/instructions/and.rs b/cranelift/assembler-x64/meta/src/instructions/and.rs index 5997de9f77e0..aae4c449853a 100644 --- a/cranelift/assembler-x64/meta/src/instructions/and.rs +++ b/cranelift/assembler-x64/meta/src/instructions/and.rs @@ -28,5 +28,8 @@ pub fn list() -> Vec { inst("andw", fmt("RM", [rw(r16), r(rm16)]), rex([0x66, 0x23]).r(), _64b | compat), inst("andl", fmt("RM", [rw(r32), r(rm32)]), rex(0x23).r(), _64b | compat), inst("andq", fmt("RM", [rw(r64), r(rm64)]), rex(0x23).w().r(), _64b), + // SSE vector instructions + inst("andps", fmt("A", [rw(xmm), r(rm128)]), rex([0x0F, 0x54]).r(), _64b), + inst("andpd", fmt("A", [rw(xmm), r(rm128)]), rex([0x66, 0x0F, 0x54]).r(), _64b), ] } diff --git a/cranelift/assembler-x64/meta/src/instructions/or.rs b/cranelift/assembler-x64/meta/src/instructions/or.rs index 2b918991ef24..ee9208748feb 100644 --- a/cranelift/assembler-x64/meta/src/instructions/or.rs +++ b/cranelift/assembler-x64/meta/src/instructions/or.rs @@ -21,6 +21,8 @@ pub fn list() -> Vec { inst("orw", fmt("RM", [rw(r16), r(rm16)]), rex([0x66, 0x0B]).r(), _64b | compat), inst("orl", fmt("RM", [rw(r32), r(rm32)]), rex(0x0B).r(), _64b | compat), inst("orq", fmt("RM", [rw(r64), r(rm64)]), rex(0x0B).w().r(), _64b), + // SSE vector instructions + inst("orps", fmt("A", [rw(xmm), r(rm128)]), rex([0x0F, 0x56]).r(), _64b), inst("orpd", fmt("A", [rw(xmm), r(rm128)]), rex([0x66, 0x0F, 0x56]).r(), _64b), ] } diff --git a/cranelift/assembler-x64/meta/src/instructions/sub.rs b/cranelift/assembler-x64/meta/src/instructions/sub.rs index edaf75d4045d..659a38d7f2ce 100644 --- a/cranelift/assembler-x64/meta/src/instructions/sub.rs +++ b/cranelift/assembler-x64/meta/src/instructions/sub.rs @@ -21,6 +21,9 @@ pub fn list() -> Vec { inst("subw", fmt("RM", [rw(r16), r(rm16)]), rex([0x66, 0x2B]).r(), _64b | compat), inst("subl", fmt("RM", [rw(r32), r(rm32)]), rex(0x2B).r(), _64b | compat), inst("subq", fmt("RM", [rw(r64), r(rm64)]), rex(0x2B).w().r(), _64b), + // SSE vector instructions + inst("subps", fmt("A", [rw(xmm), r(rm128)]), rex([0x0F, 0x5C]).r(), _64b), + inst("subpd", fmt("A", [rw(xmm), r(rm128)]), rex([0x66, 0x0F, 0x5C]).r(), _64b), // Subtract with borrow. inst("sbbb", fmt("I", [rw(al), r(imm8)]), rex(0x1C).ib(), _64b | compat), inst("sbbw", fmt("I", [rw(ax), r(imm16)]), rex([0x66, 0x1D]).iw(), _64b | compat), diff --git a/cranelift/assembler-x64/meta/src/instructions/xor.rs b/cranelift/assembler-x64/meta/src/instructions/xor.rs index 2529c7dd98ab..57fc7bfcf1cc 100644 --- a/cranelift/assembler-x64/meta/src/instructions/xor.rs +++ b/cranelift/assembler-x64/meta/src/instructions/xor.rs @@ -21,5 +21,8 @@ pub fn list() -> Vec { inst("xorw", fmt("RM", [rw(r16), r(rm16)]), rex([0x66, 0x33]).r(), _64b | compat), inst("xorl", fmt("RM", [rw(r32), r(rm32)]), rex(0x33).r(), _64b | compat), inst("xorq", fmt("RM", [rw(r64), r(rm64)]), rex(0x33).w().r(), _64b), + // SSE vector instructions + inst("xorps", fmt("A", [rw(xmm), r(rm128)]), rex([0x0F, 0x57]).r(), _64b), + inst("xorpd", fmt("A", [rw(xmm), r(rm128)]), rex([0x66, 0x0F, 0x57]).r(), _64b), ] } diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index f6d8ca482def..6f076ed141b7 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -3568,19 +3568,17 @@ ;; Helper for creating `andps` instructions. (decl x64_andps (Xmm XmmMem) Xmm) -(rule 0 (x64_andps src1 src2) - (xmm_rm_r (SseOpcode.Andps) src1 src2)) (rule 1 (x64_andps src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vandps) src1 src2)) +(rule 0 (x64_andps src1 src2) (x64_andps_a src1 src2)) ;; Helper for creating `andpd` instructions. (decl x64_andpd (Xmm XmmMem) Xmm) -(rule 0 (x64_andpd src1 src2) - (xmm_rm_r (SseOpcode.Andpd) src1 src2)) (rule 1 (x64_andpd src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vandpd) src1 src2)) +(rule 0 (x64_andpd src1 src2) (x64_andpd_a src1 src2)) ;; Helper for creating `por` instructions. (decl x64_por (Xmm XmmMem) Xmm) @@ -3592,11 +3590,10 @@ ;; Helper for creating `orps` instructions. (decl x64_orps (Xmm XmmMem) Xmm) -(rule 0 (x64_orps src1 src2) - (xmm_rm_r (SseOpcode.Orps) src1 src2)) (rule 1 (x64_orps src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vorps) src1 src2)) +(rule 0 (x64_orps src1 src2) (x64_orps_a src1 src2)) ;; Helper for creating `orpd` instructions. (decl x64_orpd (Xmm XmmMem) Xmm) @@ -3617,19 +3614,17 @@ ;; Helper fxor creating `xorps` instructions. (decl x64_xorps (Xmm XmmMem) Xmm) -(rule 0 (x64_xorps src1 src2) - (xmm_rm_r (SseOpcode.Xorps) src1 src2)) (rule 1 (x64_xorps src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vxorps) src1 src2)) +(rule 0 (x64_xorps src1 src2) (x64_xorps_a src1 src2)) ;; Helper fxor creating `xorpd` instructions. (decl x64_xorpd (Xmm XmmMem) Xmm) -(rule 0 (x64_xorpd src1 src2) - (xmm_rm_r (SseOpcode.Xorpd) src1 src2)) (rule 1 (x64_xorpd src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vxorpd) src1 src2)) +(rule 0 (x64_xorpd src1 src2) (x64_xorpd_a src1 src2)) ;; Helper for creating `pmullw` instructions. (decl x64_pmullw (Xmm XmmMem) Xmm) @@ -3801,19 +3796,17 @@ ;; Helper for creating `addps` instructions. (decl x64_addps (Xmm XmmMem) Xmm) -(rule 0 (x64_addps src1 src2) - (xmm_rm_r (SseOpcode.Addps) src1 src2)) (rule 1 (x64_addps src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vaddps) src1 src2)) +(rule 0 (x64_addps src1 src2) (x64_addps_a src1 src2)) ;; Helper for creating `addpd` instructions. (decl x64_addpd (Xmm XmmMem) Xmm) -(rule 0 (x64_addpd src1 src2) - (xmm_rm_r (SseOpcode.Addpd) src1 src2)) (rule 1 (x64_addpd src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vaddpd) src1 src2)) +(rule 0 (x64_addpd src1 src2) (x64_addpd_a src1 src2)) ;; Helper for creating `subss` instructions. (decl x64_subss (Xmm XmmMem) Xmm) @@ -3833,19 +3826,17 @@ ;; Helper for creating `subps` instructions. (decl x64_subps (Xmm XmmMem) Xmm) -(rule 0 (x64_subps src1 src2) - (xmm_rm_r (SseOpcode.Subps) src1 src2)) (rule 1 (x64_subps src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vsubps) src1 src2)) +(rule 0 (x64_subps src1 src2) (x64_subps_a src1 src2)) ;; Helper for creating `subpd` instructions. (decl x64_subpd (Xmm XmmMem) Xmm) -(rule 0 (x64_subpd src1 src2) - (xmm_rm_r (SseOpcode.Subpd) src1 src2)) (rule 1 (x64_subpd src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vsubpd) src1 src2)) +(rule 0 (x64_subpd src1 src2) (x64_subpd_a src1 src2)) ;; Helper for creating `mulss` instructions. (decl x64_mulss (Xmm XmmMem) Xmm) diff --git a/cranelift/filetests/filetests/isa/aarch64/nan-canonicalization.clif b/cranelift/filetests/filetests/isa/aarch64/nan-canonicalization.clif index 8ffffcdd2d49..bb83982f1d30 100644 --- a/cranelift/filetests/filetests/isa/aarch64/nan-canonicalization.clif +++ b/cranelift/filetests/filetests/isa/aarch64/nan-canonicalization.clif @@ -12,7 +12,7 @@ block0(v0: f32x4, v1: f32x4): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; addps %xmm0, %xmm1, %xmm0 +; addps %xmm1, %xmm0 ; movl $2143289344, %r10d ; movd %r10d, %xmm7 ; shufps $0, %xmm7, const(0), %xmm7 diff --git a/cranelift/filetests/filetests/isa/x64/f128const.clif b/cranelift/filetests/filetests/isa/x64/f128const.clif index 22ee804d6d6f..8d96cbc872e7 100644 --- a/cranelift/filetests/filetests/isa/x64/f128const.clif +++ b/cranelift/filetests/filetests/isa/x64/f128const.clif @@ -12,7 +12,7 @@ block0(): ; movq %rsp, %rbp ; block0: ; uninit %xmm0 -; xorps %xmm0, %xmm0, %xmm0 +; xorps %xmm0, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/f16const.clif b/cranelift/filetests/filetests/isa/x64/f16const.clif index edcaf2ac29fd..86878010245a 100644 --- a/cranelift/filetests/filetests/isa/x64/f16const.clif +++ b/cranelift/filetests/filetests/isa/x64/f16const.clif @@ -12,7 +12,7 @@ block0(): ; movq %rsp, %rbp ; block0: ; uninit %xmm0 -; xorps %xmm0, %xmm0, %xmm0 +; xorps %xmm0, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/fabs.clif b/cranelift/filetests/filetests/isa/x64/fabs.clif index c8b621285c5a..28523f44d1d9 100644 --- a/cranelift/filetests/filetests/isa/x64/fabs.clif +++ b/cranelift/filetests/filetests/isa/x64/fabs.clif @@ -13,7 +13,7 @@ block0(v0: f32): ; block0: ; movl $2147483647, %eax ; movd %eax, %xmm4 -; andps %xmm0, %xmm4, %xmm0 +; andps %xmm4, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -42,7 +42,7 @@ block0(v0: f64): ; block0: ; movabsq $9223372036854775807, %rax ; movq %rax, %xmm4 -; andpd %xmm0, %xmm4, %xmm0 +; andpd %xmm4, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -72,7 +72,7 @@ block0(v0: f32x4): ; uninit %xmm4 ; pcmpeqd %xmm4, %xmm4, %xmm4 ; psrld %xmm4, $1, %xmm4 -; andps %xmm0, %xmm4, %xmm0 +; andps %xmm4, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -102,7 +102,7 @@ block0(v0: f64x2): ; uninit %xmm4 ; pcmpeqd %xmm4, %xmm4, %xmm4 ; psrlq %xmm4, $1, %xmm4 -; andpd %xmm0, %xmm4, %xmm0 +; andpd %xmm4, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/fastcall.clif b/cranelift/filetests/filetests/isa/x64/fastcall.clif index b12e5452234e..8050b97f8e74 100644 --- a/cranelift/filetests/filetests/isa/x64/fastcall.clif +++ b/cranelift/filetests/filetests/isa/x64/fastcall.clif @@ -241,7 +241,7 @@ block0(v0: i64): ; subq %rsp, $48, %rsp ; block0: ; uninit %xmm3 -; xorpd %xmm3, %xmm3, %xmm3 +; xorpd %xmm3, %xmm3 ; cvtsi2sd %xmm3, %rcx, %xmm3 ; movq %rcx, 32(%rsp) ; movq %rcx, 40(%rsp) diff --git a/cranelift/filetests/filetests/isa/x64/fcopysign.clif b/cranelift/filetests/filetests/isa/x64/fcopysign.clif index 19d288482a4f..0cf64c1c39d4 100644 --- a/cranelift/filetests/filetests/isa/x64/fcopysign.clif +++ b/cranelift/filetests/filetests/isa/x64/fcopysign.clif @@ -16,8 +16,8 @@ block0(v0: f32, v1: f32): ; movdqa %xmm0, %xmm2 ; movdqa %xmm7, %xmm0 ; andnps %xmm0, %xmm2, %xmm0 -; andps %xmm7, %xmm1, %xmm7 -; orps %xmm0, %xmm7, %xmm0 +; andps %xmm1, %xmm7 +; orps %xmm7, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -53,7 +53,7 @@ block0(v0: f64, v1: f64): ; movdqa %xmm0, %xmm2 ; movdqa %xmm7, %xmm0 ; andnpd %xmm0, %xmm2, %xmm0 -; andpd %xmm7, %xmm1, %xmm7 +; andpd %xmm1, %xmm7 ; orpd %xmm7, %xmm0 ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/fcvt.clif b/cranelift/filetests/filetests/isa/x64/fcvt.clif index ae506aa096d1..0d73f6c486b4 100644 --- a/cranelift/filetests/filetests/isa/x64/fcvt.clif +++ b/cranelift/filetests/filetests/isa/x64/fcvt.clif @@ -12,7 +12,7 @@ block0(v0: i8): ; movq %rsp, %rbp ; block0: ; uninit %xmm0 -; xorps %xmm0, %xmm0, %xmm0 +; xorps %xmm0, %xmm0 ; movsbl %dil, %r9d ; cvtsi2ss %xmm0, %r9d, %xmm0 ; movq %rbp, %rsp @@ -42,7 +42,7 @@ block0(v0: i16): ; movq %rsp, %rbp ; block0: ; uninit %xmm0 -; xorps %xmm0, %xmm0, %xmm0 +; xorps %xmm0, %xmm0 ; movswl %di, %r9d ; cvtsi2ss %xmm0, %r9d, %xmm0 ; movq %rbp, %rsp @@ -72,7 +72,7 @@ block0(v0: i32): ; movq %rsp, %rbp ; block0: ; uninit %xmm0 -; xorps %xmm0, %xmm0, %xmm0 +; xorps %xmm0, %xmm0 ; cvtsi2ss %xmm0, %edi, %xmm0 ; movq %rbp, %rsp ; popq %rbp @@ -100,7 +100,7 @@ block0(v0: i64): ; movq %rsp, %rbp ; block0: ; uninit %xmm0 -; xorps %xmm0, %xmm0, %xmm0 +; xorps %xmm0, %xmm0 ; cvtsi2ss %xmm0, %rdi, %xmm0 ; movq %rbp, %rsp ; popq %rbp @@ -128,7 +128,7 @@ block0(v0: i8): ; movq %rsp, %rbp ; block0: ; uninit %xmm0 -; xorpd %xmm0, %xmm0, %xmm0 +; xorpd %xmm0, %xmm0 ; movsbl %dil, %r9d ; cvtsi2sd %xmm0, %r9d, %xmm0 ; movq %rbp, %rsp @@ -158,7 +158,7 @@ block0(v0: i16): ; movq %rsp, %rbp ; block0: ; uninit %xmm0 -; xorpd %xmm0, %xmm0, %xmm0 +; xorpd %xmm0, %xmm0 ; movswl %di, %r9d ; cvtsi2sd %xmm0, %r9d, %xmm0 ; movq %rbp, %rsp @@ -188,7 +188,7 @@ block0(v0: i32): ; movq %rsp, %rbp ; block0: ; uninit %xmm0 -; xorpd %xmm0, %xmm0, %xmm0 +; xorpd %xmm0, %xmm0 ; cvtsi2sd %xmm0, %edi, %xmm0 ; movq %rbp, %rsp ; popq %rbp @@ -216,7 +216,7 @@ block0(v0: i64): ; movq %rsp, %rbp ; block0: ; uninit %xmm0 -; xorpd %xmm0, %xmm0, %xmm0 +; xorpd %xmm0, %xmm0 ; cvtsi2sd %xmm0, %rdi, %xmm0 ; movq %rbp, %rsp ; popq %rbp @@ -276,15 +276,15 @@ block0(v0: i8, v1: i16, v2: i32, v3: i64): ; movq %rsp, %rbp ; block0: ; uninit %xmm0 -; xorps %xmm0, %xmm0, %xmm0 +; xorps %xmm0, %xmm0 ; movzbq %dil, %r8 ; cvtsi2ss %xmm0, %r8, %xmm0 ; uninit %xmm6 -; xorps %xmm6, %xmm6, %xmm6 +; xorps %xmm6, %xmm6 ; movzwq %si, %r8 ; cvtsi2ss %xmm6, %r8, %xmm6 ; uninit %xmm7 -; xorps %xmm7, %xmm7, %xmm7 +; xorps %xmm7, %xmm7 ; movl %edx, %r8d ; cvtsi2ss %xmm7, %r8, %xmm7 ; u64_to_f32_seq %rcx, %xmm4, %r8, %rdx @@ -339,7 +339,7 @@ block0(v0: i32x4): ; movq %rsp, %rbp ; block0: ; unpcklps %xmm0, const(0), %xmm0 -; subpd %xmm0, const(1), %xmm0 +; subpd (%rip), %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -390,8 +390,8 @@ block0(v0: i32x4): ; cvtdq2ps %xmm3, %xmm1 ; psrld %xmm0, $1, %xmm0 ; cvtdq2ps %xmm0, %xmm0 -; addps %xmm0, %xmm0, %xmm0 -; addps %xmm0, %xmm1, %xmm0 +; addps %xmm0, %xmm0 +; addps %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -1065,13 +1065,13 @@ block0(v0: f32x4): ; movq %rsp, %rbp ; block0: ; uninit %xmm6 -; xorps %xmm6, %xmm6, %xmm6 +; xorps %xmm6, %xmm6 ; maxps %xmm0, %xmm6, %xmm0 ; pcmpeqd %xmm6, %xmm6, %xmm6 ; psrld %xmm6, $1, %xmm6 ; cvtdq2ps %xmm6, %xmm7 ; cvttps2dq %xmm0, %xmm6 -; subps %xmm0, %xmm7, %xmm0 +; subps %xmm7, %xmm0 ; cmpps $2, %xmm7, %xmm0, %xmm7 ; cvttps2dq %xmm0, %xmm0 ; pxor %xmm0, %xmm7, %xmm0 @@ -1117,7 +1117,7 @@ block0(v0: f32x4): ; block0: ; movdqa %xmm0, %xmm4 ; cmpps $0, %xmm4, %xmm0, %xmm4 -; andps %xmm0, %xmm4, %xmm0 +; andps %xmm4, %xmm0 ; pxor %xmm4, %xmm0, %xmm4 ; cvttps2dq %xmm0, %xmm1 ; movdqa %xmm1, %xmm0 @@ -1163,10 +1163,10 @@ block0(v0: i64x2): ; movdqa %xmm1, %xmm7 ; psrlq %xmm0, $32, %xmm0 ; por %xmm0, const(2), %xmm0 -; subpd %xmm0, const(3), %xmm0 +; subpd (%rip), %xmm0 ; movdqa %xmm0, %xmm1 ; movdqa %xmm7, %xmm0 -; addpd %xmm0, %xmm1, %xmm0 +; addpd %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -1207,7 +1207,7 @@ block0(v0: i64x2): ; movq %rsp, %rbp ; block0: ; uninit %xmm1 -; xorpd %xmm1, %xmm1, %xmm1 +; xorpd %xmm1, %xmm1 ; movdqa %xmm0, %xmm6 ; movq %xmm6, %r9 ; movdqa %xmm1, %xmm0 diff --git a/cranelift/filetests/filetests/isa/x64/floating-point.clif b/cranelift/filetests/filetests/isa/x64/floating-point.clif index 8175a4e83315..cf399ebed643 100644 --- a/cranelift/filetests/filetests/isa/x64/floating-point.clif +++ b/cranelift/filetests/filetests/isa/x64/floating-point.clif @@ -13,7 +13,7 @@ block0(v0: f64): ; block0: ; movabsq $9223372036854775807, %rax ; movq %rax, %xmm4 -; andpd %xmm0, %xmm4, %xmm0 +; andpd %xmm4, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -44,7 +44,7 @@ block0(v0: i64): ; movsd 0(%rdi), %xmm0 ; movabsq $9223372036854775807, %rcx ; movq %rcx, %xmm5 -; andpd %xmm0, %xmm5, %xmm0 +; andpd %xmm5, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/fneg.clif b/cranelift/filetests/filetests/isa/x64/fneg.clif index 7a8c77d4edf6..de0ba6f149b2 100644 --- a/cranelift/filetests/filetests/isa/x64/fneg.clif +++ b/cranelift/filetests/filetests/isa/x64/fneg.clif @@ -13,7 +13,7 @@ block0(v0: f32): ; block0: ; movl $-2147483648, %eax ; movd %eax, %xmm4 -; xorps %xmm0, %xmm4, %xmm0 +; xorps %xmm4, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -42,7 +42,7 @@ block0(v0: f64): ; block0: ; movabsq $-9223372036854775808, %rax ; movq %rax, %xmm4 -; xorpd %xmm0, %xmm4, %xmm0 +; xorpd %xmm4, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -72,7 +72,7 @@ block0(v0: f32x4): ; uninit %xmm4 ; pcmpeqd %xmm4, %xmm4, %xmm4 ; pslld %xmm4, $31, %xmm4 -; xorps %xmm0, %xmm4, %xmm0 +; xorps %xmm4, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -102,7 +102,7 @@ block0(v0: f64x2): ; uninit %xmm4 ; pcmpeqd %xmm4, %xmm4, %xmm4 ; psllq %xmm4, $63, %xmm4 -; xorpd %xmm0, %xmm4, %xmm0 +; xorpd %xmm4, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/fpromote-demote.clif b/cranelift/filetests/filetests/isa/x64/fpromote-demote.clif index c064b2b6b85d..6931e0d789e3 100644 --- a/cranelift/filetests/filetests/isa/x64/fpromote-demote.clif +++ b/cranelift/filetests/filetests/isa/x64/fpromote-demote.clif @@ -13,7 +13,7 @@ block0(v0: f32): ; block0: ; movdqa %xmm0, %xmm5 ; uninit %xmm0 -; xorpd %xmm0, %xmm0, %xmm0 +; xorpd %xmm0, %xmm0 ; movdqa %xmm5, %xmm7 ; cvtss2sd %xmm0, %xmm7, %xmm0 ; movq %rbp, %rsp @@ -52,7 +52,7 @@ block0(v1: i64, v2: f32): ; lea rsp(0 + virtual offset), %r8 ; movss %xmm0, 0(%r8) ; uninit %xmm0 -; xorpd %xmm0, %xmm0, %xmm0 +; xorpd %xmm0, %xmm0 ; cvtss2sd %xmm0, 0(%r8), %xmm0 ; addq %rsp, $16, %rsp ; movq %rbp, %rsp @@ -86,7 +86,7 @@ block0(v0: f64): ; block0: ; movdqa %xmm0, %xmm5 ; uninit %xmm0 -; xorps %xmm0, %xmm0, %xmm0 +; xorps %xmm0, %xmm0 ; movdqa %xmm5, %xmm7 ; cvtsd2ss %xmm0, %xmm7, %xmm0 ; movq %rbp, %rsp @@ -125,7 +125,7 @@ block0(v1: i64, v2: f64): ; lea rsp(0 + virtual offset), %r8 ; movsd %xmm0, 0(%r8) ; uninit %xmm0 -; xorps %xmm0, %xmm0, %xmm0 +; xorps %xmm0, %xmm0 ; cvtsd2ss %xmm0, 0(%r8), %xmm0 ; addq %rsp, $16, %rsp ; movq %rbp, %rsp diff --git a/cranelift/filetests/filetests/isa/x64/fsqrt.clif b/cranelift/filetests/filetests/isa/x64/fsqrt.clif index be10b19ac4e8..c916d0b33393 100644 --- a/cranelift/filetests/filetests/isa/x64/fsqrt.clif +++ b/cranelift/filetests/filetests/isa/x64/fsqrt.clif @@ -13,7 +13,7 @@ block0(v0: f32): ; block0: ; movdqa %xmm0, %xmm5 ; uninit %xmm0 -; xorps %xmm0, %xmm0, %xmm0 +; xorps %xmm0, %xmm0 ; movdqa %xmm5, %xmm7 ; sqrtss %xmm0, %xmm7, %xmm0 ; movq %rbp, %rsp @@ -45,7 +45,7 @@ block0(v0: f64): ; block0: ; movdqa %xmm0, %xmm5 ; uninit %xmm0 -; xorpd %xmm0, %xmm0, %xmm0 +; xorpd %xmm0, %xmm0 ; movdqa %xmm5, %xmm7 ; sqrtsd %xmm0, %xmm7, %xmm0 ; movq %rbp, %rsp diff --git a/cranelift/filetests/filetests/isa/x64/nan-canonicalization-sse41.clif b/cranelift/filetests/filetests/isa/x64/nan-canonicalization-sse41.clif index b29003307fe3..f2dbee6031a7 100644 --- a/cranelift/filetests/filetests/isa/x64/nan-canonicalization-sse41.clif +++ b/cranelift/filetests/filetests/isa/x64/nan-canonicalization-sse41.clif @@ -12,7 +12,7 @@ block0(v0: f32x4, v1: f32x4): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; addps %xmm0, %xmm1, %xmm0 +; addps %xmm1, %xmm0 ; movl $2143289344, %r10d ; movd %r10d, %xmm7 ; shufps $0, %xmm7, const(0), %xmm7 diff --git a/cranelift/filetests/filetests/isa/x64/nan-canonicalization.clif b/cranelift/filetests/filetests/isa/x64/nan-canonicalization.clif index 144a19654be3..c59bae7583b0 100644 --- a/cranelift/filetests/filetests/isa/x64/nan-canonicalization.clif +++ b/cranelift/filetests/filetests/isa/x64/nan-canonicalization.clif @@ -12,15 +12,15 @@ block0(v0: f32x4, v1: f32x4): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; addps %xmm0, %xmm1, %xmm0 +; addps %xmm1, %xmm0 ; movdqa %xmm0, %xmm1 ; movl $2143289344, %esi ; movd %esi, %xmm5 ; shufps $0, %xmm5, const(0), %xmm5 ; cmpps $3, %xmm0, %xmm1, %xmm0 -; andps %xmm5, %xmm0, %xmm5 +; andps %xmm0, %xmm5 ; andnps %xmm0, %xmm1, %xmm0 -; orps %xmm0, %xmm5, %xmm0 +; orps %xmm5, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -66,7 +66,7 @@ block0(v0: f64, v1: f64): ; movabsq $9221120237041090560, %r11 ; movq %r11, %xmm5 ; cmppd $3, %xmm0, %xmm7, %xmm0 -; andpd %xmm5, %xmm0, %xmm5 +; andpd %xmm0, %xmm5 ; andnpd %xmm0, %xmm7, %xmm0 ; orpd %xmm5, %xmm0 ; movq %rbp, %rsp @@ -105,9 +105,9 @@ block0(v0: f32, v1: f32): ; movl $2143289344, %r11d ; movd %r11d, %xmm5 ; cmpps $3, %xmm0, %xmm7, %xmm0 -; andps %xmm5, %xmm0, %xmm5 +; andps %xmm0, %xmm5 ; andnps %xmm0, %xmm7, %xmm0 -; orps %xmm0, %xmm5, %xmm0 +; orps %xmm5, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/narrowing.clif b/cranelift/filetests/filetests/isa/x64/narrowing.clif index d46d1c9980da..e1d17c5dc3b3 100644 --- a/cranelift/filetests/filetests/isa/x64/narrowing.clif +++ b/cranelift/filetests/filetests/isa/x64/narrowing.clif @@ -65,7 +65,7 @@ block0(v0: f64x2): ; block0: ; movdqa %xmm0, %xmm3 ; cmppd $0, %xmm3, %xmm0, %xmm3 -; andps %xmm3, const(0), %xmm3 +; andps (%rip), %xmm3 ; minpd %xmm0, %xmm3, %xmm0 ; cvttpd2dq %xmm0, %xmm0 ; movq %rbp, %rsp diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif index 335833332add..55d4b89be81a 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif @@ -11,7 +11,7 @@ block0(v0: f32x4, v1: f32x4): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; andps %xmm0, %xmm1, %xmm0 +; andps %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -36,7 +36,7 @@ block0(v0: f64x2, v1: f64x2): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; andpd %xmm0, %xmm1, %xmm0 +; andpd %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -86,7 +86,7 @@ block0(v0: f32x4, v1: f32x4): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; orps %xmm0, %xmm1, %xmm0 +; orps %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -161,7 +161,7 @@ block0(v0: f32x4, v1: f32x4): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; xorps %xmm0, %xmm1, %xmm0 +; xorps %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -186,7 +186,7 @@ block0(v0: f64x2, v1: f64x2): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; xorpd %xmm0, %xmm1, %xmm0 +; xorpd %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -265,9 +265,9 @@ block0(v0: f32x4, v1: f32x4, v2: f32x4): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; andps %xmm1, %xmm0, %xmm1 +; andps %xmm0, %xmm1 ; andnps %xmm0, %xmm2, %xmm0 -; orps %xmm0, %xmm1, %xmm0 +; orps %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -294,7 +294,7 @@ block0(v0: f64x2, v1: f64x2, v2: f64x2): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; andpd %xmm1, %xmm0, %xmm1 +; andpd %xmm0, %xmm1 ; andnpd %xmm0, %xmm2, %xmm0 ; orpd %xmm1, %xmm0 ; movq %rbp, %rsp diff --git a/cranelift/filetests/filetests/isa/x64/simd-float-min-max.clif b/cranelift/filetests/filetests/isa/x64/simd-float-min-max.clif index 4ca19dce7fa6..5d5603d0d9fc 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-float-min-max.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-float-min-max.clif @@ -17,10 +17,10 @@ block0(v0: i64, v1: f32x4): ; maxps %xmm0, %xmm4, %xmm0 ; maxps %xmm4, %xmm6, %xmm4 ; movdqa %xmm0, %xmm1 -; xorps %xmm1, %xmm4, %xmm1 -; orps %xmm0, %xmm1, %xmm0 +; xorps %xmm4, %xmm1 +; orps %xmm1, %xmm0 ; movdqa %xmm0, %xmm4 -; subps %xmm4, %xmm1, %xmm4 +; subps %xmm1, %xmm4 ; cmpps $3, %xmm0, %xmm0, %xmm0 ; psrld %xmm0, $10, %xmm0 ; andnps %xmm0, %xmm4, %xmm0 @@ -64,10 +64,10 @@ block0(v0: i64, v1: f32x4): ; movdqa %xmm0, %xmm1 ; minps %xmm1, %xmm4, %xmm1 ; minps %xmm4, %xmm0, %xmm4 -; orps %xmm1, %xmm4, %xmm1 +; orps %xmm4, %xmm1 ; movdqa %xmm1, %xmm0 ; cmpps $3, %xmm0, %xmm4, %xmm0 -; orps %xmm1, %xmm0, %xmm1 +; orps %xmm0, %xmm1 ; psrld %xmm0, $10, %xmm0 ; andnps %xmm0, %xmm1, %xmm0 ; movq %rbp, %rsp @@ -109,10 +109,10 @@ block0(v0: i64, v1: f64x2): ; maxpd %xmm0, %xmm4, %xmm0 ; maxpd %xmm4, %xmm6, %xmm4 ; movdqa %xmm0, %xmm1 -; xorpd %xmm1, %xmm4, %xmm1 +; xorpd %xmm4, %xmm1 ; orpd %xmm1, %xmm0 ; movdqa %xmm0, %xmm4 -; subpd %xmm4, %xmm1, %xmm4 +; subpd %xmm1, %xmm4 ; cmppd $3, %xmm0, %xmm0, %xmm0 ; psrlq %xmm0, $13, %xmm0 ; andnpd %xmm0, %xmm4, %xmm0 diff --git a/cranelift/filetests/filetests/isa/x64/uunarrow.clif b/cranelift/filetests/filetests/isa/x64/uunarrow.clif index 57fdf011cbaa..e2391ab77813 100644 --- a/cranelift/filetests/filetests/isa/x64/uunarrow.clif +++ b/cranelift/filetests/filetests/isa/x64/uunarrow.clif @@ -14,11 +14,11 @@ block0(v0: f64x2): ; movq %rsp, %rbp ; block0: ; uninit %xmm3 -; xorpd %xmm3, %xmm3, %xmm3 +; xorpd %xmm3, %xmm3 ; maxpd %xmm0, %xmm3, %xmm0 ; minpd %xmm0, const(0), %xmm0 ; roundpd $3, %xmm0, %xmm0 -; addpd %xmm0, const(1), %xmm0 +; addpd (%rip), %xmm0 ; shufps $136, %xmm0, %xmm3, %xmm0 ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/widening.clif b/cranelift/filetests/filetests/isa/x64/widening.clif index 1bd9ef170bcb..8e16188adf97 100644 --- a/cranelift/filetests/filetests/isa/x64/widening.clif +++ b/cranelift/filetests/filetests/isa/x64/widening.clif @@ -299,7 +299,7 @@ block0(v0: i32x4): ; movq %rsp, %rbp ; block0: ; uninit %xmm3 -; xorps %xmm3, %xmm3, %xmm3 +; xorps %xmm3, %xmm3 ; unpckhps %xmm0, %xmm3, %xmm0 ; movq %rbp, %rsp ; popq %rbp