Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1577,6 +1577,15 @@
;; Lane-1 insertion into an `i64x2`: the scalar is moved into an XMM register
;; and `punpcklqdq` then pairs it with the low lane of the existing vector.
(rule (lower (insertlane dst_vec @ (value_type $I64X2) new_lane (u8_from_uimm8 1)))
      (x64_punpcklqdq dst_vec (x64_movq_to_xmm new_lane)))

;; Lane-1 insertion into an `i64x2` whose vector operand is itself a `splat`.
;; Only lane 0 of the splat survives the insertion, so the splat is elided:
;; its scalar is moved directly into an XMM register and the second scalar is
;; inserted into lane 1 with `pinsrq`. This shape is common when an i64x2 is
;; assembled from two i64 values.
(rule 3 (lower (insertlane (has_type $I64X2 (splat low))
                           high
                           (u8_from_uimm8 1)))
      (if-let $true (use_sse41))
      (x64_pinsrq (bitcast_gpr_to_xmm $I64 low) high 1))

;; With SSE4.1 available, an `f32x4` lane whose new value comes from a
;; sinkable load is inserted with `insertps`, using the load as its operand.
(rule 1 (lower (insertlane dst_vec @ (value_type $F32X4)
                           (sinkable_load lane_src)
                           (u8_from_uimm8 lane_idx)))
      (if-let $true (use_sse41))
      (x64_insertps dst_vec lane_src (sse_insertps_lane_imm lane_idx)))
Expand Down Expand Up @@ -4258,6 +4267,11 @@
;; TODO use Inst::gen_constant() instead.
(x64_xmm_load_const ty (const_to_vconst const)))

;; An all-zero vector constant does not need a constant load: take an
;; uninitialized XMM value and xor it with itself to produce zero.
(rule 1 (lower (has_type ty (vconst (u128_from_constant 0))))
      (let ((zeroed Xmm (xmm_uninit_value)))
        (x64_pxor zeroed zeroed)))

;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Special case for `pblendw` which takes an 8-bit immediate where each bit
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@ block0:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movdqu const(3), %xmm0
; movdqu const(2), %xmm2
; uninit %xmm0
; pxor %xmm0, %xmm0, %xmm0
; movdqu const(2), %xmm3
; pshufb %xmm0, const(0), %xmm0
; pshufb %xmm2, const(1), %xmm2
; por %xmm0, %xmm2, %xmm0
; pshufb %xmm3, const(1), %xmm3
; por %xmm0, %xmm3, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
Expand All @@ -29,22 +30,16 @@ block0:
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movdqu 0x24(%rip), %xmm0
; movdqu 0x2c(%rip), %xmm2
; pshufb 0x33(%rip), %xmm0
; pshufb 0x3a(%rip), %xmm2
; por %xmm2, %xmm0
; pxor %xmm0, %xmm0
; movdqu 0x20(%rip), %xmm3
; pshufb 0x27(%rip), %xmm0
; pshufb 0x2e(%rip), %xmm3
; por %xmm3, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rcx)
; addb %al, (%rax)
; addb %al, (%rax)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
test compile precise-output
target x86_64 sse42 has_avx

;; All-zero `i64x2` constant. With AVX enabled the expectation that follows is
;; a register self-`vpxor` with no constant load.
function %i64x2_make0() -> i64x2 {
block0:
v0 = vconst.i64x2 [0 0]
return v0
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; uninit %xmm0
; vpxor %xmm0, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vpxor %xmm0, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

;; Zero vector with the i64 argument inserted into lane 0. The expectation
;; that follows is a `vpxor` to produce zero, then `vpinsrq $0`.
function %i64x2_make1(i64) -> i64x2 {
block0(v0: i64):
v1 = vconst.i64x2 [0 0]
v2 = insertlane.i64x2 v1, v0, 0
return v2
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; uninit %xmm3
; vpxor %xmm3, %xmm3, %xmm5
; vpinsrq $0, %xmm5, %rdi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vpxor %xmm3, %xmm3, %xmm5
; vpinsrq $0, %rdi, %xmm5, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

;; Builds an `i64x2` from two i64 arguments: `splat` of the first, then the
;; second inserted into lane 1. The expectation that follows is `vmovq` plus
;; `vpinsrq $1`, with no separate splat sequence.
function %i64x2_make2(i64, i64) -> i64x2 {
block0(v0: i64, v1: i64):
v2 = splat.i64x2 v0
v3 = insertlane.i64x2 v2, v1, 1
return v3
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vmovq %rdi, %xmm3
; vpinsrq $1, %xmm3, %rsi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vmovq %rdi, %xmm3
; vpinsrq $1, %rsi, %xmm3, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

86 changes: 86 additions & 0 deletions cranelift/filetests/filetests/isa/x64/simd-make-vectors.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
test compile precise-output
target x86_64 sse42

;; All-zero `i64x2` constant. The expectation that follows is a register
;; self-`pxor` with no constant load.
function %i64x2_make0() -> i64x2 {
block0:
v0 = vconst.i64x2 [0 0]
return v0
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; uninit %xmm0
; pxor %xmm0, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; pxor %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

;; Zero vector with the i64 argument inserted into lane 0. The disassembly
;; that follows is `pxor` then `pinsrq $0` (the VCode listing prints the
;; insert as `pinsrd.w`).
function %i64x2_make1(i64) -> i64x2 {
block0(v0: i64):
v1 = vconst.i64x2 [0 0]
v2 = insertlane.i64x2 v1, v0, 0
return v2
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; uninit %xmm0
; pxor %xmm0, %xmm0, %xmm0
; pinsrd.w $0, %xmm0, %rdi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; pxor %xmm0, %xmm0
; pinsrq $0, %rdi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

;; Builds an `i64x2` from two i64 arguments: `splat` of the first, then the
;; second inserted into lane 1. The disassembly that follows is `movq` plus
;; `pinsrq $1`, with no separate splat sequence.
function %i64x2_make2(i64, i64) -> i64x2 {
block0(v0: i64, v1: i64):
v2 = splat.i64x2 v0
v3 = insertlane.i64x2 v2, v1, 1
return v3
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %xmm0
; pinsrd.w $1, %xmm0, %rsi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movq %rdi, %xmm0
; pinsrq $1, %rsi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

39 changes: 39 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-make-vectors.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
test interpret
test run
target aarch64
target s390x
target x86_64
target x86_64 sse42
target x86_64 sse42 has_avx
target riscv64 has_v
target riscv64 has_v has_c has_zcb

;; Returns the all-zero `i64x2` constant.
function %i64x2_make0() -> i64x2 {
block0:
v0 = vconst.i64x2 [0 0]
return v0
}

; run: %i64x2_make0() == [0 0]

;; Returns a vector whose lane 0 is the argument and whose lane 1 is zero.
function %i64x2_make1(i64) -> i64x2 {
block0(v0: i64):
v1 = vconst.i64x2 [0 0]
v2 = insertlane.i64x2 v1, v0, 0
return v2
}

; run: %i64x2_make1(0) == [0 0]
; run: %i64x2_make1(0x123456789abcdef0) == [0x123456789abcdef0 0]

;; Returns a vector whose lane 0 is the first argument and whose lane 1 is the
;; second: the first is splatted, then lane 1 is overwritten.
function %i64x2_make2(i64, i64) -> i64x2 {
block0(v0: i64, v1: i64):
v2 = splat.i64x2 v0
v3 = insertlane.i64x2 v2, v1, 1
return v3
}

; run: %i64x2_make2(0, 0) == [0 0]
; run: %i64x2_make2(0x123456789abcdef0, 0) == [0x123456789abcdef0 0]
; run: %i64x2_make2(0, 0x123456789abcdef0) == [0 0x123456789abcdef0]
; run: %i64x2_make2(0x123456789abcdef0, 0x0fedcba987654321) == [0x123456789abcdef0 0x0fedcba987654321]