Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cranelift/codegen/src/ir/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,8 @@ impl Type {
self.replace_lanes(match self.lane_type() {
I8 | B1 | B8 => I8,
I16 | B16 => I16,
I32 | B32 => I32,
I64 | B64 => I64,
I32 | B32 | F32 => I32,
I64 | B64 | F64 => I64,
I128 | B128 => I128,
_ => unimplemented!(),
})
Expand Down
45 changes: 43 additions & 2 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,8 @@
(VecLoadReplicate
(rd WritableReg)
(rn Reg)
(size VectorSize))
(size VectorSize)
(flags MemFlags))

;; Vector conditional select, 128 bit. A synthetic instruction, which generates a 4-insn
;; control-flow diamond.
Expand Down Expand Up @@ -1376,6 +1377,16 @@
(decl cond_br_cond (Cond) CondBrKind)
(extern constructor cond_br_cond cond_br_cond)

;; Lower the address of a load or a store.
;;
;; Takes a type, the memory instruction whose address is being lowered, and a
;; `u32` offset, and produces the resulting `AMode`. The work is done by an
;; external (Rust) constructor.
(decl amode (Type Inst u32) AMode)
;; TODO: Port lower_address() to ISLE.
(extern constructor amode amode)

;; Matches an `AMode` that is just a register.
;;
;; Yields that register on a match. Declared `pure` and implemented by an
;; external (Rust) constructor.
(decl pure amode_is_reg (AMode) Reg)
;; TODO: Implement in ISLE.
(extern constructor amode_is_reg amode_is_reg)

;; Instruction creation helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Helper for creating the zero register.
Expand Down Expand Up @@ -1481,6 +1492,13 @@
(_ Unit (emit (MInst.VecDup dst src size))))
dst))

;; Emit an `MInst.VecDupFromFpu` with source `src` and vector size `size`.
;; The destination is a fresh temporary, which is returned.
(decl vec_dup_from_fpu (Reg VectorSize) Reg)
(rule (vec_dup_from_fpu src size)
      (let ((rd WritableReg (temp_writable_reg $I8X16))
            (_ Unit (emit (MInst.VecDupFromFpu rd src size))))
        rd))

;; Helper for emitting `MInst.AluRRImm12` instructions.
(decl alu_rr_imm12 (ALUOp Type Reg Imm12) Reg)
(rule (alu_rr_imm12 op ty src imm)
Expand Down Expand Up @@ -2167,7 +2185,7 @@
(decl sinkable_atomic_load (SinkableAtomicLoad) Value)
(extern extractor sinkable_atomic_load sinkable_atomic_load)

;; Sink a `SinkableLoad` into a `Reg`.
;; Sink a `SinkableAtomicLoad` into a `Reg`.
;;
;; This is a side-effectful operation that notifies the context that the
;; instruction that produced the `SinkableAtomicLoad` has been sunk into another
Expand Down Expand Up @@ -2230,6 +2248,29 @@
(alu_rrr op ty x_lo y_lo)
(alu_rrr op ty x_hi y_hi))))

;; Emit an `MInst.VecLoadReplicate` (`ld1r`) that reads through the address
;; register `src` with the given vector size and memory flags. The destination
;; is a fresh temporary, which is returned.
(decl ld1r (Reg VectorSize MemFlags) Reg)
(rule (ld1r src size flags)
      (let ((rd WritableReg (temp_writable_reg $I8X16))
            (_ Unit (emit (MInst.VecLoadReplicate rd src size flags))))
        rd))

;; Helper for materializing the address described by an `AMode` in a register.
;;
;; When the `AMode` is just a register (`amode_is_reg` matches), that register
;; is returned directly and no instruction is emitted. Otherwise an
;; `MInst.LoadAddr` is emitted into a fresh `$I64` temporary, which is returned.
(decl load_addr (AMode) Reg)

;; Both rules match any `AMode` that is a plain register, so the elision rule
;; carries an explicit, higher priority instead of relying on how the ISLE
;; compiler happens to order overlapping rules.
(rule 1 (load_addr addr)
      (if-let addr_reg (amode_is_reg addr))
      addr_reg)

;; General case: compute the address with a `LoadAddr` instruction.
(rule (load_addr addr)
      (let ((dst WritableReg (temp_writable_reg $I64))
            (_ Unit (emit (MInst.LoadAddr dst addr))))
        dst))

;; Lower a vector splat with a constant parameter.
;;
;; Takes the constant as a `u64` together with the `VectorSize` of the result
;; and returns the register holding the splatted vector. Implemented by an
;; external (Rust) constructor.
(decl splat_const (u64 VectorSize) Reg)
;; TODO: Port lower_splat_const() to ISLE.
(extern constructor splat_const splat_const)

;; Generate the comparison-to-zero operator (a `VecMisc2`) that corresponds to
;; the input floating-point condition code. Implemented by an external (Rust)
;; constructor.
(decl float_cc_cmp_zero_to_vec_misc_op (FloatCC) VecMisc2)
(extern constructor float_cc_cmp_zero_to_vec_misc_op float_cc_cmp_zero_to_vec_misc_op)
Expand Down
13 changes: 9 additions & 4 deletions cranelift/codegen/src/isa/aarch64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2258,10 +2258,10 @@ impl MachInstEmit for Inst {
ScalarSize::Size16 => 0b00010,
ScalarSize::Size32 => 0b00100,
ScalarSize::Size64 => 0b01000,
_ => unimplemented!("Unexpected VectorSize: {:?}", size),
_ => unreachable!(),
};
sink.put4(
0b000_01110000_00000_000011_00000_00000
0b0_0_0_01110000_00000_000011_00000_00000
| (q << 30)
| (imm5 << 16)
| (machreg_to_gpr(rn) << 5)
Expand Down Expand Up @@ -2625,13 +2625,18 @@ impl MachInstEmit for Inst {
};
sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
}
&Inst::VecLoadReplicate { rd, rn, size } => {
&Inst::VecLoadReplicate {
rd,
rn,
size,
flags,
} => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let (q, size) = size.enc_size();

let srcloc = state.cur_srcloc();
if srcloc != SourceLoc::default() {
if srcloc != SourceLoc::default() && !flags.notrap() {
// Register the offset at which the actual load instruction starts.
sink.add_trap(TrapCode::HeapOutOfBounds);
}
Expand Down
16 changes: 8 additions & 8 deletions cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2351,10 +2351,10 @@ fn test_aarch64_binemit() {
Inst::VecDup {
rd: writable_vreg(25),
rn: xreg(7),
size: VectorSize::Size8x16,
size: VectorSize::Size8x8,
},
"F90C014E",
"dup v25.16b, w7",
"F90C010E",
"dup v25.8b, w7",
));
insns.push((
Inst::VecDup {
Expand Down Expand Up @@ -2387,10 +2387,10 @@ fn test_aarch64_binemit() {
Inst::VecDup {
rd: writable_vreg(0),
rn: xreg(28),
size: VectorSize::Size32x4,
size: VectorSize::Size32x2,
},
"800F044E",
"dup v0.4s, w28",
"800F040E",
"dup v0.2s, w28",
));
insns.push((
Inst::VecDup {
Expand Down Expand Up @@ -5199,8 +5199,8 @@ fn test_aarch64_binemit() {
Inst::VecLoadReplicate {
rd: writable_vreg(31),
rn: xreg(0),

size: VectorSize::Size64x2,
flags: MemFlags::trusted(),
},
"1FCC404D",
"ld1r { v31.2d }, [x0]",
Expand All @@ -5210,8 +5210,8 @@ fn test_aarch64_binemit() {
Inst::VecLoadReplicate {
rd: writable_vreg(0),
rn: xreg(25),

size: VectorSize::Size8x8,
flags: MemFlags::trusted(),
},
"20C3400D",
"ld1r { v0.8b }, [x25]",
Expand Down
11 changes: 0 additions & 11 deletions cranelift/codegen/src/isa/aarch64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -530,17 +530,6 @@ impl Inst {
}
}
}

/// Produce a register holding the address described by `mem`.
///
/// If `mem` is nothing more than a register, that register is handed back and
/// no instruction is needed (`None`). Otherwise the result lives in `rd` and
/// the `LoadAddr` instruction that computes it is returned alongside.
pub fn gen_load_addr(rd: Writable<Reg>, mem: AMode) -> (Reg, Option<Inst>) {
    match mem.is_reg() {
        // The amode already is a plain register: nothing to emit.
        Some(base) => (base, None),
        None => {
            let load = Inst::LoadAddr { rd, mem };
            (rd.to_reg(), Some(load))
        }
    }
}
}

//=============================================================================
Expand Down
51 changes: 27 additions & 24 deletions cranelift/codegen/src/isa/aarch64/inst/regs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,8 @@ pub fn create_reg_env(flags: &settings::Flags) -> MachineEnv {
preg(xreg(14)),
preg(xreg(15)),
// x16 and x17 are spilltmp and tmp2 (see above).
// x18 could be used by the platform to carry inter-procedural state;
// conservatively assume so and make it not allocatable.
// x19-28 are callee-saved and so not preferred.
// x21 is the pinned register (if enabled) and not allocatable if so.
// x29 is FP, x30 is LR, x31 is SP/ZR.
Expand All @@ -178,30 +180,7 @@ pub fn create_reg_env(flags: &settings::Flags) -> MachineEnv {
preg(vreg(5)),
preg(vreg(6)),
preg(vreg(7)),
preg(vreg(8)),
preg(vreg(9)),
preg(vreg(10)),
preg(vreg(11)),
preg(vreg(12)),
preg(vreg(13)),
preg(vreg(14)),
preg(vreg(15)),
],
],
non_preferred_regs_by_class: [
vec![
preg(xreg(19)),
preg(xreg(20)),
// x21 is pinned reg if enabled; we add to this list below if not.
preg(xreg(22)),
preg(xreg(23)),
preg(xreg(24)),
preg(xreg(25)),
preg(xreg(26)),
preg(xreg(27)),
preg(xreg(28)),
],
vec![
// v8-15 are callee-saved and so not preferred.
preg(vreg(16)),
preg(vreg(17)),
preg(vreg(18)),
Expand All @@ -220,6 +199,30 @@ pub fn create_reg_env(flags: &settings::Flags) -> MachineEnv {
preg(vreg(31)),
],
],
non_preferred_regs_by_class: [
vec![
preg(xreg(19)),
preg(xreg(20)),
// x21 is pinned reg if enabled; we add to this list below if not.
preg(xreg(22)),
preg(xreg(23)),
preg(xreg(24)),
preg(xreg(25)),
preg(xreg(26)),
preg(xreg(27)),
preg(xreg(28)),
],
vec![
preg(vreg(8)),
preg(vreg(9)),
preg(vreg(10)),
preg(vreg(11)),
preg(vreg(12)),
preg(vreg(13)),
preg(vreg(14)),
preg(vreg(15)),
],
],
fixed_stack_slots: vec![],
};

Expand Down
43 changes: 39 additions & 4 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1423,7 +1423,8 @@

;;;; Rules for `bitselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type (ty_int_bool_ref_scalar_64 ty) (bitselect c x y)))
(rule (lower (has_type ty (bitselect c x y)))
(if (ty_int_bool_ref_scalar_64 ty))
(let ((tmp1 Reg (and_reg ty x c))
(tmp2 Reg (bic ty y c)))
(orr ty tmp1 tmp2)))
Expand All @@ -1441,12 +1442,14 @@
;; T -> I{64,32,16,8}: We can simply pass through the value: values
;; are always stored with high bits undefined, so we can just leave
;; them be.
(rule (lower (has_type (ty_int_bool_ref_scalar_64 ty) (ireduce src)))
(rule (lower (has_type ty (ireduce src)))
(if (ty_int_bool_ref_scalar_64 ty))
(value_regs_get src 0))

;; Likewise for breduce.

(rule (lower (has_type (ty_int_bool_ref_scalar_64 ty) (breduce src)))
(rule (lower (has_type ty (breduce src)))
(if (ty_int_bool_ref_scalar_64 ty))
(value_regs_get src 0))


Expand Down Expand Up @@ -1515,6 +1518,39 @@
(let ((use_allocated_encoding bool (is_not_baldrdash_call_conv)))
(side_effect (udf use_allocated_encoding trap_code))))

;;;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Scalar input whose type satisfies `ty_int_bool_ref_scalar_64`: duplicate it
;; with `vec_dup`.
;; NOTE(review): this rule also matches the constant and load cases below and
;; no rule carries an explicit priority -- confirm the more specific rules are
;; selected first.
(rule (lower (has_type ty (splat x @ (value_type in_ty))))
(if (ty_int_bool_ref_scalar_64 in_ty))
(vec_dup x (vector_size ty)))

;; Scalar floating-point input: duplicate it with `vec_dup_from_fpu`.
(rule (lower (has_type ty (splat x @ (value_type (ty_scalar_float _)))))
(vec_dup_from_fpu x (vector_size ty)))

;; Constant inputs are handed to `splat_const` as a `u64`. Boolean and integer
;; constants are also matched through a `breduce`/`ireduce`.
(rule (lower (has_type ty (splat (bconst (u64_from_bool n)))))
(splat_const n (vector_size ty)))

(rule (lower (has_type ty (splat (breduce (bconst (u64_from_bool n))))))
(splat_const n (vector_size ty)))

(rule (lower (has_type ty (splat (f32const (u64_from_ieee32 n)))))
(splat_const n (vector_size ty)))

(rule (lower (has_type ty (splat (f64const (u64_from_ieee64 n)))))
(splat_const n (vector_size ty)))

(rule (lower (has_type ty (splat (iconst (u64_from_imm64 n)))))
(splat_const n (vector_size ty)))

(rule (lower (has_type ty (splat (ireduce (iconst (u64_from_imm64 n))))))
(splat_const n (vector_size ty)))

;; Splat of a sinkable load: sink the load into this instruction, lower its
;; address to an `AMode` for the lane type, materialize that address in a
;; register, and load with `ld1r`, forwarding the load's memory flags.
(rule (lower (has_type ty (splat x @ (load flags _addr offset))))
(if-let mem_op (is_sinkable_inst x))
(let ((_ Unit (sink_inst mem_op))
(addr AMode (amode (lane_type ty) mem_op offset))
(address Reg (load_addr addr)))
(ld1r address (vector_size ty) flags)))

;;;; Rules for `AtomicLoad` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (valid_atomic_transaction ty) (atomic_load flags addr)))
Expand All @@ -1527,7 +1563,6 @@
addr))
(side_effect (store_release ty src addr)))


;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule 1 (lower (and (use_lse)
Expand Down
26 changes: 24 additions & 2 deletions cranelift/codegen/src/isa/aarch64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@ pub mod generated_code;

// Types that the generated ISLE code uses via `use super::*`.
use super::{
writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo,
CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift,
insn_inputs, writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget,
CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift,
Inst as MInst, IntCC, JTSequenceInfo, MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode,
Opcode, OperandSize, PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize,
NZCV,
};
use crate::isa::aarch64::lower::{lower_address, lower_splat_const};
use crate::isa::aarch64::settings::Flags as IsaFlags;
use crate::machinst::{isle::*, InputSourceInst};
use crate::settings::Flags;
Expand Down Expand Up @@ -442,4 +443,25 @@ where
_ => panic!(),
}
}

/// Lower the address of memory instruction `mem_op` to an `AMode`.
///
/// Gathers the instruction's inputs and delegates to `lower_address`, passing
/// `ty` through and `offset` cast to `i32`.
fn amode(&mut self, ty: Type, mem_op: Inst, offset: u32) -> AMode {
    let inputs = insn_inputs(self.lower_ctx, mem_op);
    let displacement = offset as i32;
    lower_address(self.lower_ctx, ty, &inputs[..], displacement)
}

fn amode_is_reg(&mut self, address: &AMode) -> Option<Reg> {
address.is_reg()
}

/// Splat the constant `value` across a vector of the given `size`.
///
/// Allocates a fresh `I8X16` temporary, has `lower_splat_const` emit the
/// code that fills it, and returns the temporary as a plain register.
fn splat_const(&mut self, value: u64, size: &VectorSize) -> Reg {
    let dst = self.temp_writable_reg(I8X16);
    lower_splat_const(self.lower_ctx, dst, value, *size);
    dst.to_reg()
}
}
Loading