Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 26 additions & 32 deletions cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,9 @@
(rd WritableReg)
(imm Imm20))

(LoadConst32
(rd WritableReg)
(imm u32))

(LoadConst64
(LoadInlineConst
(rd WritableReg)
(ty Type)
(imm u64))

(Auipc
Expand Down Expand Up @@ -1525,6 +1522,7 @@
;; Immediate Loading rules
;; TODO: Loading the zero reg directly causes a bunch of regalloc errors, we should look into it.
;; TODO: We can load constants like 0x3ff0000000000000 by loading the bits then doing a shift left.
;; TODO: Load floats using `fld` instead of `ld`
(decl imm (Type u64) Reg)

;; Refs get loaded as integers.
Expand Down Expand Up @@ -1553,10 +1551,12 @@
(if-let (i64_generate_imm imm20 imm12) (i64_sextend_u64 ty c))
(rv_addi (rv_lui imm20) imm12))

;; Otherwise we fall back to loading the immediate from memory
;; TODO: This doesen't yet emit the constant into the memory pool, but it would
;; be pretty neat if it did.
(rule 0 (imm (ty_int ty) c) (emit_load_const_64 c))
;; Otherwise we fall back to loading the immediate from the constant pool.
(rule 0 (imm (ty_int ty) c)
(emit_load
(LoadOP.Ld)
(mem_flags_trusted)
(gen_const_amode (emit_u64_le_const c))))
Comment on lines +1555 to +1559
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is preexisting I think, and probably affects other backends too, but technically emitting a 64-bit constant here is over-approximating how large it needs to be, right? In that only the ty-width sized bit-pattern of c is all that matters so for a 32-bit constant the 64-bit size is overkill?

No need to fix here since this preserves what was done before, but may be worth trying to fix later?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My plan is to eventually fix #6922 and then we should be able to change this to only ever trigger for 64 bit constants, since all other constants should be able to be emitted with the previous rules (i.e. with constant materialization instructions).


;; Imm12 Rules

Expand Down Expand Up @@ -1679,10 +1679,6 @@
(_ Unit (emit (MInst.AluRRR op dst src1 src2))))
dst))


(decl pack_float_rounding_mode (FRM) OptionFloatRoundingMode)
(extern constructor pack_float_rounding_mode pack_float_rounding_mode)

;; Helper for emitting `MInst.AluRRR` instructions.
(decl fpu_rrr (FpuOPRRR Type Reg Reg) Reg)
(rule (fpu_rrr op ty src1 src2)
Expand Down Expand Up @@ -1714,12 +1710,6 @@
(_ Unit (emit (MInst.AluRRImm12 op dst src (imm12_zero)))))
dst))

;; Helper for emitting the `LoadConst64` instruction.
(decl emit_load_const_64 (u64) XReg)
(rule (emit_load_const_64 imm)
(let ((dst WritableXReg (temp_writable_xreg))
(_ Unit (emit (MInst.LoadConst64 dst imm))))
dst))

;; Helper for emitting the `Lui` instruction.
;; TODO: This should be something like `emit_u_type`. And should share the
Expand Down Expand Up @@ -2298,24 +2288,28 @@
(decl gen_const_amode (VCodeConstant) AMode)
(extern constructor gen_const_amode gen_const_amode)

(decl offset32_imm (i32) Offset32)
(extern constructor offset32_imm offset32_imm)

;; Returns a canonical type for a LoadOP. We only return I64 or F64.
(decl load_op_reg_type (LoadOP) Type)
(rule 1 (load_op_reg_type (LoadOP.Fld)) $F64)
(rule 1 (load_op_reg_type (LoadOP.Flw)) $F64)
(rule 0 (load_op_reg_type _) $I64)

(decl emit_load (LoadOP MemFlags AMode) Reg)
(rule (emit_load op flags from)
(let ((dst WritableReg (temp_writable_reg (load_op_reg_type op)))
(_ Unit (emit (MInst.Load dst op flags from))))
dst))

;; helper function to load from memory.
(decl gen_load (Reg Offset32 LoadOP MemFlags Type) Reg)
(rule
(gen_load p offset op flags ty)
(let
((tmp WritableReg (temp_writable_reg ty))
(_ Unit (emit (MInst.Load tmp op flags (gen_amode p offset $I64)))))
tmp))
(rule (gen_load p offset op flags ty)
(emit_load op flags (gen_amode p offset $I64)))

(decl gen_load_128 (Reg Offset32 MemFlags) ValueRegs)
(rule
(gen_load_128 p offset flags)
(let
((low Reg (gen_load p offset (LoadOP.Ld) flags $I64))
(high Reg (gen_load p (offset32_add offset 8) (LoadOP.Ld) flags $I64)))
(rule (gen_load_128 p offset flags)
(let ((low Reg (gen_load p offset (LoadOP.Ld) flags $I64))
(high Reg (gen_load p (offset32_add offset 8) (LoadOP.Ld) flags $I64)))
(value_regs low high)))

(decl default_memflags () MemFlags)
Expand Down
128 changes: 40 additions & 88 deletions cranelift/codegen/src/isa/riscv64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,78 +25,6 @@ impl EmitInfo {
}
}

/// load constant by put the constant in the code stream.
/// calculate the pc and using load instruction.
/// This is only allow used in the emit stage.
/// Because of those instruction must execute together.
/// see https://github.com/bytecodealliance/wasmtime/pull/5612
#[derive(Clone, Copy)]
pub(crate) enum LoadConstant {
U32(u32),
U64(u64),
}

impl LoadConstant {
fn to_le_bytes(self) -> Vec<u8> {
match self {
LoadConstant::U32(x) => Vec::from_iter(x.to_le_bytes().into_iter()),
LoadConstant::U64(x) => Vec::from_iter(x.to_le_bytes().into_iter()),
}
}
fn load_op(self) -> LoadOP {
match self {
LoadConstant::U32(_) => LoadOP::Lwu,
LoadConstant::U64(_) => LoadOP::Ld,
}
}
fn load_ty(self) -> Type {
match self {
LoadConstant::U32(_) => R32,
LoadConstant::U64(_) => R64,
}
}

pub(crate) fn load_constant<F: FnMut(Type) -> Writable<Reg>>(
self,
rd: Writable<Reg>,
alloc_tmp: &mut F,
) -> SmallInstVec<Inst> {
let mut insts = SmallInstVec::new();
// get current pc.
let pc = alloc_tmp(I64);
insts.push(Inst::Auipc {
rd: pc,
imm: Imm20 { bits: 0 },
});
// load
insts.push(Inst::Load {
rd,
op: self.load_op(),
flags: MemFlags::new(),
from: AMode::RegOffset(pc.to_reg(), 12, self.load_ty()),
});
let data = self.to_le_bytes();
// jump over.
insts.push(Inst::Jal {
dest: BranchTarget::ResolvedOffset(Inst::INSTRUCTION_SIZE + data.len() as i32),
});
insts.push(Inst::RawData { data });
insts
}

// load and perform an extra add.
pub(crate) fn load_constant_and_add(self, rd: Writable<Reg>, rs: Reg) -> SmallInstVec<Inst> {
let mut insts = self.load_constant(rd, &mut |_| rd);
insts.push(Inst::AluRRR {
alu_op: AluOPRRR::Add,
rd,
rs1: rd.to_reg(),
rs2: rs,
});
insts
}
}

pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 {
u32::try_from(m.to_real_reg().unwrap().hw_enc() & 31).unwrap()
}
Expand Down Expand Up @@ -399,8 +327,7 @@ impl Inst {
| Inst::BrTable { .. }
| Inst::Auipc { .. }
| Inst::Lui { .. }
| Inst::LoadConst32 { .. }
| Inst::LoadConst64 { .. }
| Inst::LoadInlineConst { .. }
| Inst::AluRRR { .. }
| Inst::FpuRRR { .. }
| Inst::AluRRImm12 { .. }
Expand Down Expand Up @@ -533,19 +460,34 @@ impl MachInstEmit for Inst {
let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.as_u32() << 12);
sink.put4(x);
}
&Inst::LoadConst32 { rd, imm } => {
&Inst::LoadInlineConst { rd, ty, imm } => {
let rd = allocs.next_writable(rd);
LoadConstant::U32(imm)
.load_constant(rd, &mut |_| rd)
.into_iter()
.for_each(|inst| inst.emit(&[], sink, emit_info, state));
}
&Inst::LoadConst64 { rd, imm } => {
let rd = allocs.next_writable(rd);
LoadConstant::U64(imm)
.load_constant(rd, &mut |_| rd)
.into_iter()
.for_each(|inst| inst.emit(&[], sink, emit_info, state));

let data = &imm.to_le_bytes()[..ty.bytes() as usize];

let label_data: MachLabel = sink.get_label();
let label_end: MachLabel = sink.get_label();

// Load into rd
Inst::Load {
rd,
op: LoadOP::from_type(ty),
flags: MemFlags::new(),
from: AMode::Label(label_data),
}
.emit(&[], sink, emit_info, state);

// Jump over the inline pool
Inst::Jal {
dest: BranchTarget::Label(label_end),
}
.emit(&[], sink, emit_info, state);

// Emit the inline data
sink.bind_label(label_data, &mut state.ctrl_plane);
Inst::RawData { data: data.into() }.emit(&[], sink, emit_info, state);

sink.bind_label(label_end, &mut state.ctrl_plane);
}
&Inst::FpuRR {
frm,
Expand Down Expand Up @@ -666,6 +608,8 @@ impl MachInstEmit for Inst {
let offset = from.get_offset_with_state(state);
let offset_imm12 = Imm12::maybe_from_i64(offset);

// TODO: We shouldn't just fall back to `LoadAddr` immediately. For `MachLabel`s
// we should try to emit the `auipc` and add a relocation on this load.
let (addr, imm12) = match (base, offset_imm12) {
// If the offset fits into an imm12 we can directly encode it.
(Some(base), Some(imm12)) => (base, imm12),
Expand Down Expand Up @@ -693,6 +637,8 @@ impl MachInstEmit for Inst {
let offset = to.get_offset_with_state(state);
let offset_imm12 = Imm12::maybe_from_i64(offset);

// TODO: We shouldn't just fall back to `LoadAddr` immediately. For `MachLabel`s
// we should try to emit the `auipc` and add a relocation on this store.
let (addr, imm12) = match (base, offset_imm12) {
// If the offset fits into an imm12 we can directly encode it.
(Some(base), Some(imm12)) => (base, imm12),
Expand Down Expand Up @@ -1265,8 +1211,14 @@ impl MachInstEmit for Inst {
.emit(&[], sink, emit_info, state);
}
(_, Some(rs), None) => {
LoadConstant::U64(offset as u64)
.load_constant_and_add(rd, rs)
let mut insts = Inst::load_constant_u64(rd, offset as u64);
insts.push(Inst::AluRRR {
alu_op: AluOPRRR::Add,
rd,
rs1: rd.to_reg(),
rs2: rs,
});
insts
.into_iter()
.for_each(|inst| inst.emit(&[], sink, emit_info, state));
}
Expand Down
29 changes: 13 additions & 16 deletions cranelift/codegen/src/isa/riscv64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,16 +227,23 @@ impl Inst {
pub(crate) fn load_constant_u32(rd: Writable<Reg>, value: u64) -> SmallInstVec<Inst> {
let insts = Inst::load_const_imm(rd, value);
insts.unwrap_or_else(|| {
smallvec![Inst::LoadConst32 {
smallvec![Inst::LoadInlineConst {
rd,
imm: value as u32
ty: I32,
imm: value
}]
})
}

pub fn load_constant_u64(rd: Writable<Reg>, value: u64) -> SmallInstVec<Inst> {
let insts = Inst::load_const_imm(rd, value);
insts.unwrap_or_else(|| smallvec![Inst::LoadConst64 { rd, imm: value }])
insts.unwrap_or_else(|| {
smallvec![Inst::LoadInlineConst {
rd,
ty: I64,
imm: value
}]
})
}

pub(crate) fn construct_auipc_and_jalr(
Expand Down Expand Up @@ -377,8 +384,7 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
}
&Inst::Auipc { rd, .. } => collector.reg_def(rd),
&Inst::Lui { rd, .. } => collector.reg_def(rd),
&Inst::LoadConst32 { rd, .. } => collector.reg_def(rd),
&Inst::LoadConst64 { rd, .. } => collector.reg_def(rd),
&Inst::LoadInlineConst { rd, .. } => collector.reg_def(rd),
&Inst::AluRRR { rd, rs1, rs2, .. } => {
collector.reg_use(rs1);
collector.reg_use(rs2);
Expand Down Expand Up @@ -958,7 +964,7 @@ impl MachInst for Inst {

fn worst_case_size() -> CodeOffset {
// calculate by test function riscv64_worst_case_instruction_size()
116
124
}

fn ref_type_regclass(_settings: &settings::Flags) -> RegClass {
Expand Down Expand Up @@ -1371,16 +1377,7 @@ impl Inst {
&Inst::Lui { rd, ref imm } => {
format!("{} {},{}", "lui", format_reg(rd.to_reg(), allocs), imm.bits)
}
&Inst::LoadConst32 { rd, imm } => {
let rd = format_reg(rd.to_reg(), allocs);
let mut buf = String::new();
write!(&mut buf, "auipc {},0; ", rd).unwrap();
write!(&mut buf, "ld {},12({}); ", rd, rd).unwrap();
write!(&mut buf, "j {}; ", Inst::INSTRUCTION_SIZE + 4).unwrap();
write!(&mut buf, ".4byte 0x{:x}", imm).unwrap();
buf
}
&Inst::LoadConst64 { rd, imm } => {
&Inst::LoadInlineConst { rd, imm, .. } => {
let rd = format_reg(rd.to_reg(), allocs);
let mut buf = String::new();
write!(&mut buf, "auipc {},0; ", rd).unwrap();
Expand Down
7 changes: 0 additions & 7 deletions cranelift/codegen/src/isa/riscv64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -440,17 +440,10 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend>
self.backend.isa_flags.has_zbs()
}

fn offset32_imm(&mut self, offset: i32) -> Offset32 {
Offset32::new(offset)
}
fn default_memflags(&mut self) -> MemFlags {
MemFlags::new()
}

fn pack_float_rounding_mode(&mut self, f: &FRM) -> OptionFloatRoundingMode {
Some(*f)
}

fn int_convert_2_float_op(&mut self, from: Type, is_signed: bool, to: Type) -> FpuOPRR {
FpuOPRR::int_convert_2_float_op(from, is_signed, to)
}
Expand Down
Loading