bytecodealliance · afonso360 · Aug 30, 2023 · Aug 30, 2023 · Aug 30, 2023 · Aug 30, 2023
@@ -10,12 +10,9 @@
       (rd WritableReg)
       (imm Imm20))
 
-    (LoadConst32
-      (rd WritableReg)
-      (imm u32))
-
-    (LoadConst64
+    (LoadInlineConst
       (rd WritableReg)
+      (ty Type)
       (imm u64))
 
      (Auipc
@@ -1525,6 +1522,7 @@
 ;; Immediate Loading rules
 ;; TODO: Loading the zero reg directly causes a bunch of regalloc errors, we should look into it.
 ;; TODO: We can load constants like 0x3ff0000000000000 by loading the bits then doing a shift left.
+;; TODO: Load floats using `fld` instead of `ld`
 (decl imm (Type u64) Reg)
 
 ;; Refs get loaded as integers.
@@ -1553,10 +1551,12 @@
   (if-let (i64_generate_imm imm20 imm12) (i64_sextend_u64 ty c))
   (rv_addi (rv_lui imm20) imm12))
 
-;; Otherwise we fall back to loading the immediate from memory
-;; TODO: This doesen't yet emit the constant into the memory pool, but it would
-;; be pretty neat if it did.
-(rule 0 (imm (ty_int ty) c) (emit_load_const_64 c))
+;; Otherwise we fall back to loading the immediate from the constant pool.
+(rule 0 (imm (ty_int ty) c)
+  (emit_load
+    (LoadOP.Ld)
+    (mem_flags_trusted)
+    (gen_const_amode (emit_u64_le_const c))))
 
 ;; Imm12 Rules
 
@@ -1679,10 +1679,6 @@
             (_ Unit (emit (MInst.AluRRR op dst src1 src2))))
         dst))
 
-
-(decl pack_float_rounding_mode (FRM) OptionFloatRoundingMode)
-(extern constructor pack_float_rounding_mode pack_float_rounding_mode)
-
 ;; Helper for emitting `MInst.AluRRR` instructions.
 (decl fpu_rrr (FpuOPRRR Type Reg Reg) Reg)
 (rule (fpu_rrr op ty src1 src2)
@@ -1714,12 +1710,6 @@
             (_ Unit (emit (MInst.AluRRImm12 op dst src (imm12_zero)))))
         dst))
 
-;; Helper for emitting the `LoadConst64` instruction.
-(decl emit_load_const_64 (u64) XReg)
-(rule (emit_load_const_64 imm)
-      (let ((dst WritableXReg (temp_writable_xreg))
-            (_ Unit (emit (MInst.LoadConst64 dst imm))))
-        dst))
 
 ;; Helper for emitting the `Lui` instruction.
 ;; TODO: This should be something like `emit_u_type`. And should share the
@@ -2298,24 +2288,28 @@
 (decl gen_const_amode (VCodeConstant) AMode)
 (extern constructor gen_const_amode gen_const_amode)
 
-(decl offset32_imm (i32) Offset32)
-(extern constructor offset32_imm offset32_imm)
+
+;; Returns a canonical type for a LoadOP. We only return I64 or F64.
+(decl load_op_reg_type (LoadOP) Type)
+(rule 1 (load_op_reg_type (LoadOP.Fld)) $F64)
+(rule 1 (load_op_reg_type (LoadOP.Flw)) $F64)
+(rule 0 (load_op_reg_type _) $I64)
+
+(decl emit_load (LoadOP MemFlags AMode) Reg)
+(rule (emit_load op flags from)
+      (let ((dst WritableReg (temp_writable_reg (load_op_reg_type op)))
+            (_ Unit (emit (MInst.Load dst op flags from))))
+        dst))
 
 ;; helper function to load from memory.
 (decl gen_load (Reg Offset32 LoadOP MemFlags Type) Reg)
-(rule
-  (gen_load p offset op flags ty)
-  (let
-    ((tmp WritableReg (temp_writable_reg ty))
-      (_ Unit (emit (MInst.Load tmp op flags (gen_amode p offset $I64)))))
-    tmp))
+(rule (gen_load p offset op flags ty)
+  (emit_load op flags (gen_amode p offset $I64)))
 
 (decl gen_load_128 (Reg Offset32 MemFlags) ValueRegs)
-(rule
-  (gen_load_128 p offset flags)
-  (let
-    ((low Reg (gen_load p offset (LoadOP.Ld) flags $I64))
-      (high Reg (gen_load p (offset32_add offset 8) (LoadOP.Ld) flags $I64)))
+(rule (gen_load_128 p offset flags)
+  (let ((low Reg (gen_load p offset (LoadOP.Ld) flags $I64))
+        (high Reg (gen_load p (offset32_add offset 8) (LoadOP.Ld) flags $I64)))
     (value_regs low high)))
 
 (decl default_memflags () MemFlags)

@@ -25,78 +25,6 @@ impl EmitInfo {
     }
 }
 
-/// load constant by put the constant in the code stream.
-/// calculate the pc and using load instruction.
-/// This is only allow used in the emit stage.
-/// Because of those instruction must execute together.
-/// see https://github.com/bytecodealliance/wasmtime/pull/5612
-#[derive(Clone, Copy)]
-pub(crate) enum LoadConstant {
-    U32(u32),
-    U64(u64),
-}
-
-impl LoadConstant {
-    fn to_le_bytes(self) -> Vec<u8> {
-        match self {
-            LoadConstant::U32(x) => Vec::from_iter(x.to_le_bytes().into_iter()),
-            LoadConstant::U64(x) => Vec::from_iter(x.to_le_bytes().into_iter()),
-        }
-    }
-    fn load_op(self) -> LoadOP {
-        match self {
-            LoadConstant::U32(_) => LoadOP::Lwu,
-            LoadConstant::U64(_) => LoadOP::Ld,
-        }
-    }
-    fn load_ty(self) -> Type {
-        match self {
-            LoadConstant::U32(_) => R32,
-            LoadConstant::U64(_) => R64,
-        }
-    }
-
-    pub(crate) fn load_constant<F: FnMut(Type) -> Writable<Reg>>(
-        self,
-        rd: Writable<Reg>,
-        alloc_tmp: &mut F,
-    ) -> SmallInstVec<Inst> {
-        let mut insts = SmallInstVec::new();
-        // get current pc.
-        let pc = alloc_tmp(I64);
-        insts.push(Inst::Auipc {
-            rd: pc,
-            imm: Imm20 { bits: 0 },
-        });
-        // load
-        insts.push(Inst::Load {
-            rd,
-            op: self.load_op(),
-            flags: MemFlags::new(),
-            from: AMode::RegOffset(pc.to_reg(), 12, self.load_ty()),
-        });
-        let data = self.to_le_bytes();
-        // jump over.
-        insts.push(Inst::Jal {
-            dest: BranchTarget::ResolvedOffset(Inst::INSTRUCTION_SIZE + data.len() as i32),
-        });
-        insts.push(Inst::RawData { data });
-        insts
-    }
-
-    // load and perform an extra add.
-    pub(crate) fn load_constant_and_add(self, rd: Writable<Reg>, rs: Reg) -> SmallInstVec<Inst> {
-        let mut insts = self.load_constant(rd, &mut |_| rd);
-        insts.push(Inst::AluRRR {
-            alu_op: AluOPRRR::Add,
-            rd,
-            rs1: rd.to_reg(),
-            rs2: rs,
-        });
-        insts
-    }
-}
-
 pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 {
     u32::try_from(m.to_real_reg().unwrap().hw_enc() & 31).unwrap()
 }
@@ -399,8 +327,7 @@ impl Inst {
             | Inst::BrTable { .. }
             | Inst::Auipc { .. }
             | Inst::Lui { .. }
-            | Inst::LoadConst32 { .. }
-            | Inst::LoadConst64 { .. }
+            | Inst::LoadInlineConst { .. }
             | Inst::AluRRR { .. }
             | Inst::FpuRRR { .. }
             | Inst::AluRRImm12 { .. }
@@ -533,19 +460,34 @@ impl MachInstEmit for Inst {
                 let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.as_u32() << 12);
                 sink.put4(x);
             }
-            &Inst::LoadConst32 { rd, imm } => {
+            &Inst::LoadInlineConst { rd, ty, imm } => {
                 let rd = allocs.next_writable(rd);
-                LoadConstant::U32(imm)
-                    .load_constant(rd, &mut |_| rd)
-                    .into_iter()
-                    .for_each(|inst| inst.emit(&[], sink, emit_info, state));
-            }
-            &Inst::LoadConst64 { rd, imm } => {
-                let rd = allocs.next_writable(rd);
-                LoadConstant::U64(imm)
-                    .load_constant(rd, &mut |_| rd)
-                    .into_iter()
-                    .for_each(|inst| inst.emit(&[], sink, emit_info, state));
+
+                let data = &imm.to_le_bytes()[..ty.bytes() as usize];
+
+                let label_data: MachLabel = sink.get_label();
+                let label_end: MachLabel = sink.get_label();
+
+                // Load into rd
+                Inst::Load {
+                    rd,
+                    op: LoadOP::from_type(ty),
+                    flags: MemFlags::new(),
+                    from: AMode::Label(label_data),
+                }
+                .emit(&[], sink, emit_info, state);
+
+                // Jump over the inline pool
+                Inst::Jal {
+                    dest: BranchTarget::Label(label_end),
+                }
+                .emit(&[], sink, emit_info, state);
+
+                // Emit the inline data
+                sink.bind_label(label_data, &mut state.ctrl_plane);
+                Inst::RawData { data: data.into() }.emit(&[], sink, emit_info, state);
+
+                sink.bind_label(label_end, &mut state.ctrl_plane);
             }
             &Inst::FpuRR {
                 frm,
@@ -666,6 +608,8 @@ impl MachInstEmit for Inst {
                 let offset = from.get_offset_with_state(state);
                 let offset_imm12 = Imm12::maybe_from_i64(offset);
 
+                // TODO: We shouldn't just fall back to `LoadAddr` immediately. For `MachLabel`s
+                // we should try to emit the `auipc` and add a relocation on this load.
                 let (addr, imm12) = match (base, offset_imm12) {
                     // If the offset fits into an imm12 we can directly encode it.
                     (Some(base), Some(imm12)) => (base, imm12),
@@ -693,6 +637,8 @@ impl MachInstEmit for Inst {
                 let offset = to.get_offset_with_state(state);
                 let offset_imm12 = Imm12::maybe_from_i64(offset);
 
+                // TODO: We shouldn't just fall back to `LoadAddr` immediately. For `MachLabel`s
+                // we should try to emit the `auipc` and add a relocation on this store.
                 let (addr, imm12) = match (base, offset_imm12) {
                     // If the offset fits into an imm12 we can directly encode it.
                     (Some(base), Some(imm12)) => (base, imm12),
@@ -1265,8 +1211,14 @@ impl MachInstEmit for Inst {
                         .emit(&[], sink, emit_info, state);
                     }
                     (_, Some(rs), None) => {
-                        LoadConstant::U64(offset as u64)
-                            .load_constant_and_add(rd, rs)
+                        let mut insts = Inst::load_constant_u64(rd, offset as u64);
+                        insts.push(Inst::AluRRR {
+                            alu_op: AluOPRRR::Add,
+                            rd,
+                            rs1: rd.to_reg(),
+                            rs2: rs,
+                        });
+                        insts
                             .into_iter()
                             .for_each(|inst| inst.emit(&[], sink, emit_info, state));
                     }

@@ -227,16 +227,23 @@ impl Inst {
     pub(crate) fn load_constant_u32(rd: Writable<Reg>, value: u64) -> SmallInstVec<Inst> {
         let insts = Inst::load_const_imm(rd, value);
         insts.unwrap_or_else(|| {
-            smallvec![Inst::LoadConst32 {
+            smallvec![Inst::LoadInlineConst {
                 rd,
-                imm: value as u32
+                ty: I32,
+                imm: value
             }]
         })
     }
 
     pub fn load_constant_u64(rd: Writable<Reg>, value: u64) -> SmallInstVec<Inst> {
         let insts = Inst::load_const_imm(rd, value);
-        insts.unwrap_or_else(|| smallvec![Inst::LoadConst64 { rd, imm: value }])
+        insts.unwrap_or_else(|| {
+            smallvec![Inst::LoadInlineConst {
+                rd,
+                ty: I64,
+                imm: value
+            }]
+        })
     }
 
     pub(crate) fn construct_auipc_and_jalr(
@@ -377,8 +384,7 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
         }
         &Inst::Auipc { rd, .. } => collector.reg_def(rd),
         &Inst::Lui { rd, .. } => collector.reg_def(rd),
-        &Inst::LoadConst32 { rd, .. } => collector.reg_def(rd),
-        &Inst::LoadConst64 { rd, .. } => collector.reg_def(rd),
+        &Inst::LoadInlineConst { rd, .. } => collector.reg_def(rd),
         &Inst::AluRRR { rd, rs1, rs2, .. } => {
             collector.reg_use(rs1);
             collector.reg_use(rs2);
@@ -958,7 +964,7 @@ impl MachInst for Inst {
 
     fn worst_case_size() -> CodeOffset {
         // calculate by test function riscv64_worst_case_instruction_size()
-        116
+        124
     }
 
     fn ref_type_regclass(_settings: &settings::Flags) -> RegClass {
@@ -1371,16 +1377,7 @@ impl Inst {
             &Inst::Lui { rd, ref imm } => {
                 format!("{} {},{}", "lui", format_reg(rd.to_reg(), allocs), imm.bits)
             }
-            &Inst::LoadConst32 { rd, imm } => {
-                let rd = format_reg(rd.to_reg(), allocs);
-                let mut buf = String::new();
-                write!(&mut buf, "auipc {},0; ", rd).unwrap();
-                write!(&mut buf, "ld {},12({}); ", rd, rd).unwrap();
-                write!(&mut buf, "j {}; ", Inst::INSTRUCTION_SIZE + 4).unwrap();
-                write!(&mut buf, ".4byte 0x{:x}", imm).unwrap();
-                buf
-            }
-            &Inst::LoadConst64 { rd, imm } => {
+            &Inst::LoadInlineConst { rd, imm, .. } => {
                 let rd = format_reg(rd.to_reg(), allocs);
                 let mut buf = String::new();
                 write!(&mut buf, "auipc {},0; ", rd).unwrap();

@@ -440,17 +440,10 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend>
         self.backend.isa_flags.has_zbs()
     }
 
-    fn offset32_imm(&mut self, offset: i32) -> Offset32 {
-        Offset32::new(offset)
-    }
     fn default_memflags(&mut self) -> MemFlags {
         MemFlags::new()
     }
 
-    fn pack_float_rounding_mode(&mut self, f: &FRM) -> OptionFloatRoundingMode {
-        Some(*f)
-    }
-
     fn int_convert_2_float_op(&mut self, from: Type, is_signed: bool, to: Type) -> FpuOPRR {
         FpuOPRR::int_convert_2_float_op(from, is_signed, to)
     }