bytecodealliance · afonso360 · Mar 24, 2023 · Mar 24, 2023 · Mar 24, 2023 · Mar 25, 2023
@@ -234,13 +234,6 @@
       (rs OptionReg)
       (imm OptionUimm5)
       (csr CsrAddress))
-    ;; an integer compare.
-    (Icmp
-      (cc IntCC)
-      (rd WritableReg)
-      (a ValueRegs)
-      (b ValueRegs)
-      (ty Type))
     ;; select a reg base on condition.
     ;; very useful because in lowering stage we can not have condition branch.
     (SelectReg
@@ -755,6 +748,7 @@
     (writable_reg_to_reg rd)))
 
 
+
 ;;;; Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; RV32I Base Integer Instruction Set
@@ -861,12 +855,36 @@
 (rule (rv_andi rs1 imm)
   (alu_rr_imm12 (AluOPRRI.Andi) rs1 imm))
 
+;; Helper for emitting the `slt` ("Set Less Than") instruction.
+;; rd ← rs1 < rs2
+(decl rv_slt (Reg Reg) Reg)
+(rule (rv_slt rs1 rs2)
+  (alu_rrr (AluOPRRR.Slt) rs1 rs2))
+
+;; Helper for emitting the `sltz` instruction.
+;; This instruction is a mnemonic for `slt rd, rs, zero`.
+(decl rv_sltz (Reg) Reg)
+(rule (rv_sltz rs1)
+  (rv_slt rs1 (zero_reg)))
+
+;; Helper for emitting the `sgtz` instruction.
+;; This instruction is a mnemonic for `slt rd, zero, rs`.
+(decl rv_sgtz (Reg) Reg)
+(rule (rv_sgtz rs1)
+  (rv_slt (zero_reg) rs1))
+
+;; Helper for emiting the `slti` ("Set Less Than Immediate") instruction.
+;; rd ← rs1 < imm
+(decl rv_slti (Reg Imm12) Reg)
+(rule (rv_slti rs1 imm)
+  (alu_rr_imm12 (AluOPRRI.Slti) rs1 imm))
+
 ;; Helper for emitting the `sltu` ("Set Less Than Unsigned") instruction.
 ;; rd ← rs1 < rs2
 (decl rv_sltu (Reg Reg) Reg)
 (rule (rv_sltu rs1 rs2)
   (alu_rrr (AluOPRRR.SltU) rs1 rs2))
-
+  
 ;; Helper for emitting the `snez` instruction.
 ;; This instruction is a mnemonic for `sltu rd, zero, rs`.
 (decl rv_snez (Reg) Reg)
@@ -1311,6 +1329,11 @@
 (decl imm12_from_u64 (Imm12) u64)
 (extern extractor imm12_from_u64 imm12_from_u64)
 
+;; Extracts an imm12 from an i64. The i64 must be in a range that can be
+;; represented as an imm12. The value is sign-extended acording to the type
+;; provided.
+(decl pure partial imm12_sextend_i64 (Type i64) Imm12)
+(extern constructor imm12_sextend_i64 imm12_sextend_i64)
 
 
 ;; Float Helpers
@@ -2262,13 +2285,183 @@
     (move_x_to_f tmp2 ty)))
 
 
-;;; lower icmp
-(decl lower_icmp (IntCC ValueRegs ValueRegs Type) Reg)
-(rule 1 (lower_icmp cc x y ty)
-  (if (signed_cond_code cc))
-  (gen_icmp cc (ext_int_if_need $true x ty) (ext_int_if_need $true y ty) ty))
-(rule (lower_icmp cc x y ty)
-  (gen_icmp cc (ext_int_if_need $false x ty) (ext_int_if_need $false y ty) ty))
+
+
+;; For Equal and NotEqual it doesen't matter the type of extension that we 
+;; perform, as long as we are consistent on both sides. So try to pick the
+;; ExtendOp's that have a dedicated instruction in the Base ISA.
+;;
+;; The special cases here are:
+;; - i8 -> any: we should prefer ExtendOp.Zero
+;; - i32 -> i64: we should prefer ExtendOp.Signed
+;;
+;; We only handle the signed case here since the unsigned case is the default
+;; for `intcc_to_extend_op`. The other cases lower into a two instruction
+;; sequence.
+(decl icmp_intcc_extend (IntCC Type) ExtendOp)
+(rule 1 (icmp_intcc_extend (IntCC.Equal) $I32) (ExtendOp.Signed))
+(rule 1 (icmp_intcc_extend (IntCC.NotEqual) $I32) (ExtendOp.Signed))
+(rule (icmp_intcc_extend cc _) (intcc_to_extend_op cc))
+
+
+;; Generates an icmp sequence for the given type.
+(decl gen_icmp (IntCC ValueRegs ValueRegs Type) Reg)
+
+;; On I128's we don't need any extension.
+(rule 1 (gen_icmp cc x y $I128)
+  (gen_icmp_inner cc x y $I128))
+
+;; Otherwise emit the extension sequence before the comparision.
+(rule (gen_icmp cc x y (fits_in_64 ty))
+  (let ((extend_op ExtendOp (icmp_intcc_extend cc ty))
+        (x_ext Reg (extend x extend_op ty $I64))
+        (y_ext Reg (extend y extend_op ty $I64)))
+      (gen_icmp_inner cc x_ext y_ext ty)))
+
+
+
+;; This emits just the comparision instructions and assumes that
+;; the arguments are already extended.
+;;
+;; We only have actual lowerings for Equal/NotEqual/SignedLessThan/UnsignedLessThan.
+;; Everything else just recurses into one of those cases.
+(decl gen_icmp_inner (IntCC ValueRegs ValueRegs Type) Reg)
+
+;; We only implement LessThan, for GreaterThan we just reverse the arguments
+(rule 4 (gen_icmp_inner cc x y ty)
+  (if (intcc_greater_than cc))
+  (gen_icmp_inner (intcc_reverse cc) y x ty))
+
+;; For these rules, we can just use the normal rules and then invert the result.
+;; i.e. `x <= y` is the same as `!(x > y)`.
+(rule 3 (gen_icmp_inner cc x y ty)
+  (if-let (IntCC.UnsignedLessThanOrEqual) (intcc_unsigned cc))
+  (let ((res Reg (gen_icmp_inner (intcc_inverse cc) x y ty)))
+    (rv_xori res (imm12_const 1))))
+
+
+;; For `*LessThan` we have a dedicated instruction `slt`/`sltu`.
+(rule (gen_icmp_inner (IntCC.SignedLessThan) x y (fits_in_64 ty))
+  (rv_slt x y))
+(rule (gen_icmp_inner (IntCC.UnsignedLessThan) x y (fits_in_64 ty))
+  (rv_sltu x y))
+
+
+;; Compare the top halves of the two values. If they are equal, then
+;; we can just check the bottom halves. Otherwise we only need to check
+;; the top halves.
+;;
+;; In both signed and unsigned variants the bottom half is always compared
+;; as unsigned. Since we know that the top halves are equal both signs
+;; are the same. If the number is positive, this is fairly straight forward
+;; and if the number is negative in the signed case we can still use
+;; an unsigned comparision due to the way two's complement works.
+;;
+;; As an example:  0xFFFE < 0xFFFF Here both are negative numbers, but when 
+;; considering only at the bottom byte, 0xFE is smaller than 0xFF when viewed
+;; as unsigned. This also holds true when viewing both of these numbers as
+;; signed (-2 < -1), so we can use the unsigned comparison for the bottom half.
+;;
+;; Emit the following sequence:
+;;     slt{,u}   t1, x_hi, y_hi
+;;     sltu      t2, x_lo, y_lo
+;;     beq       x_hi, y_hi, .top_is_equal
+;;     mov       rd, t1
+;;     j         .end
+;; .top_is_equal:
+;;     mov       rd, t2
+;; .end:
+(rule 2 (gen_icmp_inner cc x y $I128)
+  (if-let (IntCC.UnsignedLessThan) (intcc_unsigned cc))
+  (let ((x_lo Reg (value_regs_get x 0))
+        (x_hi Reg (value_regs_get x 1))
+        (y_lo Reg (value_regs_get y 0))
+        (y_hi Reg (value_regs_get y 1))
+        ;; Generate compares for both halves.
+        ;; The bottom compare depends on the IntCC.
+        (top_cmp Reg (gen_icmp_inner cc x_hi y_hi $I64))
+        (bottom_cmp Reg (rv_slt x_lo y_lo)))
+    ;; If the high parts are equal, the result only depends on the bottom
+    (gen_select_reg (IntCC.Equal) x_hi y_hi bottom_cmp top_cmp)))
+
+
+;; Compare both registers using xor, and set the result using the dedicated
+;; `seqz`/`snez` instructions.
+(rule (gen_icmp_inner (IntCC.Equal) x y (fits_in_64 ty))
+  (rv_seqz (rv_xor x y)))
+(rule (gen_icmp_inner (IntCC.NotEqual) x y (fits_in_64 ty))
+  (rv_snez (rv_xor x y)))
+
+;; In the I128 case we just `xor` everything and check if its zero at the end.
+(rule 1 (gen_icmp_inner (IntCC.Equal) x y $I128)
+  (let ((top_eq Reg (rv_xor (value_regs_get x 0) (value_regs_get y 0)))
+        (bottom_eq Reg (rv_xor (value_regs_get x 1) (value_regs_get y 1)))
+        (res Reg (rv_or top_eq bottom_eq)))
+    (rv_seqz res)))
+(rule 1 (gen_icmp_inner (IntCC.NotEqual) x y $I128)
+  (let ((top_eq Reg (rv_xor (value_regs_get x 0) (value_regs_get y 0)))
+        (bottom_eq Reg (rv_xor (value_regs_get x 1) (value_regs_get y 1)))
+        (res Reg (rv_or top_eq bottom_eq)))
+    (rv_snez res)))
+
+
+
+
+;; For some icmp's we can optimize the instruction sequence if the RHS is a constant.
+;;
+;; TODO: Currently we only have rules for <=64bit types. We can add more rules for I128's
+(decl gen_icmp_imm (IntCC ValueRegs i64 Type) Reg)
+
+;; This rule isn't totally necessary since we can just use `slti 0`, but
+;; it's the official mnemonic for this operation and gives a slightly nicer
+;; disassembly output.
+(rule 5 (gen_icmp_imm (IntCC.SignedLessThan) x 0 (fits_in_64 ty))
+  (rv_sltz (sext x ty $I64)))
+
+;; `sgtz` is preferable since our equivalent immediate lowering has to do `slt+xori`.
+(rule 5 (gen_icmp_imm (IntCC.SignedGreaterThan) x 0 (fits_in_64 ty))
+  (rv_sgtz (sext x ty $I64)))
+
+;; For these IntCC's we need to both add 1 to the immediate and invert the result.
+;; i.e. `x > imm` is the same as `!(x < imm + 1)`.
+(rule 4 (gen_icmp_imm cc x imm ty)
+  (if-let (IntCC.UnsignedGreaterThan) (intcc_unsigned cc))
+  (let ((res Reg (gen_icmp_imm (intcc_reverse cc) x (i64_add imm 1) ty)))
+    (rv_xori res (imm12_const 1))))
+
+;; We directly support the inverse of these condition codes. So lower the inverse
+;; and then invert the result using `xor`.
+;; i.e. `x >= imm` is the same as `!(x < imm)`.
+(rule 3 (gen_icmp_imm cc x imm ty)
+  (if-let (IntCC.UnsignedGreaterThanOrEqual) (intcc_unsigned cc))
+  (let ((res Reg (gen_icmp_imm (intcc_inverse cc) x imm ty)))
+    (rv_xori res (imm12_const 1))))
+
+;; Here we can add 1 to the immediate and use the reverse condition code.
+;; i.e. `x <= imm` is the same as `x < imm + 1`.
+(rule 2 (gen_icmp_imm cc x imm ty)
+  (if-let (IntCC.UnsignedLessThanOrEqual) (intcc_unsigned cc))
+  (gen_icmp_imm (intcc_without_equal cc) x (i64_add imm 1) ty))
+
+;; We have dedicated instructions for these two cases. (`slti`/`sltiu`)
+(rule 1 (gen_icmp_imm (IntCC.SignedLessThan) x n (fits_in_64 ty))
+  (if-let imm (imm12_sextend_i64 ty n))
+  (rv_slti (sext x ty $I64) imm))
+(rule 1 (gen_icmp_imm (IntCC.UnsignedLessThan) x n (fits_in_64 ty)) 
+  (if-let imm (imm12_sextend_i64 ty n))
+  (rv_sltiu (zext x ty $I64) imm))
+
+;; In the fallback case we load the immediate and then compare.
+(rule (gen_icmp_imm cc x n (fits_in_64 ty))
+  (let ((extend_op ExtendOp (icmp_intcc_extend cc ty))
+        (x_ext Reg (extend x extend_op ty $I64))
+        ;; TODO: Ideally we shouldn't need the sign extend instruction
+        ;; here. We should be able to do it at compile time. But our
+        ;; constant loading infrastructure isn't great yet.
+        (i Reg (imm $I64 (i64_as_u64 n)))
+        (y_ext Reg (extend i extend_op ty $I64)))
+    (gen_icmp cc x_ext y_ext $I64)))
+
 
 
 (decl i128_sub (ValueRegs ValueRegs) ValueRegs)
@@ -2492,8 +2685,8 @@
     ((r_const_neg_1 Reg (load_imm12 -1))
       (r_const_min Reg (rv_slli (load_imm12 1) (imm12_const 63)))
       (tmp_rs1 Reg (shift_int_to_most_significant rs1 ty))
-      (t1 Reg (gen_icmp (IntCC.Equal) r_const_neg_1 rs2 ty))
-      (t2 Reg (gen_icmp (IntCC.Equal) r_const_min tmp_rs1 ty))
+      (t1 Reg (gen_icmp (IntCC.Equal) r_const_neg_1 rs2 $I64))
+      (t2 Reg (gen_icmp (IntCC.Equal) r_const_min tmp_rs1 $I64))
       (test Reg (rv_and t1 t2)))
     (gen_trapif test (TrapCode.IntegerOverflow))))
 

@@ -1160,38 +1160,6 @@ impl MachInstEmit for Inst {
             &Inst::EBreak => {
                 sink.put4(0x00100073);
             }
-            &Inst::Icmp {
-                cc,
-                rd,
-                ref a,
-                ref b,
-                ty,
-            } => {
-                let a = alloc_value_regs(a, &mut allocs);
-                let b = alloc_value_regs(b, &mut allocs);
-                let rd = allocs.next_writable(rd);
-                let label_true = sink.get_label();
-                let label_false = sink.get_label();
-                Inst::lower_br_icmp(
-                    cc,
-                    a,
-                    b,
-                    BranchTarget::Label(label_true),
-                    BranchTarget::Label(label_false),
-                    ty,
-                )
-                .into_iter()
-                .for_each(|i| i.emit(&[], sink, emit_info, state));
-
-                sink.bind_label(label_true, &mut state.ctrl_plane);
-                Inst::load_imm12(rd, Imm12::TRUE).emit(&[], sink, emit_info, state);
-                Inst::Jal {
-                    dest: BranchTarget::offset(Inst::INSTRUCTION_SIZE * 2),
-                }
-                .emit(&[], sink, emit_info, state);
-                sink.bind_label(label_false, &mut state.ctrl_plane);
-                Inst::load_imm12(rd, Imm12::FALSE).emit(&[], sink, emit_info, state);
-            }
             &Inst::AtomicCas {
                 offset,
                 t0,

@@ -503,12 +503,6 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
             collector.reg_def(rd);
         }
 
-        &Inst::Icmp { rd, a, b, .. } => {
-            collector.reg_uses(a.regs());
-            collector.reg_uses(b.regs());
-            collector.reg_def(rd);
-        }
-
         &Inst::SelectReg {
             rd,
             rs1,
@@ -1104,12 +1098,6 @@ impl Inst {
                     ty, dst, e, v, addr, t0, offset,
                 )
             }
-            &Inst::Icmp { cc, rd, a, b, ty } => {
-                let a = format_regs(a.regs(), allocs);
-                let b = format_regs(b.regs(), allocs);
-                let rd = format_reg(rd.to_reg(), allocs);
-                format!("{} {},{},{}##ty={}", cc.to_static_str(), rd, a, b, ty)
-            }
             &Inst::IntSelect {
                 op,
                 ref dst,
@@ -1187,6 +1175,15 @@ impl Inst {
                 let rs2_s = format_reg(rs2, allocs);
                 let rd_s = format_reg(rd.to_reg(), allocs);
                 match alu_op {
+                    AluOPRRR::Slt if rs2 == zero_reg() => {
+                        format!("sltz {},{}", rd_s, rs1_s)
+                    }
+                    AluOPRRR::Slt if rs1 == zero_reg() => {
+                        format!("sgtz {},{}", rd_s, rs2_s)
+                    }
+                    AluOPRRR::SltU if rs1 == zero_reg() => {
+                        format!("snez {},{}", rd_s, rs2_s)
+                    }
                     AluOPRRR::Adduw if rs2 == zero_reg() => {
                         format!("zext.w {},{}", rd_s, rs1_s)
                     }

@@ -833,18 +833,18 @@
   (lower (store flags x @ (value_type $I128 ) p @ (value_type (ty_addr64 _)) offset))
   (gen_store_128 p offset flags x))
 
-(decl gen_icmp (IntCC ValueRegs ValueRegs Type) Reg)
-(rule
-  (gen_icmp cc x y ty)
-  (let
-    ((result WritableReg (temp_writable_reg $I64))
-      (_ Unit (emit (MInst.Icmp cc result x y ty))))
-    result))
-
 ;;;;;  Rules for `icmp`;;;;;;;;;
-(rule
-  (lower (icmp cc x @ (value_type ty) y))
-  (lower_icmp cc x y ty))
+(rule (lower (icmp cc x @ (value_type ty) y))
+  (gen_icmp cc x y ty))
+
+;; We have some optimizations that we can do when one of the sides is
+;; a constant.
+(rule 1 (lower (icmp cc x @ (value_type (fits_in_64 ty)) (u64_from_iconst y)))
+  (gen_icmp_imm cc x (i64_sextend_imm64 ty (imm64 y)) ty))
+
+;; If the const is on the LHS, move it to the right and flip the IntCC.
+(rule 2 (lower (icmp cc (u64_from_iconst x) y @ (value_type (fits_in_64 ty))))
+  (gen_icmp_imm (intcc_reverse cc) y (i64_sextend_imm64 ty (imm64 x)) ty))
 
 ;;;;;  Rules for `fcmp`;;;;;;;;;
 (rule