Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
47b2c59
riscv64: Add icmp tests
afonso360 Mar 24, 2023
a083500
riscv64: Move lower_icmp to inst.isle
afonso360 Mar 24, 2023
fda1f38
riscv64: Optimize `icmp` codegen for <=i64 types
afonso360 Mar 24, 2023
472d593
riscv64: Improve `icmp.i128` codegen
afonso360 Mar 25, 2023
3211c73
riscv64: Add `icmp_imm` fallback case
afonso360 Mar 25, 2023
6a0662e
riscv64: Add `sltz`/`sgtz` mnemonics
afonso360 Mar 25, 2023
a99597a
riscv64: Add optimizations for `icmp.slt` with 0
afonso360 Mar 25, 2023
90fc5e4
riscv64: Add optimizations for `icmp.sgt` with 0
afonso360 Mar 25, 2023
eecd74d
riscv64: Add `icmp.slt` with imm12 const optimizations
afonso360 Mar 25, 2023
172709b
riscv64: Add `icmp.ult` with imm12 const optimizations
afonso360 Mar 25, 2023
dbf51a1
riscv64: Use better extractors for icmp
afonso360 Mar 25, 2023
b83324f
riscv64: Reorder icmp rules
afonso360 Mar 25, 2023
e3bcf75
riscv64: Improve Icmp Imm codegen
afonso360 Mar 26, 2023
e822ca5
cranelift: Delete `u64_gt`
afonso360 Mar 28, 2023
0298962
riscv64: Add `Signedness` struct
afonso360 Mar 28, 2023
1050eac
riscv64: Clarify comment
afonso360 Mar 28, 2023
297e44d
Revert "riscv64: Add `Signedness` struct"
afonso360 Mar 31, 2023
2484e7f
riscv64: Use `select_reg` for i128 rules
afonso360 Apr 4, 2023
2e729fb
riscv64: Handle `icmp+imm` when the imm is on the LHS
afonso360 Apr 4, 2023
efd86ed
riscv64: Improve handling of i8 icmp_imm's
afonso360 Apr 7, 2023
17ff0e6
riscv64: Remove unused code
afonso360 Apr 7, 2023
9ec9ad7
riscv64: Fixup tests
afonso360 Apr 13, 2023
0e419c4
riscv64: Cleanup `icmp` rules
afonso360 Apr 13, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
227 changes: 210 additions & 17 deletions cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -234,13 +234,6 @@
(rs OptionReg)
(imm OptionUimm5)
(csr CsrAddress))
;; an integer compare.
(Icmp
(cc IntCC)
(rd WritableReg)
(a ValueRegs)
(b ValueRegs)
(ty Type))
;; select a reg base on condition.
;; very useful because in lowering stage we can not have condition branch.
(SelectReg
Expand Down Expand Up @@ -755,6 +748,7 @@
(writable_reg_to_reg rd)))



;;;; Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; RV32I Base Integer Instruction Set
Expand Down Expand Up @@ -861,12 +855,36 @@
(rule (rv_andi rs1 imm)
(alu_rr_imm12 (AluOPRRI.Andi) rs1 imm))

;; Helper for emitting the `slt` ("Set Less Than") instruction.
;; Signed comparison: rd ← (rs1 <s rs2) ? 1 : 0
(decl rv_slt (Reg Reg) Reg)
(rule (rv_slt rs1 rs2)
(alu_rrr (AluOPRRR.Slt) rs1 rs2))

;; Helper for emitting the `sltz` ("Set Less Than Zero") instruction.
;; This instruction is a mnemonic for `slt rd, rs, zero`, i.e. it sets
;; rd to 1 iff rs1 is negative (signed).
(decl rv_sltz (Reg) Reg)
(rule (rv_sltz rs1)
(rv_slt rs1 (zero_reg)))

;; Helper for emitting the `sgtz` ("Set Greater Than Zero") instruction.
;; This instruction is a mnemonic for `slt rd, zero, rs`, i.e. it sets
;; rd to 1 iff rs1 is strictly positive (signed).
(decl rv_sgtz (Reg) Reg)
(rule (rv_sgtz rs1)
(rv_slt (zero_reg) rs1))

;; Helper for emitting the `slti` ("Set Less Than Immediate") instruction.
;; Signed comparison against an imm12: rd ← (rs1 <s imm) ? 1 : 0
(decl rv_slti (Reg Imm12) Reg)
(rule (rv_slti rs1 imm)
(alu_rr_imm12 (AluOPRRI.Slti) rs1 imm))

;; Helper for emitting the `sltu` ("Set Less Than Unsigned") instruction.
;; Unsigned comparison: rd ← (rs1 <u rs2) ? 1 : 0
(decl rv_sltu (Reg Reg) Reg)
(rule (rv_sltu rs1 rs2)
(alu_rrr (AluOPRRR.SltU) rs1 rs2))

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like some trailing whitespace got added here. You can use git diff --check to check a range of commits for any whitespace issues like that.

;; Helper for emitting the `snez` instruction.
;; This instruction is a mnemonic for `sltu rd, zero, rs`.
(decl rv_snez (Reg) Reg)
Expand Down Expand Up @@ -1311,6 +1329,11 @@
;; Extractor that succeeds when the u64 is representable as an Imm12.
(decl imm12_from_u64 (Imm12) u64)
(extern extractor imm12_from_u64 imm12_from_u64)

;; Extracts an imm12 from an i64. The i64 must be in a range that can be
;; represented as an imm12. The value is sign-extended according to the type
;; provided. This constructor is partial: it fails to match when the value
;; does not fit in an imm12.
(decl pure partial imm12_sextend_i64 (Type i64) Imm12)
(extern constructor imm12_sextend_i64 imm12_sextend_i64)


;; Float Helpers
Expand Down Expand Up @@ -2262,13 +2285,183 @@
(move_x_to_f tmp2 ty)))


;;; lower icmp
(decl lower_icmp (IntCC ValueRegs ValueRegs Type) Reg)
(rule 1 (lower_icmp cc x y ty)
(if (signed_cond_code cc))
(gen_icmp cc (ext_int_if_need $true x ty) (ext_int_if_need $true y ty) ty))
(rule (lower_icmp cc x y ty)
(gen_icmp cc (ext_int_if_need $false x ty) (ext_int_if_need $false y ty) ty))


;; For Equal and NotEqual it doesn't matter which type of extension we
;; perform, as long as we are consistent on both sides. So try to pick the
;; ExtendOp's that have a dedicated instruction in the Base ISA.
;;
;; The special cases here are:
;; - i8 -> any: we should prefer ExtendOp.Zero
;; - i32 -> i64: we should prefer ExtendOp.Signed
;;
;; We only handle the signed case here since the unsigned case is the default
;; for `intcc_to_extend_op`. The other cases lower into a two instruction
;; sequence.
(decl icmp_intcc_extend (IntCC Type) ExtendOp)
(rule 1 (icmp_intcc_extend (IntCC.Equal) $I32) (ExtendOp.Signed))
(rule 1 (icmp_intcc_extend (IntCC.NotEqual) $I32) (ExtendOp.Signed))
(rule (icmp_intcc_extend cc _) (intcc_to_extend_op cc))


;; Generates an icmp sequence for the given type.
;;
;; Sub-64-bit operands are first extended to a full 64-bit register
;; (I128 operands are already register pairs and need no extension),
;; after which `gen_icmp_inner` performs the actual comparison.
(decl gen_icmp (IntCC ValueRegs ValueRegs Type) Reg)

;; On I128's we don't need any extension.
(rule 1 (gen_icmp cc x y $I128)
(gen_icmp_inner cc x y $I128))

;; Otherwise emit the extension sequence before the comparison.
;; Since both operands were just extended to 64 bits, the inner comparison
;; is performed at $I64 rather than at the original (narrower) type.
(rule (gen_icmp cc x y (fits_in_64 ty))
(let ((extend_op ExtendOp (icmp_intcc_extend cc ty))
(x_ext Reg (extend x extend_op ty $I64))
(y_ext Reg (extend y extend_op ty $I64)))
(gen_icmp_inner cc x_ext y_ext $I64)))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that the operands were just extended to I64, should gen_icmp_inner be called with $I64 instead of ty?




;; This emits just the comparison instructions and assumes that
;; the arguments are already extended.
;;
;; We only have actual lowerings for Equal/NotEqual/SignedLessThan/UnsignedLessThan.
;; Everything else just recurses into one of those cases.
(decl gen_icmp_inner (IntCC ValueRegs ValueRegs Type) Reg)

;; We only implement LessThan, for GreaterThan we just reverse the arguments.
(rule 4 (gen_icmp_inner cc x y ty)
(if (intcc_greater_than cc))
(gen_icmp_inner (intcc_reverse cc) y x ty))

;; For these rules, we can just use the normal rules and then invert the result.
;; i.e. `x <= y` is the same as `!(x > y)`.
;; (`intcc_unsigned` normalizes away signedness, so this matches both the
;; signed and the unsigned `<=` condition codes.)
(rule 3 (gen_icmp_inner cc x y ty)
(if-let (IntCC.UnsignedLessThanOrEqual) (intcc_unsigned cc))
(let ((res Reg (gen_icmp_inner (intcc_inverse cc) x y ty)))
(rv_xori res (imm12_const 1))))


;; For `*LessThan` we have a dedicated instruction `slt`/`sltu`.
(rule (gen_icmp_inner (IntCC.SignedLessThan) x y (fits_in_64 ty))
(rv_slt x y))
(rule (gen_icmp_inner (IntCC.UnsignedLessThan) x y (fits_in_64 ty))
(rv_sltu x y))


;; Compare the top halves of the two values. If they are equal, then
;; we can just check the bottom halves. Otherwise we only need to check
;; the top halves.
;;
;; In both signed and unsigned variants the bottom half is always compared
;; as unsigned. Since we know that the top halves are equal both signs
;; are the same. If the number is positive, this is fairly straightforward
;; and if the number is negative in the signed case we can still use
;; an unsigned comparison due to the way two's complement works.
;;
;; As an example: 0xFFFE < 0xFFFF. Here both are negative numbers, but when
;; considering only the bottom byte, 0xFE is smaller than 0xFF when viewed
;; as unsigned. This also holds true when viewing both of these numbers as
;; signed (-2 < -1), so we can use the unsigned comparison for the bottom half.
;;
;; Emit the following sequence:
;; slt{,u} t1, x_hi, y_hi
;; sltu t2, x_lo, y_lo
;; beq x_hi, y_hi, .top_is_equal
;; mov rd, t1
;; j .end
;; .top_is_equal:
;; mov rd, t2
;; .end:
(rule 2 (gen_icmp_inner cc x y $I128)
(if-let (IntCC.UnsignedLessThan) (intcc_unsigned cc))
(let ((x_lo Reg (value_regs_get x 0))
(x_hi Reg (value_regs_get x 1))
(y_lo Reg (value_regs_get y 0))
(y_hi Reg (value_regs_get y 1))
;; Generate compares for both halves.
;; The top compare respects the signedness of `cc`; the bottom
;; compare is always unsigned (see comment above), so it must use
;; `sltu`, not `slt`.
(top_cmp Reg (gen_icmp_inner cc x_hi y_hi $I64))
(bottom_cmp Reg (rv_sltu x_lo y_lo)))
;; If the high parts are equal, the result only depends on the bottom.
(gen_select_reg (IntCC.Equal) x_hi y_hi bottom_cmp top_cmp)))


;; Compare both registers using xor, and set the result using the dedicated
;; `seqz`/`snez` instructions.
(rule (gen_icmp_inner (IntCC.Equal) x y (fits_in_64 ty))
(rv_seqz (rv_xor x y)))
(rule (gen_icmp_inner (IntCC.NotEqual) x y (fits_in_64 ty))
(rv_snez (rv_xor x y)))

;; In the I128 case we just `xor` everything and check if it's zero at the end.
;; ValueRegs index 0 is the low half and index 1 is the high half (see the
;; `*LessThan` i128 rule above), so the intermediate difference values are
;; named accordingly. Each xor result is non-zero iff that half differs.
(rule 1 (gen_icmp_inner (IntCC.Equal) x y $I128)
(let ((lo_diff Reg (rv_xor (value_regs_get x 0) (value_regs_get y 0)))
(hi_diff Reg (rv_xor (value_regs_get x 1) (value_regs_get y 1)))
(res Reg (rv_or lo_diff hi_diff)))
(rv_seqz res)))
(rule 1 (gen_icmp_inner (IntCC.NotEqual) x y $I128)
(let ((lo_diff Reg (rv_xor (value_regs_get x 0) (value_regs_get y 0)))
(hi_diff Reg (rv_xor (value_regs_get x 1) (value_regs_get y 1)))
(res Reg (rv_or lo_diff hi_diff)))
(rv_snez res)))




;; For some icmp's we can optimize the instruction sequence if the RHS is a constant.
;; The immediate argument is the constant sign-extended to an i64.
;;
;; TODO: Currently we only have rules for <=64bit types. We can add more rules for I128's
(decl gen_icmp_imm (IntCC ValueRegs i64 Type) Reg)

;; This rule isn't totally necessary since we can just use `slti 0`, but
;; it's the official mnemonic for this operation and gives a slightly nicer
;; disassembly output.
(rule 5 (gen_icmp_imm (IntCC.SignedLessThan) x 0 (fits_in_64 ty))
(rv_sltz (sext x ty $I64)))

;; `sgtz` is preferable since our equivalent immediate lowering has to do `slt+xori`.
(rule 5 (gen_icmp_imm (IntCC.SignedGreaterThan) x 0 (fits_in_64 ty))
(rv_sgtz (sext x ty $I64)))

;; For these IntCC's we need to both add 1 to the immediate and invert the result.
;; i.e. `x > imm` is the same as `!(x < imm + 1)`.
;;
;; FIXME(review): `i64_add imm 1` can overflow when `ty` is I64 and `imm` is
;; the maximum value for the given signedness (e.g. i64::MAX for
;; SignedGreaterThan). `x > imm` is then false for every `x`, but the wrapped
;; addition makes the inverted comparison yield true. This rule should fail
;; to match (and fall through to the generic lowering) when the addition
;; would overflow, instead of wrapping.
(rule 4 (gen_icmp_imm cc x imm ty)
(if-let (IntCC.UnsignedGreaterThan) (intcc_unsigned cc))
(let ((res Reg (gen_icmp_imm (intcc_reverse cc) x (i64_add imm 1) ty)))
(rv_xori res (imm12_const 1))))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding 1 to the constant may overflow. It isn't immediately obvious to me that this rule is always equivalent in that case.

If the constant is the maximum value for the given ty and the signedness of cc, and cc is *GreaterThan, then for all x this icmp must return false.

If we add 1 modulo the width of ty, then the result wraps around to the minimum for ty/cc. Then for all x, !(x < min) will be true. Which would be wrong.

However this doesn't add modulo the width of ty. Instead, it's a signed 64-bit addition. If ty is narrower than 64-bit, then x is always less than the result (whether it's compared signed or unsigned), so the inverse of that result is always false, which is correct.

If ty is I64 and cc is SignedGreaterThan, then the signed 64-bit addition overflows. Then checking signed less-than will be false, the final result will be true, so the rule is incorrect.

If ty is I64 and cc is UnsignedGreaterThan, then the signed 64-bit addition does not overflow, but interpreting it as unsigned does overflow. Then checking unsigned less-than is false, etc.

So I think this rule is wrong in case of overflow when ty is I64, isn't it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this case used to be covered when we used Imm12 instead of i64. Since imm12_add is partial we would have that automatically checked for us. I forgot to check for that when changing it to i64.


;; We directly support the inverse of these condition codes. So lower the inverse
;; and then invert the result using `xor`.
;; i.e. `x >= imm` is the same as `!(x < imm)`.
;; (`intcc_unsigned` normalizes away signedness, so this matches both the
;; signed and the unsigned `>=` condition codes.)
(rule 3 (gen_icmp_imm cc x imm ty)
(if-let (IntCC.UnsignedGreaterThanOrEqual) (intcc_unsigned cc))
(let ((res Reg (gen_icmp_imm (intcc_inverse cc) x imm ty)))
(rv_xori res (imm12_const 1))))

;; Here we can add 1 to the immediate and use the reverse condition code.
;; i.e. `x <= imm` is the same as `x < imm + 1`.
;;
;; FIXME(review): as with the `*GreaterThan` rule above, `i64_add imm 1` can
;; wrap when `ty` is I64 and `imm` is the maximum value, producing an
;; incorrect comparison. This rule should fail to match on overflow.
(rule 2 (gen_icmp_imm cc x imm ty)
(if-let (IntCC.UnsignedLessThanOrEqual) (intcc_unsigned cc))
(gen_icmp_imm (intcc_without_equal cc) x (i64_add imm 1) ty))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similarly, I think this rule is wrong if adding 1 overflows and ty is I64.


;; We have dedicated instructions for these two cases. (`slti`/`sltiu`)
;; `imm12_sextend_i64` is partial, so this rule only fires when the constant
;; (sign-extended according to `ty`) fits in an imm12; otherwise we fall
;; through to the generic immediate-loading rule below.
(rule 1 (gen_icmp_imm (IntCC.SignedLessThan) x n (fits_in_64 ty))
(if-let imm (imm12_sextend_i64 ty n))
(rv_slti (sext x ty $I64) imm))
(rule 1 (gen_icmp_imm (IntCC.UnsignedLessThan) x n (fits_in_64 ty))
(if-let imm (imm12_sextend_i64 ty n))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is sign-extending the constant the right thing to do for an unsigned comparison?

(rv_sltiu (zext x ty $I64) imm))

;; In the fallback case we load the immediate into a register and then
;; compare as a regular register-register icmp, extending both sides
;; with the ExtendOp appropriate for the condition code.
(rule (gen_icmp_imm cc x n (fits_in_64 ty))
(let ((extend_op ExtendOp (icmp_intcc_extend cc ty))
(x_ext Reg (extend x extend_op ty $I64))
;; TODO: Ideally we shouldn't need the sign extend instruction
;; here. We should be able to do it at compile time. But our
;; constant loading infrastructure isn't great yet.
(i Reg (imm $I64 (i64_as_u64 n)))
(y_ext Reg (extend i extend_op ty $I64)))
(gen_icmp cc x_ext y_ext $I64)))



(decl i128_sub (ValueRegs ValueRegs) ValueRegs)
Expand Down Expand Up @@ -2492,8 +2685,8 @@
((r_const_neg_1 Reg (load_imm12 -1))
(r_const_min Reg (rv_slli (load_imm12 1) (imm12_const 63)))
(tmp_rs1 Reg (shift_int_to_most_significant rs1 ty))
(t1 Reg (gen_icmp (IntCC.Equal) r_const_neg_1 rs2 ty))
(t2 Reg (gen_icmp (IntCC.Equal) r_const_min tmp_rs1 ty))
(t1 Reg (gen_icmp (IntCC.Equal) r_const_neg_1 rs2 $I64))
(t2 Reg (gen_icmp (IntCC.Equal) r_const_min tmp_rs1 $I64))
(test Reg (rv_and t1 t2)))
(gen_trapif test (TrapCode.IntegerOverflow))))

Expand Down
32 changes: 0 additions & 32 deletions cranelift/codegen/src/isa/riscv64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1160,38 +1160,6 @@ impl MachInstEmit for Inst {
&Inst::EBreak => {
sink.put4(0x00100073);
}
&Inst::Icmp {
cc,
rd,
ref a,
ref b,
ty,
} => {
let a = alloc_value_regs(a, &mut allocs);
let b = alloc_value_regs(b, &mut allocs);
let rd = allocs.next_writable(rd);
let label_true = sink.get_label();
let label_false = sink.get_label();
Inst::lower_br_icmp(
cc,
a,
b,
BranchTarget::Label(label_true),
BranchTarget::Label(label_false),
ty,
)
.into_iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));

sink.bind_label(label_true, &mut state.ctrl_plane);
Inst::load_imm12(rd, Imm12::TRUE).emit(&[], sink, emit_info, state);
Inst::Jal {
dest: BranchTarget::offset(Inst::INSTRUCTION_SIZE * 2),
}
.emit(&[], sink, emit_info, state);
sink.bind_label(label_false, &mut state.ctrl_plane);
Inst::load_imm12(rd, Imm12::FALSE).emit(&[], sink, emit_info, state);
}
&Inst::AtomicCas {
offset,
t0,
Expand Down
21 changes: 9 additions & 12 deletions cranelift/codegen/src/isa/riscv64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -503,12 +503,6 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
collector.reg_def(rd);
}

&Inst::Icmp { rd, a, b, .. } => {
collector.reg_uses(a.regs());
collector.reg_uses(b.regs());
collector.reg_def(rd);
}

&Inst::SelectReg {
rd,
rs1,
Expand Down Expand Up @@ -1104,12 +1098,6 @@ impl Inst {
ty, dst, e, v, addr, t0, offset,
)
}
&Inst::Icmp { cc, rd, a, b, ty } => {
let a = format_regs(a.regs(), allocs);
let b = format_regs(b.regs(), allocs);
let rd = format_reg(rd.to_reg(), allocs);
format!("{} {},{},{}##ty={}", cc.to_static_str(), rd, a, b, ty)
}
&Inst::IntSelect {
op,
ref dst,
Expand Down Expand Up @@ -1187,6 +1175,15 @@ impl Inst {
let rs2_s = format_reg(rs2, allocs);
let rd_s = format_reg(rd.to_reg(), allocs);
match alu_op {
AluOPRRR::Slt if rs2 == zero_reg() => {
format!("sltz {},{}", rd_s, rs1_s)
}
AluOPRRR::Slt if rs1 == zero_reg() => {
format!("sgtz {},{}", rd_s, rs2_s)
}
AluOPRRR::SltU if rs1 == zero_reg() => {
format!("snez {},{}", rd_s, rs2_s)
}
AluOPRRR::Adduw if rs2 == zero_reg() => {
format!("zext.w {},{}", rd_s, rs1_s)
}
Expand Down
22 changes: 11 additions & 11 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -833,18 +833,18 @@
(lower (store flags x @ (value_type $I128 ) p @ (value_type (ty_addr64 _)) offset))
(gen_store_128 p offset flags x))

(decl gen_icmp (IntCC ValueRegs ValueRegs Type) Reg)
(rule
(gen_icmp cc x y ty)
(let
((result WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.Icmp cc result x y ty))))
result))

;;;;; Rules for `icmp`;;;;;;;;;
(rule
(lower (icmp cc x @ (value_type ty) y))
(lower_icmp cc x y ty))
(rule (lower (icmp cc x @ (value_type ty) y))
(gen_icmp cc x y ty))

;; We have some optimizations that we can do when one of the sides is
;; a constant.
(rule 1 (lower (icmp cc x @ (value_type (fits_in_64 ty)) (u64_from_iconst y)))
(gen_icmp_imm cc x (i64_sextend_imm64 ty (imm64 y)) ty))

;; If the const is on the LHS, move it to the right and flip the IntCC.
(rule 2 (lower (icmp cc (u64_from_iconst x) y @ (value_type (fits_in_64 ty))))
(gen_icmp_imm (intcc_reverse cc) y (i64_sextend_imm64 ty (imm64 x)) ty))

;;;;; Rules for `fcmp`;;;;;;;;;
(rule
Expand Down
Loading