-
Notifications
You must be signed in to change notification settings - Fork 1.6k
riscv64: Improve icmp codegen
#6112
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
47b2c59
a083500
fda1f38
472d593
3211c73
6a0662e
a99597a
90fc5e4
eecd74d
172709b
dbf51a1
b83324f
e3bcf75
e822ca5
0298962
1050eac
297e44d
2484e7f
2e729fb
efd86ed
17ff0e6
9ec9ad7
0e419c4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -234,13 +234,6 @@ | |
| (rs OptionReg) | ||
| (imm OptionUimm5) | ||
| (csr CsrAddress)) | ||
| ;; an integer compare. | ||
| (Icmp | ||
| (cc IntCC) | ||
| (rd WritableReg) | ||
| (a ValueRegs) | ||
| (b ValueRegs) | ||
| (ty Type)) | ||
| ;; Select a reg based on a condition. | ||
| ;; Very useful because in the lowering stage we cannot have conditional branches. | ||
| (SelectReg | ||
|
|
@@ -755,6 +748,7 @@ | |
| (writable_reg_to_reg rd))) | ||
|
|
||
|
|
||
|
|
||
| ;;;; Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
|
|
||
| ;; RV32I Base Integer Instruction Set | ||
|
|
@@ -861,12 +855,36 @@ | |
| (rule (rv_andi rs1 imm) | ||
| (alu_rr_imm12 (AluOPRRI.Andi) rs1 imm)) | ||
|
|
||
| ;; Emits the register-register `slt` ("Set Less Than") instruction. | ||
| ;; rd ← lhs < rhs (this is the helper used for `IntCC.SignedLessThan`). | ||
| (decl rv_slt (Reg Reg) Reg) | ||
| (rule (rv_slt lhs rhs) | ||
| (alu_rrr (AluOPRRR.Slt) lhs rhs)) | ||
|
|
||
| ;; Emits the `sltz` pseudo-instruction. | ||
| ;; `sltz rd, rs` is the mnemonic for `slt rd, rs, zero`, so this simply | ||
| ;; forwards to `rv_slt` with the zero register as the right-hand operand. | ||
| (decl rv_sltz (Reg) Reg) | ||
| (rule (rv_sltz src) | ||
| (rv_slt src (zero_reg))) | ||
|
|
||
| ;; Emits the `sgtz` pseudo-instruction. | ||
| ;; `sgtz rd, rs` is the mnemonic for `slt rd, zero, rs`, so this simply | ||
| ;; forwards to `rv_slt` with the zero register as the left-hand operand. | ||
| (decl rv_sgtz (Reg) Reg) | ||
| (rule (rv_sgtz src) | ||
| (rv_slt (zero_reg) src)) | ||
|
|
||
| ;; Emits the `slti` ("Set Less Than Immediate") instruction. | ||
| ;; rd ← src < imm, where `imm` is an `Imm12`. | ||
| (decl rv_slti (Reg Imm12) Reg) | ||
| (rule (rv_slti src imm) | ||
| (alu_rr_imm12 (AluOPRRI.Slti) src imm)) | ||
|
|
||
| ;; Emits the register-register `sltu` ("Set Less Than Unsigned") instruction. | ||
| ;; rd ← lhs < rhs (this is the helper used for `IntCC.UnsignedLessThan`). | ||
| (decl rv_sltu (Reg Reg) Reg) | ||
| (rule (rv_sltu lhs rhs) | ||
| (alu_rrr (AluOPRRR.SltU) lhs rhs)) | ||
|
|
||
| ;; Helper for emitting the `snez` instruction. | ||
| ;; This instruction is a mnemonic for `sltu rd, zero, rs`. | ||
| (decl rv_snez (Reg) Reg) | ||
|
|
@@ -1311,6 +1329,11 @@ | |
| (decl imm12_from_u64 (Imm12) u64) | ||
| (extern extractor imm12_from_u64 imm12_from_u64) | ||
|
|
||
| ;; Extracts an imm12 from an i64. The i64 must be in a range that can be | ||
| ;; represented as an imm12. The value is sign-extended according to the type | ||
| ;; provided. | ||
| ;; | ||
| ;; Declared `pure partial`: the constructor may fail to produce a value, in | ||
| ;; which case the rule using it (e.g. via `if-let`) does not match. | ||
| ;; The implementation is external to this file. | ||
| (decl pure partial imm12_sextend_i64 (Type i64) Imm12) | ||
| (extern constructor imm12_sextend_i64 imm12_sextend_i64) | ||
|
|
||
|
|
||
| ;; Float Helpers | ||
|
|
@@ -2262,13 +2285,183 @@ | |
| (move_x_to_f tmp2 ty))) | ||
|
|
||
|
|
||
| ;;; Lower an `icmp`. | ||
| ;;; Both operands go through `ext_int_if_need` before being handed to | ||
| ;;; `gen_icmp`. The flag passed is `$true` for signed condition codes and | ||
| ;;; `$false` otherwise (presumably selecting sign- vs. zero-extension; | ||
| ;;; confirm against `ext_int_if_need`). | ||
| (decl lower_icmp (IntCC ValueRegs ValueRegs Type) Reg) | ||
| (rule 1 (lower_icmp cc lhs rhs ty) | ||
| (if (signed_cond_code cc)) | ||
| (gen_icmp cc (ext_int_if_need $true lhs ty) (ext_int_if_need $true rhs ty) ty)) | ||
| (rule (lower_icmp cc lhs rhs ty) | ||
| (gen_icmp cc (ext_int_if_need $false lhs ty) (ext_int_if_need $false rhs ty) ty)) | ||
|
|
||
|
|
||
| ;; Picks the `ExtendOp` used to widen both operands of a comparison. | ||
| ;; | ||
| ;; For Equal and NotEqual the kind of extension does not matter, as long as | ||
| ;; the same one is applied to both sides. So we try to pick the ExtendOp | ||
| ;; that has a dedicated instruction in the Base ISA. | ||
| ;; | ||
| ;; The special cases are: | ||
| ;; - i8 -> any: we should prefer ExtendOp.Zero | ||
| ;; - i32 -> i64: we should prefer ExtendOp.Signed | ||
| ;; | ||
| ;; Only the signed (i32) case needs explicit rules here, since the unsigned | ||
| ;; case is already what `intcc_to_extend_op` yields by default. The other | ||
| ;; cases lower into a two instruction sequence. | ||
| (decl icmp_intcc_extend (IntCC Type) ExtendOp) | ||
| (rule 1 (icmp_intcc_extend (IntCC.Equal) $I32) (ExtendOp.Signed)) | ||
| (rule 1 (icmp_intcc_extend (IntCC.NotEqual) $I32) (ExtendOp.Signed)) | ||
| (rule (icmp_intcc_extend cond _) (intcc_to_extend_op cond)) | ||
|
|
||
|
|
||
| ;; Generates an icmp sequence for the given type: extends the operands when | ||
| ;; required and then defers to `gen_icmp_inner` for the actual comparison. | ||
| (decl gen_icmp (IntCC ValueRegs ValueRegs Type) Reg) | ||
|
|
||
| ;; I128 operands are passed through unchanged; no extension is needed. | ||
| (rule 1 (gen_icmp cc lhs rhs $I128) | ||
| (gen_icmp_inner cc lhs rhs $I128)) | ||
|
|
||
| ;; Otherwise emit the extension sequence before the comparison. Both sides | ||
| ;; are widened to I64 with the same ExtendOp, chosen by `icmp_intcc_extend`. | ||
| (rule (gen_icmp cc lhs rhs (fits_in_64 ty)) | ||
| (let ((op ExtendOp (icmp_intcc_extend cc ty)) | ||
| (lhs_wide Reg (extend lhs op ty $I64)) | ||
| (rhs_wide Reg (extend rhs op ty $I64))) | ||
| (gen_icmp_inner cc lhs_wide rhs_wide ty))) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Given that the operands were just extended to I64, should |
||
|
|
||
|
|
||
|
|
||
| ;; Emits only the comparison instructions; the arguments are assumed to be | ||
| ;; already extended by the caller. | ||
| ;; | ||
| ;; Actual lowerings exist only for Equal/NotEqual/SignedLessThan/UnsignedLessThan. | ||
| ;; Every other condition code recurses into one of those cases. | ||
| (decl gen_icmp_inner (IntCC ValueRegs ValueRegs Type) Reg) | ||
|
|
||
| ;; Only LessThan is implemented; a GreaterThan comparison is lowered by | ||
| ;; reversing the condition code and swapping the two operands. | ||
| (rule 4 (gen_icmp_inner cc lhs rhs ty) | ||
| (if (intcc_greater_than cc)) | ||
| (gen_icmp_inner (intcc_reverse cc) rhs lhs ty)) | ||
|
|
||
| ;; `*LessThanOrEqual` is lowered as the negation of the inverse condition, | ||
| ;; i.e. `x <= y` is the same as `!(x > y)`. The inverse comparison is emitted | ||
| ;; through the normal rules and its result is flipped with `xori 1`. | ||
| ;; The guard matches on the unsigned form of `cc`, so presumably both the | ||
| ;; signed and the unsigned variant land here. | ||
| (rule 3 (gen_icmp_inner cc lhs rhs ty) | ||
| (if-let (IntCC.UnsignedLessThanOrEqual) (intcc_unsigned cc)) | ||
| (let ((inverse_res Reg (gen_icmp_inner (intcc_inverse cc) lhs rhs ty))) | ||
| (rv_xori inverse_res (imm12_const 1)))) | ||
|
|
||
|
|
||
| ;; `*LessThan` on types of at most 64 bits maps directly onto a dedicated | ||
| ;; instruction: `slt` for the signed and `sltu` for the unsigned variant. | ||
| (rule (gen_icmp_inner (IntCC.SignedLessThan) lhs rhs (fits_in_64 ty)) | ||
| (rv_slt lhs rhs)) | ||
| (rule (gen_icmp_inner (IntCC.UnsignedLessThan) lhs rhs (fits_in_64 ty)) | ||
| (rv_sltu lhs rhs)) | ||
|
|
||
|
|
||
| ;; Compare the top halves of the two values. If they are equal, then | ||
| ;; we can just check the bottom halves. Otherwise we only need to check | ||
| ;; the top halves. | ||
| ;; | ||
| ;; In both signed and unsigned variants the bottom half is always compared | ||
| ;; as unsigned. Since we know that the top halves are equal both signs | ||
| ;; are the same. If the number is positive, this is fairly straightforward | ||
| ;; and if the number is negative in the signed case we can still use | ||
| ;; an unsigned comparison due to the way two's complement works. | ||
| ;; | ||
| ;; As an example: 0xFFFE < 0xFFFF Here both are negative numbers, but when | ||
| ;; looking only at the bottom byte, 0xFE is smaller than 0xFF when viewed | ||
| ;; as unsigned. This also holds true when viewing both of these numbers as | ||
| ;; signed (-2 < -1), so we can use the unsigned comparison for the bottom half. | ||
| ;; | ||
| ;; Emit the following sequence: | ||
| ;; slt{,u} t1, x_hi, y_hi | ||
| ;; sltu t2, x_lo, y_lo | ||
| ;; beq x_hi, y_hi, .top_is_equal | ||
| ;; mov rd, t1 | ||
| ;; j .end | ||
| ;; .top_is_equal: | ||
| ;; mov rd, t2 | ||
| ;; .end: | ||
| (rule 2 (gen_icmp_inner cc x y $I128) | ||
| (if-let (IntCC.UnsignedLessThan) (intcc_unsigned cc)) | ||
| (let ((x_lo Reg (value_regs_get x 0)) | ||
| (x_hi Reg (value_regs_get x 1)) | ||
| (y_lo Reg (value_regs_get y 0)) | ||
| (y_hi Reg (value_regs_get y 1)) | ||
| ;; Generate compares for both halves. | ||
| ;; Only the top compare depends on the IntCC (`slt` vs. `sltu`). | ||
| ;; The bottom halves must always use the unsigned `sltu`: a signed | ||
| ;; `slt` would misorder low words that differ in bit 63. | ||
| (top_cmp Reg (gen_icmp_inner cc x_hi y_hi $I64)) | ||
| (bottom_cmp Reg (rv_sltu x_lo y_lo))) | ||
| ;; If the high parts are equal, the result only depends on the bottom | ||
| (gen_select_reg (IntCC.Equal) x_hi y_hi bottom_cmp top_cmp))) | ||
|
|
||
|
|
||
| ;; Equality on types of at most 64 bits: `xor` the two registers and turn the | ||
| ;; result into a boolean with the dedicated `seqz` (Equal) / `snez` (NotEqual) | ||
| ;; instructions. | ||
| (rule (gen_icmp_inner (IntCC.Equal) lhs rhs (fits_in_64 ty)) | ||
| (rv_seqz (rv_xor lhs rhs))) | ||
| (rule (gen_icmp_inner (IntCC.NotEqual) lhs rhs (fits_in_64 ty)) | ||
| (rv_snez (rv_xor lhs rhs))) | ||
|
|
||
| ;; I128 equality: `xor` each pair of halves (register 0 and register 1 of | ||
| ;; the value), `or` the two differences together, and test the combined | ||
| ;; value against zero with `seqz` (Equal) / `snez` (NotEqual). | ||
| (rule 1 (gen_icmp_inner (IntCC.Equal) x y $I128) | ||
| (let ((diff_0 Reg (rv_xor (value_regs_get x 0) (value_regs_get y 0))) | ||
| (diff_1 Reg (rv_xor (value_regs_get x 1) (value_regs_get y 1))) | ||
| (any_diff Reg (rv_or diff_0 diff_1))) | ||
| (rv_seqz any_diff))) | ||
| (rule 1 (gen_icmp_inner (IntCC.NotEqual) x y $I128) | ||
| (let ((diff_0 Reg (rv_xor (value_regs_get x 0) (value_regs_get y 0))) | ||
| (diff_1 Reg (rv_xor (value_regs_get x 1) (value_regs_get y 1))) | ||
| (any_diff Reg (rv_or diff_0 diff_1))) | ||
| (rv_snez any_diff))) | ||
|
|
||
|
|
||
|
|
||
|
|
||
| ;; Compares a value against a constant right-hand side. For some condition | ||
| ;; codes this allows a shorter instruction sequence than the generic lowering. | ||
| ;; | ||
| ;; TODO: Currently we only have rules for <=64bit types. We can add more rules for I128's | ||
| (decl gen_icmp_imm (IntCC ValueRegs i64 Type) Reg) | ||
|
|
||
| ;; `x < 0` (signed). `slti` with a zero immediate would work just as well, | ||
| ;; but `sltz` is the official mnemonic for this operation and gives a | ||
| ;; slightly nicer disassembly output. | ||
| (rule 5 (gen_icmp_imm (IntCC.SignedLessThan) val 0 (fits_in_64 ty)) | ||
| (rv_sltz (sext val ty $I64))) | ||
|
|
||
| ;; `x > 0` (signed). `sgtz` is preferred here because the equivalent | ||
| ;; immediate lowering would need an `slt` followed by an `xori`. | ||
| (rule 5 (gen_icmp_imm (IntCC.SignedGreaterThan) val 0 (fits_in_64 ty)) | ||
| (rv_sgtz (sext val ty $I64))) | ||
|
|
||
| ;; `*GreaterThan`: add 1 to the immediate, lower the reversed condition code | ||
| ;; and invert the result with `xori 1`. | ||
| ;; i.e. `x > imm` is the same as `!(x < imm + 1)`. | ||
| ;; | ||
| ;; NOTE(review): `(i64_add imm 1)` can overflow when `imm` is the maximum | ||
| ;; value for the compared type; the identity above does not obviously hold | ||
| ;; in that case. This needs a guard or a proof that it cannot happen. | ||
| (rule 4 (gen_icmp_imm cc val imm ty) | ||
| (if-let (IntCC.UnsignedGreaterThan) (intcc_unsigned cc)) | ||
| (let ((lt_next Reg (gen_icmp_imm (intcc_reverse cc) val (i64_add imm 1) ty))) | ||
| (rv_xori lt_next (imm12_const 1)))) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Adding 1 to the constant may overflow. It isn't immediately obvious to me that this rule is always equivalent in that case. If the constant is the maximum value for the given If we add 1 modulo the width of However this doesn't add modulo the width of If If So I think this rule is wrong in case of overflow when
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, this case used to be covered when we used Imm12 instead of i64. Since |
||
|
|
||
| ;; `*GreaterThanOrEqual`: the inverse condition code is directly supported, | ||
| ;; so lower that one and flip its result with `xori 1`. | ||
| ;; i.e. `x >= imm` is the same as `!(x < imm)`. | ||
| (rule 3 (gen_icmp_imm cc val imm ty) | ||
| (if-let (IntCC.UnsignedGreaterThanOrEqual) (intcc_unsigned cc)) | ||
| (let ((inverse_res Reg (gen_icmp_imm (intcc_inverse cc) val imm ty))) | ||
| (rv_xori inverse_res (imm12_const 1)))) | ||
|
|
||
| ;; `*LessThanOrEqual`: add 1 to the immediate and use the strict form of the | ||
| ;; condition code (`intcc_without_equal`). | ||
| ;; i.e. `x <= imm` is the same as `x < imm + 1`. | ||
| ;; | ||
| ;; NOTE(review): `(i64_add imm 1)` can overflow when `imm` is the maximum | ||
| ;; value for the compared type; the identity above does not obviously hold | ||
| ;; in that case. This needs a guard or a proof that it cannot happen. | ||
| (rule 2 (gen_icmp_imm cc val imm ty) | ||
| (if-let (IntCC.UnsignedLessThanOrEqual) (intcc_unsigned cc)) | ||
| (gen_icmp_imm (intcc_without_equal cc) val (i64_add imm 1) ty)) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similarly, I think this rule is wrong if adding 1 overflows and |
||
|
|
||
| ;; We have dedicated instructions for these two cases. (`slti`/`sltiu`) | ||
| ;; | ||
| ;; Both rules only match when the constant, sign-extended according to `ty`, | ||
| ;; fits in an imm12 (`imm12_sextend_i64` is partial). | ||
| ;; | ||
| ;; The register operand is sign-extended in BOTH rules. For the unsigned | ||
| ;; rule this is required for correctness: the immediate has been | ||
| ;; sign-extended from `ty`, and sign-extension preserves the unsigned | ||
| ;; ordering of `ty`-wide values, whereas a zero-extended register compared | ||
| ;; against a sign-extended constant gives the wrong answer whenever the | ||
| ;; constant has the top bit of `ty` set (e.g. I8 `x < 0xFF`). | ||
| (rule 1 (gen_icmp_imm (IntCC.SignedLessThan) x n (fits_in_64 ty)) | ||
| (if-let imm (imm12_sextend_i64 ty n)) | ||
| (rv_slti (sext x ty $I64) imm)) | ||
| (rule 1 (gen_icmp_imm (IntCC.UnsignedLessThan) x n (fits_in_64 ty)) | ||
| (if-let imm (imm12_sextend_i64 ty n)) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is sign-extending the constant the right thing to do for an unsigned comparison? |
||
| (rv_sltiu (sext x ty $I64) imm)) | ||
|
|
||
| ;; Fallback: materialize the immediate in a register, extend both sides | ||
| ;; with the ExtendOp chosen by `icmp_intcc_extend`, and compare as I64. | ||
| (rule (gen_icmp_imm cc val n (fits_in_64 ty)) | ||
| (let ((op ExtendOp (icmp_intcc_extend cc ty)) | ||
| (val_wide Reg (extend val op ty $I64)) | ||
| ;; TODO: Ideally the extend instruction for the constant would not | ||
| ;; be needed; it could be computed at compile time. But our | ||
| ;; constant loading infrastructure isn't great yet. | ||
| (const_reg Reg (imm $I64 (i64_as_u64 n))) | ||
| (const_wide Reg (extend const_reg op ty $I64))) | ||
| (gen_icmp cc val_wide const_wide $I64))) | ||
|
|
||
|
|
||
|
|
||
| (decl i128_sub (ValueRegs ValueRegs) ValueRegs) | ||
|
|
@@ -2492,8 +2685,8 @@ | |
| ((r_const_neg_1 Reg (load_imm12 -1)) | ||
| (r_const_min Reg (rv_slli (load_imm12 1) (imm12_const 63))) | ||
| (tmp_rs1 Reg (shift_int_to_most_significant rs1 ty)) | ||
| (t1 Reg (gen_icmp (IntCC.Equal) r_const_neg_1 rs2 ty)) | ||
| (t2 Reg (gen_icmp (IntCC.Equal) r_const_min tmp_rs1 ty)) | ||
| (t1 Reg (gen_icmp (IntCC.Equal) r_const_neg_1 rs2 $I64)) | ||
| (t2 Reg (gen_icmp (IntCC.Equal) r_const_min tmp_rs1 $I64)) | ||
| (test Reg (rv_and t1 t2))) | ||
| (gen_trapif test (TrapCode.IntegerOverflow)))) | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like some trailing whitespace got added here. You can use
`git diff --check` to check a range of commits for any whitespace issues like that.