From e1d6b5acfcd79781239be016161b22f3a255cbf6 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Tue, 11 Apr 2023 11:45:18 +0100 Subject: [PATCH 1/8] egraphs: Add a bmask bit pattern optimization --- cranelift/codegen/src/opts/bitops.isle | 14 ++++++++++ .../filetests/filetests/egraph/bitops.clif | 18 +++++++++++++ .../filetests/filetests/runtests/bitops.clif | 27 +++++++++++++++++++ 3 files changed, 59 insertions(+) diff --git a/cranelift/codegen/src/opts/bitops.isle b/cranelift/codegen/src/opts/bitops.isle index bf32251ca071..c75d8225f487 100644 --- a/cranelift/codegen/src/opts/bitops.isle +++ b/cranelift/codegen/src/opts/bitops.isle @@ -92,3 +92,17 @@ (rule (simplify (bxor ty x (iconst ty k))) (if-let -1 (i64_sextend_imm64 ty k)) (bnot ty x)) + +;; bmask(input) = !(((input | ((!input) + 1)) >> 63) - 1) +(rule (simplify (bnot ty + (isub ty + (ushr ty + (bor ty + input + (iadd ty + (bnot ty input) + (iconst ty (u64_from_imm64 1)))) + (iconst ty (u64_from_imm64 shift_amt))) + (iconst ty (u64_from_imm64 1))))) + (if-let $true (u64_eq shift_amt (u64_sub (ty_bits ty) 1))) + (bmask ty input)) diff --git a/cranelift/filetests/filetests/egraph/bitops.clif b/cranelift/filetests/filetests/egraph/bitops.clif index 88964da5aeed..dbda7520ea34 100644 --- a/cranelift/filetests/filetests/egraph/bitops.clif +++ b/cranelift/filetests/filetests/egraph/bitops.clif @@ -137,3 +137,21 @@ block0(v1: i64): ; check: v5 = bnot v1 ; check: return v5 + + +function %bitops_bmask(i64) -> i64 { +block0(v0: i64): + v1 = bnot v0 + v2 = iconst.i64 1 + v3 = iadd.i64 v1, v2 + v4 = bor.i64 v0, v3 + v5 = iconst.i64 63 + v6 = ushr.i64 v4, v5 + v7 = iconst.i64 1 + v8 = isub.i64 v6, v7 + v9 = bnot.i64 v8 + return v9 +} + +; check: v10 = bmask.i64 v0 +; check: return v10 diff --git a/cranelift/filetests/filetests/runtests/bitops.clif b/cranelift/filetests/filetests/runtests/bitops.clif index fe7615afb4a6..9a560f0aa8ea 100644 --- a/cranelift/filetests/filetests/runtests/bitops.clif +++ 
b/cranelift/filetests/filetests/runtests/bitops.clif @@ -1,4 +1,10 @@ test run +set opt_level=none +target aarch64 +target s390x +target riscv64 +target s390x has_mie2 +set opt_level=speed target aarch64 target s390x target riscv64 @@ -55,3 +61,24 @@ block0(v0: i64, v1: i64, v2: i64): ; run: %bitselect_i64(0, 0, 0xFFFFFFFFFFFFFFFF) == 0xFFFFFFFFFFFFFFFF ; run: %bitselect_i64(0x5555555555555555, 0, 0xFFFFFFFFFFFFFFFF) == 0xAAAAAAAAAAAAAAAA ; run: %bitselect_i64(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0) == 0xFFFFFFFFFFFFFFFF + + +;; We have a optimization rule in the midend that turns this into a bmask +;; It's easier to have a runtest to ensure that it is correct than to inspect the output. +function %bitops_bmask(i64) -> i64 { +block0(v0: i64): + v1 = bnot v0 + v2 = iconst.i64 1 + v3 = iadd.i64 v1, v2 + v4 = bor.i64 v0, v3 + v5 = iconst.i64 63 + v6 = ushr.i64 v4, v5 + v7 = iconst.i64 1 + v8 = isub.i64 v6, v7 + v9 = bnot.i64 v8 + return v9 +} +; run: %bitops_bmask(0) == 0 +; run: %bitops_bmask(1) == -1 +; run: %bitops_bmask(2) == -1 +; run: %bitops_bmask(3) == -1 From 31fb6edbe245397bc04dcd4feb6df04c8a3f3c54 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 13 Apr 2023 12:14:42 +0100 Subject: [PATCH 2/8] egraphs: Add more `ineg` rules --- cranelift/codegen/src/opts/arithmetic.isle | 10 ++++++ .../filetests/egraph/arithmetic.clif | 34 +++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/cranelift/codegen/src/opts/arithmetic.isle b/cranelift/codegen/src/opts/arithmetic.isle index e0e79980e42b..693e81bc7e7e 100644 --- a/cranelift/codegen/src/opts/arithmetic.isle +++ b/cranelift/codegen/src/opts/arithmetic.isle @@ -67,6 +67,16 @@ (if-let -1 (i64_sextend_imm64 ty c)) (ineg ty x)) +;; ineg(x) == (!x) + 1 +(rule (simplify (iadd ty (bnot ty x) (iconst ty (u64_from_imm64 1)))) + (ineg ty x)) +(rule (simplify (iadd ty (iconst ty (u64_from_imm64 1)) (bnot ty x))) + (ineg ty x)) + +;; ineg(x) == !(x - 1) +(rule (simplify (bnot ty (isub ty x (iconst ty 
(u64_from_imm64 1))))) + (ineg ty x)) + ;; x/1 == x. (rule (simplify (sdiv ty x diff --git a/cranelift/filetests/filetests/egraph/arithmetic.clif b/cranelift/filetests/filetests/egraph/arithmetic.clif index 7a014954db32..7d4b1ebb7218 100644 --- a/cranelift/filetests/filetests/egraph/arithmetic.clif +++ b/cranelift/filetests/filetests/egraph/arithmetic.clif @@ -73,6 +73,40 @@ block0(v0: i32): ; check: return v3 } +function %ineg_not_plus_one(i32) -> i32 { +block0(v0: i32): + v1 = bnot v0 + v2 = iconst.i32 1 + v3 = iadd v1, v2 + return v3 +} + +; check: v4 = ineg v0 +; check: return v4 + +function %ineg_not_plus_one_reverse(i32) -> i32 { +block0(v0: i32): + v1 = bnot v0 + v2 = iconst.i32 1 + v3 = iadd v2, v1 + return v3 +} + +; check: v4 = ineg v0 +; check: return v4 + +function %ineg_not_sub_one(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 1 + v2 = isub v0, v1 + v3 = bnot v2 + return v3 +} + +; check: v4 = ineg v0 +; check: return v4 + + function %double_fneg(f32) -> f32 { block0(v1: f32): v2 = fneg v1 From e0afbab9e952e0c5c3efac0bc597a05f5c3366ec Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 13 Apr 2023 13:58:59 +0100 Subject: [PATCH 3/8] egraphs: Add sshr rule --- cranelift/codegen/src/opts/shifts.isle | 5 +++++ cranelift/filetests/filetests/egraph/shifts.clif | 11 +++++++++++ 2 files changed, 16 insertions(+) diff --git a/cranelift/codegen/src/opts/shifts.isle b/cranelift/codegen/src/opts/shifts.isle index 445f5467c801..0637290a86ae 100644 --- a/cranelift/codegen/src/opts/shifts.isle +++ b/cranelift/codegen/src/opts/shifts.isle @@ -78,3 +78,8 @@ (if-let (u64_from_imm64 shift_u64) shift) (if-let $true (u64_le shift_u64 (u64_sub (ty_bits_u64 wide) (ty_bits_u64 narrow)))) x) + +;; sshr(x, k) == ineg(ushr(x, k)) when k == ty_bits - 1. 
+(rule (simplify (ineg ty (ushr ty x sconst @ (iconst ty (u64_from_imm64 shift_amt))))) + (if-let $true (u64_eq shift_amt (u64_sub (ty_bits ty) 1))) + (sshr ty x sconst)) diff --git a/cranelift/filetests/filetests/egraph/shifts.clif b/cranelift/filetests/filetests/egraph/shifts.clif index f03d2d41fa42..d9c9da277db5 100644 --- a/cranelift/filetests/filetests/egraph/shifts.clif +++ b/cranelift/filetests/filetests/egraph/shifts.clif @@ -204,3 +204,14 @@ block0(v0: i8): ; check: v5 = sextend.i64 v0 ; check: return v5 } + + +function %ineg_ushr_to_sshr(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 63 + v2 = ushr v0, v1 + v3 = ineg v2 + return v3 + ; check: v4 = sshr v0, v1 + ; check: return v4 +} From 36910b6f319d0242bdb3e65569c046f6ba5c85dc Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 13 Apr 2023 14:06:02 +0100 Subject: [PATCH 4/8] egraphs: Simplify bmask rule --- cranelift/codegen/src/opts/bitops.isle | 18 ++++++------------ .../filetests/filetests/egraph/bitops.clif | 17 ++++++++++++++--- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/cranelift/codegen/src/opts/bitops.isle b/cranelift/codegen/src/opts/bitops.isle index c75d8225f487..80e83c825a32 100644 --- a/cranelift/codegen/src/opts/bitops.isle +++ b/cranelift/codegen/src/opts/bitops.isle @@ -93,16 +93,10 @@ (if-let -1 (i64_sextend_imm64 ty k)) (bnot ty x)) -;; bmask(input) = !(((input | ((!input) + 1)) >> 63) - 1) -(rule (simplify (bnot ty - (isub ty - (ushr ty - (bor ty - input - (iadd ty - (bnot ty input) - (iconst ty (u64_from_imm64 1)))) - (iconst ty (u64_from_imm64 shift_amt))) - (iconst ty (u64_from_imm64 1))))) +;; bmask(x) = sshr((x | -x), N) where N = ty_bits(ty) - 1. +;; +;; (x | -x) sets the sign bit to 1 if x is nonzero, and 0 if x is zero. sshr propagates +;; the sign bit to the rest of the value. 
+(rule (simplify (sshr ty (bor ty x (ineg ty x)) (iconst ty (u64_from_imm64 shift_amt)))) (if-let $true (u64_eq shift_amt (u64_sub (ty_bits ty) 1))) - (bmask ty input)) + (bmask ty x)) diff --git a/cranelift/filetests/filetests/egraph/bitops.clif b/cranelift/filetests/filetests/egraph/bitops.clif index dbda7520ea34..d3e9fb381de8 100644 --- a/cranelift/filetests/filetests/egraph/bitops.clif +++ b/cranelift/filetests/filetests/egraph/bitops.clif @@ -138,7 +138,6 @@ block0(v1: i64): ; check: v5 = bnot v1 ; check: return v5 - function %bitops_bmask(i64) -> i64 { block0(v0: i64): v1 = bnot v0 @@ -153,5 +152,17 @@ block0(v0: i64): return v9 } -; check: v10 = bmask.i64 v0 -; check: return v10 +; check: v14 = bmask.i64 v0 +; check: return v14 + +function %bmask_sshr(i64) -> i64 { +block0(v0: i64): + v1 = ineg v0 + v2 = bor v0, v1 + v3 = iconst.i64 63 + v4 = sshr v2, v3 + return v4 +} + +; check: v5 = bmask.i64 v0 +; check: return v5 From 6190b97f31d839381dab99bdada597dc83790c72 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 13 Apr 2023 20:15:45 +0100 Subject: [PATCH 5/8] egraphs: Add commutative version of bmask rule --- cranelift/codegen/src/opts/bitops.isle | 6 +++++- cranelift/filetests/filetests/egraph/bitops.clif | 12 ++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/cranelift/codegen/src/opts/bitops.isle b/cranelift/codegen/src/opts/bitops.isle index 80e83c825a32..7bff8e1994c4 100644 --- a/cranelift/codegen/src/opts/bitops.isle +++ b/cranelift/codegen/src/opts/bitops.isle @@ -94,9 +94,13 @@ (bnot ty x)) ;; bmask(x) = sshr((x | -x), N) where N = ty_bits(ty) - 1. -;; +;; ;; (x | -x) sets the sign bit to 1 if x is nonzero, and 0 if x is zero. sshr propagates ;; the sign bit to the rest of the value.
(rule (simplify (sshr ty (bor ty x (ineg ty x)) (iconst ty (u64_from_imm64 shift_amt)))) (if-let $true (u64_eq shift_amt (u64_sub (ty_bits ty) 1))) (bmask ty x)) + +(rule (simplify (sshr ty (bor ty (ineg ty x) x) (iconst ty (u64_from_imm64 shift_amt)))) + (if-let $true (u64_eq shift_amt (u64_sub (ty_bits ty) 1))) + (bmask ty x)) diff --git a/cranelift/filetests/filetests/egraph/bitops.clif b/cranelift/filetests/filetests/egraph/bitops.clif index d3e9fb381de8..9f02eed10c3f 100644 --- a/cranelift/filetests/filetests/egraph/bitops.clif +++ b/cranelift/filetests/filetests/egraph/bitops.clif @@ -166,3 +166,15 @@ block0(v0: i64): ; check: v5 = bmask.i64 v0 ; check: return v5 + +function %bmask_reverse_sshr(i64) -> i64 { +block0(v0: i64): + v1 = ineg v0 + v2 = bor v1, v0 + v3 = iconst.i64 63 + v4 = sshr v2, v3 + return v4 +} + +; check: v5 = bmask.i64 v0 +; check: return v5 From 498abbb565015c8c2014fe406ce942ec1bdd1ccd Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 13 Apr 2023 20:19:52 +0100 Subject: [PATCH 6/8] egraphs: Add more testcases --- .../filetests/filetests/runtests/bitops.clif | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/cranelift/filetests/filetests/runtests/bitops.clif b/cranelift/filetests/filetests/runtests/bitops.clif index 9a560f0aa8ea..3fd366aa575e 100644 --- a/cranelift/filetests/filetests/runtests/bitops.clif +++ b/cranelift/filetests/filetests/runtests/bitops.clif @@ -65,20 +65,20 @@ block0(v0: i64, v1: i64, v2: i64): ;; We have a optimization rule in the midend that turns this into a bmask ;; It's easier to have a runtest to ensure that it is correct than to inspect the output. 
-function %bitops_bmask(i64) -> i64 { -block0(v0: i64): +function %bitops_bmask(i16) -> i16 { +block0(v0: i16): v1 = bnot v0 - v2 = iconst.i64 1 - v3 = iadd.i64 v1, v2 - v4 = bor.i64 v0, v3 - v5 = iconst.i64 63 - v6 = ushr.i64 v4, v5 - v7 = iconst.i64 1 - v8 = isub.i64 v6, v7 - v9 = bnot.i64 v8 + v2 = iconst.i16 1 + v3 = iadd.i16 v1, v2 + v4 = bor.i16 v0, v3 + v5 = iconst.i16 15 + v6 = ushr.i16 v4, v5 + v7 = iconst.i16 1 + v8 = isub.i16 v6, v7 + v9 = bnot.i16 v8 return v9 } ; run: %bitops_bmask(0) == 0 ; run: %bitops_bmask(1) == -1 -; run: %bitops_bmask(2) == -1 -; run: %bitops_bmask(3) == -1 +; run: %bitops_bmask(0xFFFF) == -1 +; run: %bitops_bmask(0x8000) == -1 From 7da307bdbc5a050e4f83725081f073a4b3dcc0be Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 13 Apr 2023 20:22:39 +0100 Subject: [PATCH 7/8] egraphs: Cleanup rule comments --- cranelift/codegen/src/opts/arithmetic.isle | 4 ++-- cranelift/codegen/src/opts/bitops.isle | 2 +- cranelift/codegen/src/opts/shifts.isle | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cranelift/codegen/src/opts/arithmetic.isle b/cranelift/codegen/src/opts/arithmetic.isle index 693e81bc7e7e..ce4b1fac53f5 100644 --- a/cranelift/codegen/src/opts/arithmetic.isle +++ b/cranelift/codegen/src/opts/arithmetic.isle @@ -67,13 +67,13 @@ (if-let -1 (i64_sextend_imm64 ty c)) (ineg ty x)) -;; ineg(x) == (!x) + 1 +;; (!x) + 1 == ineg(x) (rule (simplify (iadd ty (bnot ty x) (iconst ty (u64_from_imm64 1)))) (ineg ty x)) (rule (simplify (iadd ty (iconst ty (u64_from_imm64 1)) (bnot ty x))) (ineg ty x)) -;; ineg(x) == !(x - 1) +;; !(x - 1) == ineg(x) (rule (simplify (bnot ty (isub ty x (iconst ty (u64_from_imm64 1))))) (ineg ty x)) diff --git a/cranelift/codegen/src/opts/bitops.isle b/cranelift/codegen/src/opts/bitops.isle index 7bff8e1994c4..09836d3a63c7 100644 --- a/cranelift/codegen/src/opts/bitops.isle +++ b/cranelift/codegen/src/opts/bitops.isle @@ -93,7 +93,7 @@ (if-let -1 (i64_sextend_imm64 ty k)) (bnot ty x)) -;; 
bmask(x) = sshr((x | -x), N) where N = ty_bits(ty) - 1. +;; sshr((x | -x), N) == bmask(x) where N = ty_bits(ty) - 1. ;; ;; (x | -x) sets the sign bit to 1 if x is nonzero, and 0 if x is zero. sshr propagates ;; the sign bit to the rest of the value. diff --git a/cranelift/codegen/src/opts/shifts.isle b/cranelift/codegen/src/opts/shifts.isle index 0637290a86ae..44c79a6f8dc2 100644 --- a/cranelift/codegen/src/opts/shifts.isle +++ b/cranelift/codegen/src/opts/shifts.isle @@ -79,7 +79,7 @@ (if-let $true (u64_le shift_u64 (u64_sub (ty_bits_u64 wide) (ty_bits_u64 narrow)))) x) -;; sshr(x, k) == ineg(ushr(x, k)) when k == ty_bits - 1. +;; ineg(ushr(x, k)) == sshr(x, k) when k == ty_bits - 1. (rule (simplify (ineg ty (ushr ty x sconst @ (iconst ty (u64_from_imm64 shift_amt))))) (if-let $true (u64_eq shift_amt (u64_sub (ty_bits ty) 1))) (sshr ty x sconst)) From e1d2f515b01fb0190f9f048cacc5255b15b027b4 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 13 Apr 2023 20:49:39 +0100 Subject: [PATCH 8/8] egraphs: Add more `ineg` optimizations --- cranelift/codegen/src/opts/arithmetic.isle | 13 +++++-- .../filetests/egraph/arithmetic.clif | 34 +++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/cranelift/codegen/src/opts/arithmetic.isle b/cranelift/codegen/src/opts/arithmetic.isle index ce4b1fac53f5..00c56c2409c5 100644 --- a/cranelift/codegen/src/opts/arithmetic.isle +++ b/cranelift/codegen/src/opts/arithmetic.isle @@ -67,15 +67,24 @@ (if-let -1 (i64_sextend_imm64 ty c)) (ineg ty x)) -;; (!x) + 1 == ineg(x) +;; (!x) + 1 == 1 + (!x) == !(x) - (-1) == ineg(x) (rule (simplify (iadd ty (bnot ty x) (iconst ty (u64_from_imm64 1)))) (ineg ty x)) (rule (simplify (iadd ty (iconst ty (u64_from_imm64 1)) (bnot ty x))) (ineg ty x)) +(rule (simplify (isub ty (bnot ty x) (iconst ty c))) + (if-let -1 (i64_sextend_imm64 ty c)) + (ineg ty x)) -;; !(x - 1) == ineg(x) +;; !(x - 1) == !(x + (-1)) == !((-1) + x) == ineg(x) (rule (simplify (bnot ty (isub ty x 
(iconst ty (u64_from_imm64 1))))) (ineg ty x)) +(rule (simplify (bnot ty (iadd ty x (iconst ty c)))) + (if-let -1 (i64_sextend_imm64 ty c)) + (ineg ty x)) +(rule (simplify (bnot ty (iadd ty (iconst ty c) x))) + (if-let -1 (i64_sextend_imm64 ty c)) + (ineg ty x)) ;; x/1 == x. (rule (simplify (sdiv ty diff --git a/cranelift/filetests/filetests/egraph/arithmetic.clif b/cranelift/filetests/filetests/egraph/arithmetic.clif index 7d4b1ebb7218..630aebeca450 100644 --- a/cranelift/filetests/filetests/egraph/arithmetic.clif +++ b/cranelift/filetests/filetests/egraph/arithmetic.clif @@ -95,6 +95,17 @@ block0(v0: i32): ; check: v4 = ineg v0 ; check: return v4 +function %ineg_not_minus_neg_1(i32) -> i32 { +block0(v0: i32): + v1 = bnot v0 + v2 = iconst.i32 -1 + v3 = isub v1, v2 + return v3 +} + +; check: v4 = ineg v0 +; check: return v4 + function %ineg_not_sub_one(i32) -> i32 { block0(v0: i32): v1 = iconst.i32 1 @@ -106,6 +117,29 @@ block0(v0: i32): ; check: v4 = ineg v0 ; check: return v4 +function %ineg_not_add_neg_one(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 -1 + v2 = iadd v0, v1 + v3 = bnot v2 + return v3 +} + +; check: v4 = ineg v0 +; check: return v4 + +function %ineg_not_add_neg_one_reverse(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 -1 + v2 = iadd v1, v0 + v3 = bnot v2 + return v3 +} + +; check: v6 = ineg v0 +; check: return v6 + + function %double_fneg(f32) -> f32 { block0(v1: f32):