diff --git a/RELEASES.md b/RELEASES.md index b3bfea5f7fcc..fdc3cff96bac 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -1,5 +1,20 @@ -------------------------------------------------------------------------------- +## 17.0.1 + +### Fixed + +* Fix an egraph elaboration fuzzbug that was allowing values with dependencies + that shouldn't be duplicated to be chosen in a context that would make them + invalid. + [#7859](https://github.com/bytecodealliance/wasmtime/pull/7859) + [#7879](https://github.com/bytecodealliance/wasmtime/pull/7879) +* Fix an egraph rule bug that was allowing unconstrained recursion through the + DFG to run away on large functions. + [#7882](https://github.com/bytecodealliance/wasmtime/pull/7882) + +-------------------------------------------------------------------------------- + ## 17.0.0 Released 2024-01-25 diff --git a/cranelift/codegen/src/egraph.rs b/cranelift/codegen/src/egraph.rs index 0f699b68d854..fc583443ea1a 100644 --- a/cranelift/codegen/src/egraph.rs +++ b/cranelift/codegen/src/egraph.rs @@ -700,4 +700,5 @@ pub(crate) struct Stats { pub(crate) elaborate_func: u64, pub(crate) elaborate_func_pre_insts: u64, pub(crate) elaborate_func_post_insts: u64, + pub(crate) elaborate_best_cost_fixpoint_iters: u64, } diff --git a/cranelift/codegen/src/egraph/cost.rs b/cranelift/codegen/src/egraph/cost.rs index 2870b61f515f..28aab40e9740 100644 --- a/cranelift/codegen/src/egraph/cost.rs +++ b/cranelift/codegen/src/egraph/cost.rs @@ -74,7 +74,7 @@ impl Cost { const DEPTH_BITS: u8 = 8; const DEPTH_MASK: u32 = (1 << Self::DEPTH_BITS) - 1; const OP_COST_MASK: u32 = !Self::DEPTH_MASK; - const MAX_OP_COST: u32 = (Self::OP_COST_MASK >> Self::DEPTH_BITS) - 1; + const MAX_OP_COST: u32 = Self::OP_COST_MASK >> Self::DEPTH_BITS; pub(crate) fn infinity() -> Cost { // 2^32 - 1 is, uh, pretty close to infinite... (we use `Cost` @@ -86,14 +86,16 @@ impl Cost { Cost(0) } - /// Construct a new finite cost from the given parts. 
+ /// Construct a new `Cost` from the given parts. /// - /// The opcode cost is clamped to the maximum value representable. - fn new_finite(opcode_cost: u32, depth: u8) -> Cost { - let opcode_cost = std::cmp::min(opcode_cost, Self::MAX_OP_COST); - let cost = Cost((opcode_cost << Self::DEPTH_BITS) | u32::from(depth)); - debug_assert_ne!(cost, Cost::infinity()); - cost + /// If the opcode cost is greater than or equal to the maximum representable + /// opcode cost, then the resulting `Cost` saturates to infinity. + fn new(opcode_cost: u32, depth: u8) -> Cost { + if opcode_cost >= Self::MAX_OP_COST { + Self::infinity() + } else { + Cost(opcode_cost << Self::DEPTH_BITS | u32::from(depth)) + } } fn depth(&self) -> u8 { @@ -111,7 +113,7 @@ impl Cost { /// that satisfies `inst_predicates::is_pure_for_egraph()`. pub(crate) fn of_pure_op(op: Opcode, operand_costs: impl IntoIterator<Item = Self>) -> Self { let c = pure_op_cost(op) + operand_costs.into_iter().sum(); - Cost::new_finite(c.op_cost(), c.depth().saturating_add(1)) + Cost::new(c.op_cost(), c.depth().saturating_add(1)) } } @@ -131,12 +133,9 @@ impl std::ops::Add for Cost { type Output = Cost; fn add(self, other: Cost) -> Cost { - let op_cost = std::cmp::min( - self.op_cost().saturating_add(other.op_cost()), - Self::MAX_OP_COST, - ); + let op_cost = self.op_cost().saturating_add(other.op_cost()); let depth = std::cmp::max(self.depth(), other.depth()); - Cost::new_finite(op_cost, depth) + Cost::new(op_cost, depth) } } @@ -147,11 +146,11 @@ impl std::ops::Add for Cost { fn pure_op_cost(op: Opcode) -> Cost { match op { // Constants. - Opcode::Iconst | Opcode::F32const | Opcode::F64const => Cost::new_finite(1, 0), + Opcode::Iconst | Opcode::F32const | Opcode::F64const => Cost::new(1, 0), // Extends/reduces. Opcode::Uextend | Opcode::Sextend | Opcode::Ireduce | Opcode::Iconcat | Opcode::Isplit => { - Cost::new_finite(2, 0) + Cost::new(2, 0) } // "Simple" arithmetic. 
@@ -163,9 +162,52 @@ fn pure_op_cost(op: Opcode) -> Cost { | Opcode::Bnot | Opcode::Ishl | Opcode::Ushr - | Opcode::Sshr => Cost::new_finite(3, 0), + | Opcode::Sshr => Cost::new(3, 0), // Everything else (pure.) - _ => Cost::new_finite(4, 0), + _ => Cost::new(4, 0), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn add_cost() { + let a = Cost::new(5, 2); + let b = Cost::new(37, 3); + assert_eq!(a + b, Cost::new(42, 3)); + assert_eq!(b + a, Cost::new(42, 3)); + } + + #[test] + fn add_infinity() { + let a = Cost::new(5, 2); + let b = Cost::infinity(); + assert_eq!(a + b, Cost::infinity()); + assert_eq!(b + a, Cost::infinity()); + } + + #[test] + fn op_cost_saturates_to_infinity() { + let a = Cost::new(Cost::MAX_OP_COST - 10, 2); + let b = Cost::new(11, 2); + assert_eq!(a + b, Cost::infinity()); + assert_eq!(b + a, Cost::infinity()); + } + + #[test] + fn depth_saturates_to_max_depth() { + let a = Cost::new(10, u8::MAX); + let b = Cost::new(10, 1); + assert_eq!( + Cost::of_pure_op(Opcode::Iconst, [a, b]), + Cost::new(21, u8::MAX) + ); + assert_eq!( + Cost::of_pure_op(Opcode::Iconst, [b, a]), + Cost::new(21, u8::MAX) + ); } } diff --git a/cranelift/codegen/src/egraph/elaborate.rs b/cranelift/codegen/src/egraph/elaborate.rs index 5637215ee66d..9b9d1ea05641 100644 --- a/cranelift/codegen/src/egraph/elaborate.rs +++ b/cranelift/codegen/src/egraph/elaborate.rs @@ -7,6 +7,7 @@ use super::Stats; use crate::dominator_tree::DominatorTree; use crate::fx::{FxHashMap, FxHashSet}; use crate::hash_map::Entry as HashEntry; +use crate::inst_predicates::is_pure_for_egraph; use crate::ir::{Block, Function, Inst, Value, ValueDef}; use crate::loop_analysis::{Loop, LoopAnalysis}; use crate::scoped_hash_map::ScopedHashMap; @@ -216,46 +217,112 @@ impl<'a> Elaborator<'a> { fn compute_best_values(&mut self) { let best = &mut self.value_to_best_value; - for (value, def) in self.func.dfg.values_and_defs() { - trace!("computing best for value {:?} def {:?}", value, def); - 
match def { - ValueDef::Union(x, y) => { - // Pick the best of the two options based on - // min-cost. This works because each element of `best` - // is a `(cost, value)` tuple; `cost` comes first so - // the natural comparison works based on cost, and - // breaks ties based on value number. - trace!(" -> best of {:?} and {:?}", best[x], best[y]); - best[value] = std::cmp::min(best[x], best[y]); - trace!(" -> {:?}", best[value]); - } - ValueDef::Param(_, _) => { - best[value] = BestEntry(Cost::zero(), value); - } - // If the Inst is inserted into the layout (which is, - // at this point, only the side-effecting skeleton), - // then it must be computed and thus we give it zero - // cost. - ValueDef::Result(inst, _) => { - if let Some(_) = self.func.layout.inst_block(inst) { - best[value] = BestEntry(Cost::zero(), value); - } else { - trace!(" -> value {}: result, computing cost", value); - let inst_data = &self.func.dfg.insts[inst]; - // N.B.: at this point we know that the opcode is - // pure, so `pure_op_cost`'s precondition is - // satisfied. - let cost = Cost::of_pure_op( - inst_data.opcode(), - self.func.dfg.inst_values(inst).map(|value| best[value].0), + + // Do a fixpoint loop to compute the best value for each eclass. + // + // The maximum number of iterations is the length of the longest chain + // of `vNN -> vMM` edges in the dataflow graph where `NN < MM`, so this + // is *technically* quadratic, but `cranelift-frontend` won't construct + // any such edges. NaN canonicalization will introduce some of these + // edges, but they are chains of only two or three edges. So in + // practice, we *never* do more than a handful of iterations here unless + // (a) we parsed the CLIF from text and the text was funkily numbered, + // which we don't really care about, or (b) the CLIF producer did + // something weird, in which case it is their responsibility to stop + // doing that. 
+ trace!("Entering fixpoint loop to compute the best values for each eclass"); + let mut keep_going = true; + while keep_going { + keep_going = false; + trace!( + "fixpoint iteration {}", + self.stats.elaborate_best_cost_fixpoint_iters + ); + self.stats.elaborate_best_cost_fixpoint_iters += 1; + + for (value, def) in self.func.dfg.values_and_defs() { + trace!("computing best for value {:?} def {:?}", value, def); + let orig_best_value = best[value]; + + match def { + ValueDef::Union(x, y) => { + // Pick the best of the two options based on + // min-cost. This works because each element of `best` + // is a `(cost, value)` tuple; `cost` comes first so + // the natural comparison works based on cost, and + // breaks ties based on value number. + best[value] = std::cmp::min(best[x], best[y]); + trace!( + " -> best of union({:?}, {:?}) = {:?}", + best[x], + best[y], + best[value] ); - best[value] = BestEntry(cost, value); } - } - }; - debug_assert_ne!(best[value].0, Cost::infinity()); - debug_assert_ne!(best[value].1, Value::reserved_value()); - trace!("best for eclass {:?}: {:?}", value, best[value]); + ValueDef::Param(_, _) => { + best[value] = BestEntry(Cost::zero(), value); + } + // If the Inst is inserted into the layout (which is, + // at this point, only the side-effecting skeleton), + // then it must be computed and thus we give it zero + // cost. + ValueDef::Result(inst, _) => { + if let Some(_) = self.func.layout.inst_block(inst) { + best[value] = BestEntry(Cost::zero(), value); + } else { + let inst_data = &self.func.dfg.insts[inst]; + // N.B.: at this point we know that the opcode is + // pure, so `pure_op_cost`'s precondition is + // satisfied. + let cost = Cost::of_pure_op( + inst_data.opcode(), + self.func.dfg.inst_values(inst).map(|value| best[value].0), + ); + best[value] = BestEntry(cost, value); + trace!(" -> cost of value {} = {:?}", value, cost); + } + } + }; + + // Keep on iterating the fixpoint loop while we are finding new + // best values. 
+ keep_going |= orig_best_value != best[value]; + } + } + + if cfg!(any(feature = "trace-log", debug_assertions)) { + trace!("finished fixpoint loop to compute best value for each eclass"); + for value in self.func.dfg.values() { + trace!("-> best for eclass {:?}: {:?}", value, best[value]); + debug_assert_ne!(best[value].1, Value::reserved_value()); + // You might additionally be expecting an assert that the best + // cost is not infinity, however infinite cost *can* happen in + // practice. First, note that our cost function doesn't know + // about any shared structure in the dataflow graph, it only + // sums operand costs. (And trying to avoid that by deduping a + // single operation's operands is a losing game because you can + // always just add one indirection and go from `add(x, x)` to + // `add(foo(x), bar(x))` to hide the shared structure.) Given + // that blindness to sharing, we can make cost grow + // exponentially with a linear sequence of operations: + // + // v0 = iconst.i32 1 ;; cost = 1 + // v1 = iadd v0, v0 ;; cost = 3 + 1 + 1 + // v2 = iadd v1, v1 ;; cost = 3 + 5 + 5 + // v3 = iadd v2, v2 ;; cost = 3 + 13 + 13 + // v4 = iadd v3, v3 ;; cost = 3 + 29 + 29 + // v5 = iadd v4, v4 ;; cost = 3 + 61 + 61 + // v6 = iadd v5, v5 ;; cost = 3 + 125 + 125 + // ;; etc... + // + // Such a chain can cause cost to saturate to infinity. How do + // we choose which e-node is best when there are multiple that + // have saturated to infinity? It doesn't matter. As long as + // invariant (2) for optimization rules is upheld by our rule + // set (see `cranelift/codegen/src/opts/README.md`) it is safe + // to choose *any* e-node in the e-class. At worst we will + // produce suboptimal code, but never an incorrectness. + } } } @@ -606,7 +673,13 @@ impl<'a> Elaborator<'a> { } inst }; + // Place the inst just before `before`. 
+ debug_assert!( + is_pure_for_egraph(self.func, inst), + "something has gone very wrong if we are elaborating effectful \ + instructions, they should have remained in the skeleton" + ); self.func.layout.insert_inst(inst, before); // Update the inst's arguments. diff --git a/cranelift/codegen/src/opts/README.md b/cranelift/codegen/src/opts/README.md index b4f46420abe0..d7b09e4add71 100644 --- a/cranelift/codegen/src/opts/README.md +++ b/cranelift/codegen/src/opts/README.md @@ -1,5 +1,87 @@ -Rules here are allowed to rewrite pure expressions arbitrarily, -using the same inputs as the original, or fewer. In other words, we -cannot pull a new eclass id out of thin air and refer to it, other -than a piece of the input or a new node that we construct; but we -can freely rewrite e.g. `x+y-y` to `x`. +# Rules for Writing Optimization Rules + +For both correctness and compile speed, we must be careful with our rules. A lot +of it boils down to the fact that, unlike traditional e-graphs, our rules are +*directional*. + +1. Rules should not rewrite to worse code: the right-hand side should be at + least as good as the left-hand side or better. + + For example, the rule + + x => (add x 0) + + is disallowed, but swapping its left- and right-hand sides produces a rule + that is allowed. + + Any kind of canonicalizing rule that intends to help subsequent rules match + and unlock further optimizations (e.g. floating constants to the right side + for our constant-propagation rules to match) must produce canonicalized + output that is no worse than its noncanonical input. + + We assume this invariant as a heuristic to break ties between two + otherwise-equal-cost expressions in various places, making up for some + limitations of our explicit cost function. + +2. Any rule that removes value-uses in its right-hand side that previously + existed in its left-hand side MUST use `subsume`. + + For example, the rule + + (select 1 x y) => x + + MUST use `subsume`. 
+ + This is required for correctness because, once a value-use is removed, some + e-nodes in the e-class are more equal than others. There might be uses of `x` + in a scope where `y` is not available, and so emitting `(select 1 x y)` in + place of `x` in such cases would introduce uses of `y` where it is not + defined. + + An exception to this rule is discarding constants, as they can be + rematerialized anywhere without introducing correctness issues. For example, + the (admittedly silly) rule `(select 1 x (iconst_u _)) => x` would be a good + candidate for not using `subsume`, as it does not discard any non-constant + values introduced in its LHS. + +3. Avoid overly general rewrites like commutativity and associativity. Instead, + prefer targeted instances of the rewrite (for example, canonicalizing adds + where one operand is a constant such that the constant is always the add's + second operand, rather than general commutativity for adds) or even writing + the "same" optimization rule multiple times. + + For example, the commutativity in the first rule in the following snippet is + bad because it will match even when the first operand is not an add: + + ;; Commute to allow `(foo (add ...) x)`, when we see it, to match. + (foo x y) => (foo y x) + + ;; Optimize. + (foo x (add ...)) => (bar x) + + Better is to commute only when we know that canonicalizing in this way will + all definitely allow the subsequent optimization rule to match: + + ;; Canonicalize all adds to `foo`'s second operand. + (foo (add ...) x) => (foo x (add ...)) + + ;; Optimize. + (foo x (add ...)) => (bar x) + + But even better in this case is to write the "same" optimization multiple + times: + + (foo (add ...) x) => (bar x) + (foo x (add ...)) => (bar x) + + The cost of rule-matching is amortized by the ISLE compiler, whereas the + intermediate result of each rewrite allocates new e-nodes and requires + storage in the dataflow graph. 
Therefore, additional rules are cheaper than + additional e-nodes. + + Commutativity and associativity in particular can cause huge amounts of + e-graph bloat. + + One day we intend to extend ISLE with built-in support for commutativity, so + we don't need to author the redundant commutations ourselves: + https://github.com/bytecodealliance/wasmtime/issues/6128 diff --git a/cranelift/codegen/src/opts/cprop.isle b/cranelift/codegen/src/opts/cprop.isle index 94574592d2e2..f84b300f62bd 100644 --- a/cranelift/codegen/src/opts/cprop.isle +++ b/cranelift/codegen/src/opts/cprop.isle @@ -167,12 +167,10 @@ (bxor ty (bxor ty x k1 @ (iconst ty _)) k2 @ (iconst ty _))) (bxor ty x (bxor ty k1 k2))) -(rule (simplify - (select ty (iconst_u _ (u64_nonzero _)) x y)) - x) -(rule (simplify - (select ty (iconst_u _ 0) x y)) - y) +(rule (simplify (select ty (iconst_u _ (u64_nonzero _)) x _)) + (subsume x)) +(rule (simplify (select ty (iconst_u _ 0) _ y)) + (subsume y)) ;; Replace subtraction by a "negative" constant with addition. ;; Notably, this gives `x - (-1) == x + 1`, so other patterns don't have to diff --git a/cranelift/codegen/src/opts/extends.isle b/cranelift/codegen/src/opts/extends.isle index a6bb2c5623f7..268407b0748a 100644 --- a/cranelift/codegen/src/opts/extends.isle +++ b/cranelift/codegen/src/opts/extends.isle @@ -29,12 +29,12 @@ (slt ty (uextend $I64 x @ (value_type $I32)) (iconst_u _ 0))) - (iconst_u ty 0)) + (subsume (iconst_u ty 0))) (rule (simplify (sge ty (uextend $I64 x @ (value_type $I32)) (iconst_u _ 0))) - (iconst_u ty 1)) + (subsume (iconst_u ty 1))) ;; Sign-extending can't change whether a number is zero nor how it signed-compares to zero (rule (simplify (eq _ (sextend _ x@(value_type ty)) (iconst_s _ 0))) @@ -77,18 +77,26 @@ ;; Matches values where `ireducing` them will not actually introduce another ;; instruction, since other rules will collapse them with the reduction. 
-(decl pure multi will_simplify_with_ireduce (Value) Value) -(rule (will_simplify_with_ireduce x@(uextend _ _)) x) -(rule (will_simplify_with_ireduce x@(sextend _ _)) x) -(rule (will_simplify_with_ireduce x@(iconst _ _)) x) -(rule (will_simplify_with_ireduce x@(unary_op _ _ a)) - (if-let _ (will_simplify_with_ireduce a)) +(decl pure multi will_simplify_with_ireduce_rec (u8 Value) Value) +(rule (will_simplify_with_ireduce_rec _ x@(uextend _ _)) x) +(rule (will_simplify_with_ireduce_rec _ x@(sextend _ _)) x) +(rule (will_simplify_with_ireduce_rec _ x@(iconst _ _)) x) +(rule (will_simplify_with_ireduce_rec depth x@(unary_op _ _ a)) + (if-let _ (u8_lt 0 depth)) + (if-let _ (reducible_modular_op x)) + (if-let _ (will_simplify_with_ireduce_rec (u8_sub depth 1) a)) x) -(rule (will_simplify_with_ireduce x@(binary_op _ _ a b)) - (if-let _ (will_simplify_with_ireduce a)) - (if-let _ (will_simplify_with_ireduce b)) +(rule (will_simplify_with_ireduce_rec depth x@(binary_op _ _ a b)) + (if-let _ (u8_lt 0 depth)) + (if-let _ (reducible_modular_op x)) + (if-let _ (will_simplify_with_ireduce_rec (u8_sub depth 1) a)) + (if-let _ (will_simplify_with_ireduce_rec (u8_sub depth 1) b)) x) +(decl pure multi will_simplify_with_ireduce (Value) Value) +(rule (will_simplify_with_ireduce x) + (will_simplify_with_ireduce_rec 2 x)) + ;; Matches values where the high bits of the input don't affect lower bits of ;; the output, and thus the inputs can be reduced before the operation rather ;; than doing the wide operation then reducing afterwards. diff --git a/cranelift/codegen/src/opts/icmp.isle b/cranelift/codegen/src/opts/icmp.isle index fc1d0a157b0b..f7d20fa3d322 100644 --- a/cranelift/codegen/src/opts/icmp.isle +++ b/cranelift/codegen/src/opts/icmp.isle @@ -2,16 +2,16 @@ ;; `x == x` is always true for integers; `x != x` is false. Strict ;; inequalities are false, and loose inequalities are true. 
-(rule (simplify (eq (ty_int ty) x x)) (iconst_u ty 1)) -(rule (simplify (ne (ty_int ty) x x)) (iconst_u ty 0)) -(rule (simplify (ugt (ty_int ty) x x)) (iconst_u ty 0)) -(rule (simplify (uge (ty_int ty) x x)) (iconst_u ty 1)) -(rule (simplify (sgt (ty_int ty) x x)) (iconst_u ty 0)) -(rule (simplify (sge (ty_int ty) x x)) (iconst_u ty 1)) -(rule (simplify (ult (ty_int ty) x x)) (iconst_u ty 0)) -(rule (simplify (ule (ty_int ty) x x)) (iconst_u ty 1)) -(rule (simplify (slt (ty_int ty) x x)) (iconst_u ty 0)) -(rule (simplify (sle (ty_int ty) x x)) (iconst_u ty 1)) +(rule (simplify (eq (ty_int ty) x x)) (subsume (iconst_u ty 1))) +(rule (simplify (ne (ty_int ty) x x)) (subsume (iconst_u ty 0))) +(rule (simplify (ugt (ty_int ty) x x)) (subsume (iconst_u ty 0))) +(rule (simplify (uge (ty_int ty) x x)) (subsume (iconst_u ty 1))) +(rule (simplify (sgt (ty_int ty) x x)) (subsume (iconst_u ty 0))) +(rule (simplify (sge (ty_int ty) x x)) (subsume (iconst_u ty 1))) +(rule (simplify (ult (ty_int ty) x x)) (subsume (iconst_u ty 0))) +(rule (simplify (ule (ty_int ty) x x)) (subsume (iconst_u ty 1))) +(rule (simplify (slt (ty_int ty) x x)) (subsume (iconst_u ty 0))) +(rule (simplify (sle (ty_int ty) x x)) (subsume (iconst_u ty 1))) ;; Optimize icmp-of-icmp. (rule (simplify (ne ty diff --git a/cranelift/filetests/filetests/egraph/associative-and-commutative.clif b/cranelift/filetests/filetests/egraph/associative-and-commutative.clif index 005a87d5f75d..ab4b04914ba3 100644 --- a/cranelift/filetests/filetests/egraph/associative-and-commutative.clif +++ b/cranelift/filetests/filetests/egraph/associative-and-commutative.clif @@ -79,3 +79,41 @@ block0(v0: i32, v1: i32, v2: i32, v3: i32): ; nextln: v8 = bxor v4, v7 ; check: return v8 } + +;; We don't have any assertions about the result of optimizing this function, +;; but it's a good canary for unbounded recursion in optimization rulesets. 
In +;; particular, because of the shared structure in the dag, it won't be obvious +;; to rules that are pattern matching on trees that this is actually a chain, +;; and they will exhibit exponential behavior as a result. +function %iadd_big_chain(i8) -> i16 { +block0(v0: i8): + v1 = uextend.i32 v0 + v2 = iconst.i32 42 + v3 = iadd v1, v2 + v4 = iadd v3, v3 + v5 = iadd v4, v4 + v6 = iadd v5, v5 + v7 = iadd v6, v6 + v8 = iadd v7, v7 + v9 = iadd v8, v8 + v10 = iadd v9, v9 + v11 = iadd v10, v10 + v12 = iadd v11, v11 + v13 = iadd v12, v12 + v14 = iadd v13, v13 + v15 = iadd v14, v14 + v16 = iadd v15, v15 + v17 = iadd v16, v16 + v18 = iadd v17, v17 + v19 = iadd v18, v18 + v20 = iadd v19, v19 + v21 = iadd v20, v20 + v22 = iadd v21, v21 + v23 = iadd v22, v22 + v24 = iadd v23, v23 + v25 = iadd v24, v24 + v26 = iadd v25, v25 + v27 = iadd v26, v26 + v28 = ireduce.i16 v27 + return v28 +} diff --git a/cranelift/filetests/filetests/egraph/issue-7875.clif b/cranelift/filetests/filetests/egraph/issue-7875.clif new file mode 100644 index 000000000000..abdcb32288a5 --- /dev/null +++ b/cranelift/filetests/filetests/egraph/issue-7875.clif @@ -0,0 +1,37 @@ +test optimize +set enable_verifier=true +set opt_level=speed +target x86_64 + +;; This test case should optimize just fine, and should definitely not produce +;; CLIF that has verifier errors like +;; +;; error: inst10 (v12 = select.f32 v11, v4, v10 ; v11 = 1): uses value arg +;; from non-dominating block4 + +function %foo() { +block0: + v0 = iconst.i64 0 + v2 = f32const 0.0 + v9 = f32const 0.0 + v20 = fneg v2 + v18 = fcmp eq v20, v20 + v4 = select v18, v2, v20 + v8 = iconst.i32 0 + v11 = iconst.i32 1 + brif v0, block2, block3 + +block2: + brif.i32 v8, block4(v2), block4(v9) + +block4(v10: f32): + v12 = select.f32 v11, v4, v10 + v13 = bitcast.i32 v12 + store v13, v0 + trap user0 + +block3: + v15 = bitcast.i32 v4 + store v15, v0 + return +}