From 3c3e5b5522569f6e93b50dbd206c044e3016177f Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 12 Oct 2022 11:11:14 -0400 Subject: [PATCH] Improve simplification by running it twice --- datafusion-examples/examples/expr_api.rs | 9 ++--- datafusion/optimizer/src/expr_simplifier.rs | 44 +++++++++++++++++++-- 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/datafusion-examples/examples/expr_api.rs b/datafusion-examples/examples/expr_api.rs index 35a508b0fbc52..fe7a7936d5de7 100644 --- a/datafusion-examples/examples/expr_api.rs +++ b/datafusion-examples/examples/expr_api.rs @@ -106,12 +106,11 @@ fn simplify_demo() -> Result<()> { col("i") + lit(3) ); - // TODO uncomment when https://github.com/apache/arrow-datafusion/issues/1160 is done // (i * 0) > 5 --> false (only if null) - // assert_eq!( - // simplifier.simplify((col("i") * lit(0)).gt(lit(5)))?, - // lit(false) - // ); + assert_eq!( + simplifier.simplify((col("i") * lit(0)).gt(lit(5)))?, + lit(false) + ); // Logical simplification diff --git a/datafusion/optimizer/src/expr_simplifier.rs b/datafusion/optimizer/src/expr_simplifier.rs index 6e56ff715b5d3..a30fa9c3b5eee 100644 --- a/datafusion/optimizer/src/expr_simplifier.rs +++ b/datafusion/optimizer/src/expr_simplifier.rs @@ -111,14 +111,18 @@ impl ExprSimplifier { /// assert_eq!(expr, b_lt_2); /// ``` pub fn simplify(&self, expr: Expr) -> Result { - let mut rewriter = Simplifier::new(&self.info); + let mut simplifier = Simplifier::new(&self.info); let mut const_evaluator = ConstEvaluator::try_new(self.info.execution_props())?; // TODO iterate until no changes are made during rewrite // (evaluating constants can enable new simplifications and // simplifications can enable new constant evaluation) // https://github.com/apache/arrow-datafusion/issues/1160 - expr.rewrite(&mut const_evaluator)?.rewrite(&mut rewriter) + expr.rewrite(&mut const_evaluator)? + .rewrite(&mut simplifier)? + // run both passes twice to try an minimize simplifications that we missed + .rewrite(&mut const_evaluator)? + .rewrite(&mut simplifier) } /// Apply type coercion to an [`Expr`] so that it can be @@ -231,7 +235,7 @@ mod tests { use super::*; use arrow::datatypes::{Field, Schema}; use datafusion_common::ToDFSchema; - use datafusion_expr::{col, lit}; + use datafusion_expr::{col, lit, when}; #[test] fn api_basic() { @@ -274,4 +278,38 @@ mod tests { .to_dfschema_ref() .unwrap() } + + #[test] + fn simplify_and_constant_prop() { + let props = ExecutionProps::new(); + let simplifier = + ExprSimplifier::new(SimplifyContext::new(&props).with_schema(test_schema())); + + // should be able to simplify to false + // (i * (1 - 2)) > 0 + let expr = (col("i") * (lit(1) - lit(1))).gt(lit(0)); + let expected = lit(false); + assert_eq!(expected, simplifier.simplify(expr).unwrap()); + } + + #[test] + fn simplify_and_constant_prop_with_case() { + let props = ExecutionProps::new(); + let simplifier = + ExprSimplifier::new(SimplifyContext::new(&props).with_schema(test_schema())); + + // CASE + // WHEN i>5 AND false THEN i > 5 + // WHEN i<5 AND true THEN i < 5 + // ELSE false + // END + // + // Can be simplified to `i < 5` + let expr = when(col("i").gt(lit(5)).and(lit(false)), col("i").gt(lit(5))) + .when(col("i").lt(lit(5)).and(lit(true)), col("i").lt(lit(5))) + .otherwise(lit(false)) + .unwrap(); + let expected = col("i").lt(lit(5)); + assert_eq!(expected, simplifier.simplify(expr).unwrap()); + } }