From 2ed544e71d2661b1b287df6d0120ca5a2bb378f6 Mon Sep 17 00:00:00 2001 From: Igor Izvekov Date: Mon, 27 Feb 2023 21:06:40 +0300 Subject: [PATCH 01/13] feat: add optimization rules for bitwise operations --- datafusion/expr/src/expr.rs | 25 + datafusion/expr/src/expr_fn.rs | 45 ++ .../simplify_expressions/expr_simplifier.rs | 640 +++++++++++++++++- .../src/simplify_expressions/utils.rs | 75 +- 4 files changed, 780 insertions(+), 5 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 8b6c39043b6c9..1530513e96371 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -639,6 +639,31 @@ impl Expr { binary_expr(self, Operator::Or, other) } + /// Return `self & other` + pub fn bitwise_and(self, other: Expr) -> Expr { + binary_expr(self, Operator::BitwiseAnd, other) + } + + /// Return `self | other` + pub fn bitwise_or(self, other: Expr) -> Expr { + binary_expr(self, Operator::BitwiseOr, other) + } + + /// Return `self ^ other` + pub fn bitwise_xor(self, other: Expr) -> Expr { + binary_expr(self, Operator::BitwiseXor, other) + } + + /// Return `self >> other` + pub fn bitwise_shift_right(self, other: Expr) -> Expr { + binary_expr(self, Operator::BitwiseShiftRight, other) + } + + /// Return `self << other` + pub fn bitwise_shift_left(self, other: Expr) -> Expr { + binary_expr(self, Operator::BitwiseShiftLeft, other) + } + /// Return `!self` #[allow(clippy::should_implement_trait)] pub fn not(self) -> Expr { diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 9ec35e2387769..4eb6ba520fbc3 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -112,6 +112,51 @@ pub fn count(expr: Expr) -> Expr { )) } +/// Return a new expression with bitwise AND +pub fn bitwise_and(left: Expr, right: Expr) -> Expr { + Expr::BinaryExpr(BinaryExpr::new( + Box::new(left), + Operator::BitwiseAnd, + Box::new(right), + )) +} + +/// Return a new expression with bitwise OR +pub fn bitwise_or(left: Expr, right: Expr) -> Expr { + Expr::BinaryExpr(BinaryExpr::new( + Box::new(left), + Operator::BitwiseOr, + Box::new(right), + )) +} + +/// Return a new expression with bitwise XOR +pub fn bitwise_xor(left: Expr, right: Expr) -> Expr { + Expr::BinaryExpr(BinaryExpr::new( + Box::new(left), + Operator::BitwiseXor, + Box::new(right), + )) +} + +/// Return a new expression with bitwise SHIFT RIGHT +pub fn bitwise_shift_right(left: Expr, right: Expr) -> Expr { + Expr::BinaryExpr(BinaryExpr::new( + Box::new(left), + Operator::BitwiseShiftRight, + Box::new(right), + )) +} + +/// Return a new expression with bitwise SHIFT LEFT +pub fn bitwise_shift_left(left: Expr, right: Expr) -> Expr { + Expr::BinaryExpr(BinaryExpr::new( + Box::new(left), + Operator::BitwiseShiftLeft, + Box::new(right), + )) +} + /// Create an expression to represent the count(distinct) aggregate function pub fn count_distinct(expr: Expr) -> Expr { Expr::AggregateFunction(AggregateFunction::new( diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 5421c43da1236..90de6df77dd21 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -338,7 +338,8 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { fn mutate(&mut self, expr: Expr) -> Result { use datafusion_expr::Operator::{ And, Divide, Eq, Modulo, Multiply, NotEq, Or, RegexIMatch, RegexMatch, - RegexNotIMatch, RegexNotMatch, + RegexNotIMatch, RegexNotMatch, BitwiseAnd, BitwiseOr, BitwiseXor, + BitwiseShiftRight, BitwiseShiftLeft }; let info = self.info; @@ -700,6 +701,270 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { return Err(DataFusionError::ArrowError(ArrowError::DivideByZero)); } + // + // Rules for BitwiseAnd + // + + // A & null -> null + Expr::BinaryExpr(BinaryExpr { + left: _, + op: BitwiseAnd, + right, + }) if is_null(&right) => *right, + + // null & A -> null + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseAnd, + right: _, + }) if is_null(&left) => *left, + + // A & 0 -> 0 + Expr::BinaryExpr(BinaryExpr { + left: _, + op: BitwiseAnd, + right, + }) if is_zero(&right) => *right, + + // 0 & A -> 0 + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseAnd, + right: _, + }) if is_zero(&left) => *left, + + // !A & A -> 0 (if A not nullable) + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseAnd, + right, + }) if is_negative_of(&left, &right) && !info.nullable(&right)? => + Expr::Literal(ScalarValue::Int32(Some(0))), + + // A & !A -> 0 (if A not nullable) + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseAnd, + right, + }) if is_negative_of(&right, &left) && !info.nullable(&left)? => + Expr::Literal(ScalarValue::Int32(Some(0))), + + // (..A..) & A --> (..A..) + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseAnd, + right, + }) if expr_contains(&left, &right, BitwiseAnd) => *left, + + // A & (..A..) --> (..A..) + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseAnd, + right, + }) if expr_contains(&right, &left, BitwiseAnd) => *right, + + // A & (A | B) --> A (if B not null) + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseAnd, + right, + }) if !info.nullable(&right)? && is_op_with(BitwiseOr, &right, &left) => *left, + + // (A | B) & A --> A (if B not null) + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseAnd, + right, + }) if !info.nullable(&left)? && is_op_with(BitwiseOr, &left, &right) => *right, + + // + // Rules for BitwiseOr + // + + // A | null -> null + Expr::BinaryExpr(BinaryExpr { + left: _, + op: BitwiseOr, + right, + }) if is_null(&right) => *right, + + // null | A -> null + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseOr, + right: _, + }) if is_null(&left) => *left, + + // A | 0 -> A + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseOr, + right, + }) if is_zero(&right) => *left, + + // 0 | A -> A + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseOr, + right, + }) if is_zero(&left) => *right, + + // !A | A -> -1 (if A not nullable) + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseOr, + right, + }) if is_negative_of(&left, &right) && !info.nullable(&right)? => + Expr::Literal(ScalarValue::Int32(Some(-1))), + + // A | !A -> -1 (if A not nullable) + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseOr, + right, + }) if is_negative_of(&right, &left) && !info.nullable(&left)? => + Expr::Literal(ScalarValue::Int32(Some(-1))), + + // (..A..) | A --> (..A..) + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseOr, + right, + }) if expr_contains(&left, &right, BitwiseOr) => *left, + + // A | (..A..) --> (..A..) + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseOr, + right, + }) if expr_contains(&left, &right, BitwiseOr) => *left, + + // A | (A & B) --> A (if B not null) + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseOr, + right, + }) if !info.nullable(&right)? && is_op_with(BitwiseAnd, &right, &left) => *left, + + // (A & B) | A --> A (if B not null) + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseOr, + right, + }) if !info.nullable(&left)? && is_op_with(BitwiseAnd, &left, &right) => *right, + + // + // Rules for BitwiseXor + // + + // A ^ null -> null + Expr::BinaryExpr(BinaryExpr { + left: _, + op: BitwiseXor, + right, + }) if is_null(&right) => *right, + + // null ^ A -> null + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseXor, + right: _, + }) if is_null(&left) => *left, + + // A ^ 0 -> A + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseXor, + right, + }) if is_zero(&right) => *left, + + // 0 ^ A -> A + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseXor, + right, + }) if is_zero(&left) => *right, + + // !A ^ A -> -1 (if A not nullable) + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseXor, + right, + }) if is_negative_of(&left, &right) && !info.nullable(&right)? => + Expr::Literal(ScalarValue::Int32(Some(-1))), + + // A ^ !A -> -1 (if A not nullable) + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseXor, + right, + }) if is_negative_of(&right, &left) && !info.nullable(&left)? => + Expr::Literal(ScalarValue::Int32(Some(-1))), + + // (..A..) ^ A --> (the expression without A, if number of A is odd, otherwise one A) + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseXor, + right, + }) if expr_contains(&left, &right, BitwiseXor) => delete_xor_in_complex_expr(&left, &right), + + // A ^ (..A..) --> (the expression without A, if number of A is odd, otherwise one A) + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseXor, + right, + }) if expr_contains(&right, &left, BitwiseXor) => delete_xor_in_complex_expr(&right, &left), + + // + // Rules for BitwiseShiftRight + // + + // A >> null -> null + Expr::BinaryExpr(BinaryExpr { + left: _, + op: BitwiseShiftRight, + right, + }) if is_null(&right) => *right, + + // null >> A -> null + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseShiftRight, + right: _, + }) if is_null(&left) => *left, + + // A >> 0 -> A + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseShiftRight, + right, + }) if is_zero(&right) => *left, + + // + // Rules for BitwiseShiftRight + // + + // A << null -> null + Expr::BinaryExpr(BinaryExpr { + left: _, + op: BitwiseShiftLeft, + right, + }) if is_null(&right) => *right, + + // null << A -> null + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseShiftLeft, + right: _, + }) if is_null(&left) => *left, + + // A << 0 -> A + Expr::BinaryExpr(BinaryExpr { + left, + op: BitwiseShiftLeft, + right, + }) if is_zero(&right) => *left, + // // Rules for Not // @@ -1346,6 +1611,373 @@ mod tests { assert_eq!(simplify(expr), expected); } + #[test] + fn test_simplify_bitwise_xor_by_null() { + let null = Expr::Literal(ScalarValue::Null); + // A ^ null --> null + { + let expr = binary_expr(col("c2"), Operator::BitwiseXor, null.clone()); + assert_eq!(simplify(expr), null); + } + // null ^ A --> null + { + let expr = binary_expr(null.clone(), Operator::BitwiseXor, col("c2")); + assert_eq!(simplify(expr), null); + } + } + + #[test] + fn test_simplify_bitwise_shift_right_by_null() { + let null = Expr::Literal(ScalarValue::Null); + // A >> null --> null + { + let expr = binary_expr(col("c2"), Operator::BitwiseShiftRight, null.clone()); + assert_eq!(simplify(expr), null); + } + // null >> A --> null + { + let expr = binary_expr(null.clone(), Operator::BitwiseShiftRight, col("c2")); + assert_eq!(simplify(expr), null); + } + } + + #[test] + fn test_simplify_bitwise_shift_left_by_null() { + let null = Expr::Literal(ScalarValue::Null); + // A << null --> null + { + let expr = binary_expr(col("c2"), Operator::BitwiseShiftLeft, null.clone()); + assert_eq!(simplify(expr), null); + } + // null << A --> null + { + let expr = binary_expr(null.clone(), Operator::BitwiseShiftLeft, col("c2")); + assert_eq!(simplify(expr), null); + } + } + + #[test] + fn test_simplify_bitwise_and_by_zero() { + // A & 0 --> 0 + { + let expr = binary_expr(col("c2"), Operator::BitwiseAnd, lit(0)); + assert_eq!(simplify(expr), lit(0)); + } + // 0 & A --> 0 + { + let expr = binary_expr(lit(0), Operator::BitwiseAnd, col("c2")); + assert_eq!(simplify(expr), lit(0)); + } + } + + #[test] + fn test_simplify_bitwise_or_by_zero() { + // A | 0 --> A + { + let expr = binary_expr(col("c2"), Operator::BitwiseOr, lit(0)); + assert_eq!(simplify(expr), col("c2")); + } + // 0 | A --> A + { + let expr = binary_expr(lit(0), Operator::BitwiseOr, col("c2")); + assert_eq!(simplify(expr), col("c2")); + } + } + + #[test] + fn test_simplify_bitwise_xor_by_zero() { + // A ^ 0 --> A + { + let expr = binary_expr(col("c2"), Operator::BitwiseXor, lit(0)); + assert_eq!(simplify(expr), col("c2")); + } + // 0 ^ A --> A + { + let expr = binary_expr(lit(0), Operator::BitwiseXor, col("c2")); + assert_eq!(simplify(expr), col("c2")); + } + } + + #[test] + fn test_simplify_bitwise_bitwise_shift_right_by_zero() { + // A >> 0 --> A + { + let expr = binary_expr(col("c2"), Operator::BitwiseShiftRight, lit(0)); + assert_eq!(simplify(expr), col("c2")); + } + } + + #[test] + fn test_simplify_bitwise_bitwise_shift_left_by_zero() { + // A << 0 --> A + { + let expr = binary_expr(col("c2"), Operator::BitwiseShiftLeft, lit(0)); + assert_eq!(simplify(expr), col("c2")); + } + } + + #[test] + fn test_simplify_bitwise_and_by_null() { + let null = Expr::Literal(ScalarValue::Null); + // A & null --> null + { + let expr = binary_expr(col("c2"), Operator::BitwiseAnd, null.clone()); + assert_eq!(simplify(expr), null); + } + // null & A --> null + { + let expr = binary_expr(null.clone(), Operator::BitwiseAnd, col("c2")); + assert_eq!(simplify(expr), null); + } + } + + #[test] + fn test_simplify_composed_bitwise_and() { + // ((c > 5) & (c1 < 6)) & (c > 5) --> ((c > 5) & c1) + + let expr = binary_expr( + binary_expr(col("c2").gt(lit(5)), Operator::BitwiseAnd, col("c1").lt(lit(6))), + Operator::BitwiseAnd, + col("c2").gt(lit(5)), + ); + let expected = + binary_expr(col("c2").gt(lit(5)), Operator::BitwiseAnd, col("c1").lt(lit(6))); + + assert_eq!(simplify(expr), expected); + } + + #[test] + fn test_simplify_composed_bitwise_or() { + // ((c > 5) | (c1 < 6)) | (c > 5) --> ((c > 5) | c1) + + let expr = binary_expr( + binary_expr(col("c2").gt(lit(5)), Operator::BitwiseOr, col("c1").lt(lit(6))), + Operator::BitwiseOr, + col("c2").gt(lit(5)), + ); + let expected = + binary_expr(col("c2").gt(lit(5)), Operator::BitwiseOr, col("c1").lt(lit(6))); + + assert_eq!(simplify(expr), expected); + } + + #[test] + fn test_simplify_composed_bitwise_xor() { + // with an even number of the column "c" + // c ^ ((c ^ (c | c1)) ^ (c1 & c)) --> (c | c1) ^ (c1 & c) + + let expr = binary_expr( + col("c2"), + Operator::BitwiseXor, + binary_expr( + binary_expr( + col("c2"), + Operator::BitwiseXor, + binary_expr(col("c2"), Operator::BitwiseOr, col("c1")), + ), + Operator::BitwiseXor, + binary_expr( + col("c1"), + Operator::BitwiseAnd, + col("c2"), + ) + ) + ); + + let expected = + binary_expr( + binary_expr( + col("c2"), + Operator::BitwiseOr, + col("c1"), + ), + Operator::BitwiseXor, + binary_expr( + col("c1"), + Operator::BitwiseAnd, + col("c2"), + ), + ); + + assert_eq!(simplify(expr), expected); + + // with an odd number of the column "c" + // c ^ (c ^ (c | c1)) ^ ((c1 & c) ^ c) --> c ^ ((c | c1) ^ (c1 & c)) + + let expr = binary_expr( + col("c2"), + Operator::BitwiseXor, + binary_expr( + binary_expr( + col("c2"), + Operator::BitwiseXor, + binary_expr(col("c2"), Operator::BitwiseOr, col("c1")), + ), + Operator::BitwiseXor, + binary_expr( + binary_expr( + col("c1"), + Operator::BitwiseAnd, + col("c2"), + ), + Operator::BitwiseXor, + col("c2"), + ) + ) + ); + + let expected = + binary_expr( + col("c2"), + Operator::BitwiseXor, + binary_expr( + binary_expr( + col("c2"), + Operator::BitwiseOr, + col("c1"), + ), + Operator::BitwiseXor, + binary_expr( + col("c1"), + Operator::BitwiseAnd, + col("c2"), + ), + ) + ); + + assert_eq!(simplify(expr), expected); + } + + #[test] + fn test_simplify_negated_bitwise_and() { + // !(c > 5) & (c > 5) --> 0 + let expr = binary_expr( + Expr::Negative(Box::new(col("c2_non_null").gt(lit(5)))), + Operator::BitwiseAnd, + col("c2_non_null").gt(lit(5)), + ); + let expected = lit(0); + + assert_eq!(simplify(expr), expected); + + // (c > 5) & !(c > 5) --> 0 + let expr = binary_expr( + col("c2_non_null").gt(lit(5)), + Operator::BitwiseAnd, + Expr::Negative(Box::new(col("c2_non_null").gt(lit(5)))), + ); + let expected = lit(0); + + assert_eq!(simplify(expr), expected); + } + + #[test] + fn test_simplify_negated_bitwise_or() { + // !(c > 5) | (c > 5) --> -1 + let expr = binary_expr( + Expr::Negative(Box::new(col("c2_non_null").gt(lit(5)))), + Operator::BitwiseOr, + col("c2_non_null").gt(lit(5)), + ); + let expected = lit(-1); + + assert_eq!(simplify(expr), expected); + + // (c > 5) | !(c > 5) --> -1 + let expr = binary_expr( + col("c2_non_null").gt(lit(5)), + Operator::BitwiseOr, + Expr::Negative(Box::new(col("c2_non_null").gt(lit(5)))), + ); + let expected = lit(-1); + + assert_eq!(simplify(expr), expected); + } + + #[test] + fn test_simplify_negated_bitwise_xor() { + // !(c > 5) ^ (c > 5) --> -1 + let expr = binary_expr( + Expr::Negative(Box::new(col("c2_non_null").gt(lit(5)))), + Operator::BitwiseXor, + col("c2_non_null").gt(lit(5)), + ); + let expected = lit(-1); + + assert_eq!(simplify(expr), expected); + + // (c > 5) ^ !(c > 5) --> -1 + let expr = binary_expr( + col("c2_non_null").gt(lit(5)), + Operator::BitwiseXor, + Expr::Negative(Box::new(col("c2_non_null").gt(lit(5)))), + ); + let expected = lit(-1); + + assert_eq!(simplify(expr), expected); + } + + #[test] + fn test_simplify_bitwise_and_or() { + // (c < 3) & ((c < 3) | c1) -> (c < 3) + let expr = binary_expr( + col("c2_non_null").lt(lit(3)), + Operator::BitwiseAnd, + binary_expr( + col("c2_non_null").lt(lit(3)), + Operator::BitwiseOr, + col("c1_non_null"), + ) + ); + let expected = col("c2_non_null").lt(lit(3)); + + assert_eq!(simplify(expr), expected); + } + + #[test] + fn test_simplify_bitwise_or_and() { + // (c < 3) | ((c < 3) & c1) -> (c < 3) + let expr = binary_expr( + col("c2_non_null").lt(lit(3)), + Operator::BitwiseOr, + binary_expr( + col("c2_non_null").lt(lit(3)), + Operator::BitwiseAnd, + col("c1_non_null"), + ) + ); + let expected = col("c2_non_null").lt(lit(3)); + + assert_eq!(simplify(expr), expected); + } + + #[test] + fn test_simplify_simple_bitwise_and() { + // (c > 5) & (c > 5) -> (c > 5) + let expr = (col("c2").gt(lit(5))).bitwise_and(col("c2").gt(lit(5))); + let expected = col("c2").gt(lit(5)); + + assert_eq!(simplify(expr), expected); + } + + #[test] + fn test_simplify_simple_bitwise_or() { + // (c > 5) | (c > 5) -> (c > 5) + let expr = (col("c2").gt(lit(5))).bitwise_or(col("c2").gt(lit(5))); + let expected = col("c2").gt(lit(5)); + + assert_eq!(simplify(expr), expected); + } + + #[test] + fn test_simplify_simple_bitwise_xor() { + // (c > 5) ^ (c > 5) -> 0 + let expr = (col("c2").gt(lit(5))).bitwise_xor(col("c2").gt(lit(5))); + let expected = lit(0); + + assert_eq!(simplify(expr), expected); + } + #[test] #[should_panic( expected = "called `Result::unwrap()` on an `Err` value: ArrowError(DivideByZero)" @@ -1357,7 +1989,7 @@ mod tests { #[test] fn test_simplify_simple_and() { - // (c > 5) AND (c > 5) + // (c > 5) AND (c > 5) -> (c > 5) let expr = (col("c2").gt(lit(5))).and(col("c2").gt(lit(5))); let expected = col("c2").gt(lit(5)); @@ -1380,7 +2012,7 @@ mod tests { #[test] fn test_simplify_negated_and() { - // (c > 5) AND !(c > 5) -- > (c > 5) AND (c <= 5) + // (c > 5) AND !(c > 5) --> (c > 5) AND (c <= 5) let expr = binary_expr( col("c2").gt(lit(5)), Operator::And, @@ -1808,7 +2440,7 @@ mod tests { let schema = expr_test_schema(); assert_eq!(col("c2").get_type(&schema).unwrap(), DataType::Boolean); - // true = ture -> true + // true = true -> true assert_eq!(simplify(lit(true).eq(lit(true))), lit(true)); // true = false -> false diff --git a/datafusion/optimizer/src/simplify_expressions/utils.rs b/datafusion/optimizer/src/simplify_expressions/utils.rs index 10e5c0e874304..6de5e927ebdbb 100644 --- a/datafusion/optimizer/src/simplify_expressions/utils.rs +++ b/datafusion/optimizer/src/simplify_expressions/utils.rs @@ -23,6 +23,7 @@ use datafusion_expr::{ expr_fn::{and, concat_ws, or}, lit, BuiltinScalarFunction, Expr, Like, Operator, }; +use std::collections::VecDeque; pub static POWS_OF_TEN: [i128; 38] = [ 1, @@ -77,6 +78,73 @@ pub fn expr_contains(expr: &Expr, needle: &Expr, search_op: Operator) -> bool { } } +/// This enum is needed for the function "delete_expressions_in_complex_expr" for +/// creating a postfix notation vector +#[derive(PartialEq)] +enum SyntaxTreeItem { + Operator(Operator), + Expr(Expr), +} + +/// Deletes all 'needles' or remain one 'needle' that are found in a chain of search_op +/// expressions for XOR optimization. Such as: A ^ (A ^ (B ^ A)) +pub fn delete_xor_in_complex_expr(expr: &Expr, needle: &Expr) -> Expr { + let mut postfix_notation: VecDeque = VecDeque::new(); + /// Creates postfix notation + fn create_postfix_notation(postfix_notation: &mut VecDeque, expr: &Expr) { + match expr { + Expr::BinaryExpr(BinaryExpr { left, op, right }) if *op == Operator::BitwiseXor => { + postfix_notation.push_back(SyntaxTreeItem::Operator(*op)); + create_postfix_notation(postfix_notation, left); + create_postfix_notation(postfix_notation, right); + } + _ => { + postfix_notation.push_back(SyntaxTreeItem::Expr(expr.clone())) + } + } + } + let mut xor_counter: i32 = 0; + create_postfix_notation(&mut postfix_notation, expr); + + /// Restores expression from postfix notation + fn restore_expr(postfix_notation: &mut VecDeque, xor_counter: &mut i32, needle: &Expr) -> Expr { + let item = postfix_notation.pop_front().unwrap(); + + match item { + SyntaxTreeItem::Operator(operator) => { + let left = restore_expr(postfix_notation, xor_counter, needle); + let right = restore_expr(postfix_notation, xor_counter, needle); + if left == *needle { + *xor_counter += 1; + return right; + } else if right == *needle { + *xor_counter += 1; + return left; + } + + Expr::BinaryExpr(BinaryExpr::new( + Box::new(left), + operator, + Box::new(right), + )) + } + SyntaxTreeItem::Expr(expr) => expr + } + } + + let result_expr = restore_expr(&mut postfix_notation, &mut xor_counter, needle); + if result_expr == *needle { + return Expr::Literal(ScalarValue::Int32(Some(0))) + } else if xor_counter % 2 == 0 { + return Expr::BinaryExpr(BinaryExpr::new( + Box::new(needle.clone()), + Operator::BitwiseXor, + Box::new(result_expr), + )); + } + result_expr +} + pub fn is_zero(s: &Expr) -> bool { match s { Expr::Literal(ScalarValue::Int8(Some(0))) @@ -154,11 +222,16 @@ pub fn is_op_with(target_op: Operator, haystack: &Expr, needle: &Expr) -> bool { matches!(haystack, Expr::BinaryExpr(BinaryExpr { left, op, right }) if op == &target_op && (needle == left.as_ref() || needle == right.as_ref())) } -/// returns true if `not_expr` is !`expr` +/// returns true if `not_expr` is !`expr` (not) pub fn is_not_of(not_expr: &Expr, expr: &Expr) -> bool { matches!(not_expr, Expr::Not(inner) if expr == inner.as_ref()) } +/// returns true if `not_expr` is !`expr` (bitwise not) +pub fn is_negative_of(not_expr: &Expr, expr: &Expr) -> bool { + matches!(not_expr, Expr::Negative(inner) if expr == inner.as_ref()) +} + /// returns the contained boolean value in `expr` as /// `Expr::Literal(ScalarValue::Boolean(v))`. pub fn as_bool_lit(expr: Expr) -> Result> { From 1add43f0149215a5fa3739e1b06878d4d8747a56 Mon Sep 17 00:00:00 2001 From: Igor Izvekov Date: Mon, 27 Feb 2023 21:16:39 +0300 Subject: [PATCH 02/13] fix: cargo fmt --- .../simplify_expressions/expr_simplifier.rs | 158 +++++++++--------- .../src/simplify_expressions/utils.rs | 23 ++- 2 files changed, 97 insertions(+), 84 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 90de6df77dd21..793d9f442eb87 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -337,9 +337,9 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { /// rewrite the expression simplifying any constant expressions fn mutate(&mut self, expr: Expr) -> Result { use datafusion_expr::Operator::{ - And, Divide, Eq, Modulo, Multiply, NotEq, Or, RegexIMatch, RegexMatch, - RegexNotIMatch, RegexNotMatch, BitwiseAnd, BitwiseOr, BitwiseXor, - BitwiseShiftRight, BitwiseShiftLeft + And, BitwiseAnd, BitwiseOr, BitwiseShiftLeft, BitwiseShiftRight, BitwiseXor, + Divide, Eq, Modulo, Multiply, NotEq, Or, RegexIMatch, RegexMatch, + RegexNotIMatch, RegexNotMatch, }; let info = self.info; @@ -738,16 +738,18 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { left, op: BitwiseAnd, right, - }) if is_negative_of(&left, &right) && !info.nullable(&right)? => - Expr::Literal(ScalarValue::Int32(Some(0))), + }) if is_negative_of(&left, &right) && !info.nullable(&right)? => { + Expr::Literal(ScalarValue::Int32(Some(0))) + } // A & !A -> 0 (if A not nullable) Expr::BinaryExpr(BinaryExpr { left, op: BitwiseAnd, right, - }) if is_negative_of(&right, &left) && !info.nullable(&left)? => - Expr::Literal(ScalarValue::Int32(Some(0))), + }) if is_negative_of(&right, &left) && !info.nullable(&left)? => { + Expr::Literal(ScalarValue::Int32(Some(0))) + } // (..A..) & A --> (..A..) Expr::BinaryExpr(BinaryExpr { @@ -768,14 +770,18 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { left, op: BitwiseAnd, right, - }) if !info.nullable(&right)? && is_op_with(BitwiseOr, &right, &left) => *left, + }) if !info.nullable(&right)? && is_op_with(BitwiseOr, &right, &left) => { + *left + } // (A | B) & A --> A (if B not null) Expr::BinaryExpr(BinaryExpr { left, op: BitwiseAnd, right, - }) if !info.nullable(&left)? && is_op_with(BitwiseOr, &left, &right) => *right, + }) if !info.nullable(&left)? && is_op_with(BitwiseOr, &left, &right) => { + *right + } // // Rules for BitwiseOr @@ -814,16 +820,18 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { left, op: BitwiseOr, right, - }) if is_negative_of(&left, &right) && !info.nullable(&right)? => - Expr::Literal(ScalarValue::Int32(Some(-1))), + }) if is_negative_of(&left, &right) && !info.nullable(&right)? => { + Expr::Literal(ScalarValue::Int32(Some(-1))) + } // A | !A -> -1 (if A not nullable) Expr::BinaryExpr(BinaryExpr { left, op: BitwiseOr, right, - }) if is_negative_of(&right, &left) && !info.nullable(&left)? => - Expr::Literal(ScalarValue::Int32(Some(-1))), + }) if is_negative_of(&right, &left) && !info.nullable(&left)? => { + Expr::Literal(ScalarValue::Int32(Some(-1))) + } // (..A..) | A --> (..A..) Expr::BinaryExpr(BinaryExpr { @@ -844,14 +852,18 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { left, op: BitwiseOr, right, - }) if !info.nullable(&right)? && is_op_with(BitwiseAnd, &right, &left) => *left, + }) if !info.nullable(&right)? && is_op_with(BitwiseAnd, &right, &left) => { + *left + } // (A & B) | A --> A (if B not null) Expr::BinaryExpr(BinaryExpr { left, op: BitwiseOr, right, - }) if !info.nullable(&left)? && is_op_with(BitwiseAnd, &left, &right) => *right, + }) if !info.nullable(&left)? && is_op_with(BitwiseAnd, &left, &right) => { + *right + } // // Rules for BitwiseXor @@ -890,30 +902,36 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { left, op: BitwiseXor, right, - }) if is_negative_of(&left, &right) && !info.nullable(&right)? => - Expr::Literal(ScalarValue::Int32(Some(-1))), + }) if is_negative_of(&left, &right) && !info.nullable(&right)? => { + Expr::Literal(ScalarValue::Int32(Some(-1))) + } // A ^ !A -> -1 (if A not nullable) Expr::BinaryExpr(BinaryExpr { left, op: BitwiseXor, right, - }) if is_negative_of(&right, &left) && !info.nullable(&left)? => - Expr::Literal(ScalarValue::Int32(Some(-1))), + }) if is_negative_of(&right, &left) && !info.nullable(&left)? => { + Expr::Literal(ScalarValue::Int32(Some(-1))) + } // (..A..) ^ A --> (the expression without A, if number of A is odd, otherwise one A) Expr::BinaryExpr(BinaryExpr { left, op: BitwiseXor, right, - }) if expr_contains(&left, &right, BitwiseXor) => delete_xor_in_complex_expr(&left, &right), + }) if expr_contains(&left, &right, BitwiseXor) => { + delete_xor_in_complex_expr(&left, &right) + } // A ^ (..A..) --> (the expression without A, if number of A is odd, otherwise one A) Expr::BinaryExpr(BinaryExpr { left, op: BitwiseXor, right, - }) if expr_contains(&right, &left, BitwiseXor) => delete_xor_in_complex_expr(&right, &left), + }) if expr_contains(&right, &left, BitwiseXor) => { + delete_xor_in_complex_expr(&right, &left) + } // // Rules for BitwiseShiftRight @@ -1736,13 +1754,20 @@ mod tests { // ((c > 5) & (c1 < 6)) & (c > 5) --> ((c > 5) & c1) let expr = binary_expr( - binary_expr(col("c2").gt(lit(5)), Operator::BitwiseAnd, col("c1").lt(lit(6))), + binary_expr( + col("c2").gt(lit(5)), + Operator::BitwiseAnd, + col("c1").lt(lit(6)), + ), Operator::BitwiseAnd, col("c2").gt(lit(5)), ); - let expected = - binary_expr(col("c2").gt(lit(5)), Operator::BitwiseAnd, col("c1").lt(lit(6))); - + let expected = binary_expr( + col("c2").gt(lit(5)), + Operator::BitwiseAnd, + col("c1").lt(lit(6)), + ); + assert_eq!(simplify(expr), expected); } @@ -1751,13 +1776,20 @@ mod tests { // ((c > 5) | (c1 < 6)) | (c > 5) --> ((c > 5) | c1) let expr = binary_expr( - binary_expr(col("c2").gt(lit(5)), Operator::BitwiseOr, col("c1").lt(lit(6))), + binary_expr( + col("c2").gt(lit(5)), + Operator::BitwiseOr, + col("c1").lt(lit(6)), + ), Operator::BitwiseOr, col("c2").gt(lit(5)), ); - let expected = - binary_expr(col("c2").gt(lit(5)), Operator::BitwiseOr, col("c1").lt(lit(6))); - + let expected = binary_expr( + col("c2").gt(lit(5)), + Operator::BitwiseOr, + col("c1").lt(lit(6)), + ); + assert_eq!(simplify(expr), expected); } @@ -1776,29 +1808,16 @@ mod tests { binary_expr(col("c2"), Operator::BitwiseOr, col("c1")), ), Operator::BitwiseXor, - binary_expr( - col("c1"), - Operator::BitwiseAnd, - col("c2"), - ) - ) + binary_expr(col("c1"), Operator::BitwiseAnd, col("c2")), + ), + ); + + let expected = binary_expr( + binary_expr(col("c2"), Operator::BitwiseOr, col("c1")), + Operator::BitwiseXor, + binary_expr(col("c1"), Operator::BitwiseAnd, col("c2")), ); - let expected = - binary_expr( - binary_expr( - col("c2"), - Operator::BitwiseOr, - col("c1"), - ), - Operator::BitwiseXor, - binary_expr( - col("c1"), - Operator::BitwiseAnd, - col("c2"), - ), - ); - assert_eq!(simplify(expr), expected); // with an odd number of the column "c" @@ -1815,36 +1834,23 @@ mod tests { ), Operator::BitwiseXor, binary_expr( - binary_expr( - col("c1"), - Operator::BitwiseAnd, - col("c2"), - ), + binary_expr(col("c1"), Operator::BitwiseAnd, col("c2")), Operator::BitwiseXor, col("c2"), - ) - ) + ), + ), ); - let expected = + let expected = binary_expr( + col("c2"), + Operator::BitwiseXor, binary_expr( - col("c2"), + binary_expr(col("c2"), Operator::BitwiseOr, col("c1")), Operator::BitwiseXor, - binary_expr( - binary_expr( - col("c2"), - Operator::BitwiseOr, - col("c1"), - ), - Operator::BitwiseXor, - binary_expr( - col("c1"), - Operator::BitwiseAnd, - col("c2"), - ), - ) - ); - + binary_expr(col("c1"), Operator::BitwiseAnd, col("c2")), + ), + ); + assert_eq!(simplify(expr), expected); } @@ -1927,7 +1933,7 @@ mod tests { col("c2_non_null").lt(lit(3)), Operator::BitwiseOr, col("c1_non_null"), - ) + ), ); let expected = col("c2_non_null").lt(lit(3)); @@ -1944,7 +1950,7 @@ mod tests { col("c2_non_null").lt(lit(3)), Operator::BitwiseAnd, col("c1_non_null"), - ) + ), ); let expected = col("c2_non_null").lt(lit(3)); diff --git a/datafusion/optimizer/src/simplify_expressions/utils.rs b/datafusion/optimizer/src/simplify_expressions/utils.rs index 6de5e927ebdbb..0e30e90213912 100644 --- a/datafusion/optimizer/src/simplify_expressions/utils.rs +++ b/datafusion/optimizer/src/simplify_expressions/utils.rs @@ -91,23 +91,30 @@ enum SyntaxTreeItem { pub fn delete_xor_in_complex_expr(expr: &Expr, needle: &Expr) -> Expr { let mut postfix_notation: VecDeque = VecDeque::new(); /// Creates postfix notation - fn create_postfix_notation(postfix_notation: &mut VecDeque, expr: &Expr) { + fn create_postfix_notation( + postfix_notation: &mut VecDeque, + expr: &Expr, + ) { match expr { - Expr::BinaryExpr(BinaryExpr { left, op, right }) if *op == Operator::BitwiseXor => { + Expr::BinaryExpr(BinaryExpr { left, op, right }) + if *op == Operator::BitwiseXor => + { postfix_notation.push_back(SyntaxTreeItem::Operator(*op)); create_postfix_notation(postfix_notation, left); create_postfix_notation(postfix_notation, right); } - _ => { - postfix_notation.push_back(SyntaxTreeItem::Expr(expr.clone())) - } + _ => postfix_notation.push_back(SyntaxTreeItem::Expr(expr.clone())), } } let mut xor_counter: i32 = 0; create_postfix_notation(&mut postfix_notation, expr); /// Restores expression from postfix notation - fn restore_expr(postfix_notation: &mut VecDeque, xor_counter: &mut i32, needle: &Expr) -> Expr { + fn restore_expr( + postfix_notation: &mut VecDeque, + xor_counter: &mut i32, + needle: &Expr, + ) -> Expr { let item = postfix_notation.pop_front().unwrap(); match item { @@ -128,13 +135,13 @@ pub fn delete_xor_in_complex_expr(expr: &Expr, needle: &Expr) -> Expr { Box::new(right), )) } - SyntaxTreeItem::Expr(expr) => expr + SyntaxTreeItem::Expr(expr) => expr, } } let result_expr = restore_expr(&mut postfix_notation, &mut xor_counter, needle); if result_expr == *needle { - return Expr::Literal(ScalarValue::Int32(Some(0))) + return Expr::Literal(ScalarValue::Int32(Some(0))); } else if xor_counter % 2 == 0 { return Expr::BinaryExpr(BinaryExpr::new( Box::new(needle.clone()), From febd022ea3633ec6bd5df948bc8542f0ab9ecc46 Mon Sep 17 00:00:00 2001 From: Igor Izvekov Date: Thu, 2 Mar 2023 16:33:37 +0300 Subject: [PATCH 03/13] fix: comments, determining data type of an int literal, additional tests --- .../src/simplify_expressions/context.rs | 15 ++ .../simplify_expressions/expr_simplifier.rs | 231 +++++++++++++----- .../src/simplify_expressions/utils.rs | 18 +- 3 files changed, 197 insertions(+), 67 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/context.rs b/datafusion/optimizer/src/simplify_expressions/context.rs index 379a803f47a51..caafdbcbcb65b 100644 --- a/datafusion/optimizer/src/simplify_expressions/context.rs +++ b/datafusion/optimizer/src/simplify_expressions/context.rs @@ -38,6 +38,9 @@ pub trait SimplifyInfo { /// Returns details needed for partial expression evaluation fn execution_props(&self) -> &ExecutionProps; + + /// Returns data type of this expr needed for determining optimized int type of a value + fn get_data_type(&self, expr: &Expr) -> DataType; } /// Provides simplification information based on DFSchema and @@ -123,6 +126,18 @@ impl<'a> SimplifyInfo for SimplifyContext<'a> { }) } + /// Returns data type of this expr needed for determining optimized int type of a value + fn get_data_type(&self, expr: &Expr) -> DataType { + if self.schemas.len() == 1 { + match expr.get_type(&self.schemas[0]) { + Ok(expr_data_type) => expr_data_type, + Err(_) => DataType::Int32, + } + } else { + DataType::Int32 + } + } + fn execution_props(&self) -> &ExecutionProps { self.props } diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 793d9f442eb87..39538766cc083 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -719,19 +719,19 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { right: _, }) if is_null(&left) => *left, - // A & 0 -> 0 + // A & 0 -> 0 (if A not nullable) Expr::BinaryExpr(BinaryExpr { - left: _, + left, op: BitwiseAnd, right, - }) if is_zero(&right) => *right, + }) if !info.nullable(&left)? && is_zero(&right) => *right, - // 0 & A -> 0 + // 0 & A -> 0 (if A not nullable) Expr::BinaryExpr(BinaryExpr { left, op: BitwiseAnd, - right: _, - }) if is_zero(&left) => *left, + right, + }) if !info.nullable(&right)? && is_zero(&left) => *left, // !A & A -> 0 (if A not nullable) Expr::BinaryExpr(BinaryExpr { @@ -739,7 +739,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseAnd, right, }) if is_negative_of(&left, &right) && !info.nullable(&right)? => { - Expr::Literal(ScalarValue::Int32(Some(0))) + new_int_by_expr_data_type(0, &info.get_data_type(&left)) } // A & !A -> 0 (if A not nullable) @@ -748,7 +748,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseAnd, right, }) if is_negative_of(&right, &left) && !info.nullable(&left)? => { - Expr::Literal(ScalarValue::Int32(Some(0))) + new_int_by_expr_data_type(0, &info.get_data_type(&left)) } // (..A..) & A --> (..A..) @@ -801,19 +801,19 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { right: _, }) if is_null(&left) => *left, - // A | 0 -> A + // A | 0 -> A (if A not nullable) Expr::BinaryExpr(BinaryExpr { left, op: BitwiseOr, right, - }) if is_zero(&right) => *left, + }) if !info.nullable(&left)? && is_zero(&right) => *left, - // 0 | A -> A + // 0 | A -> A (if A not nullable) Expr::BinaryExpr(BinaryExpr { left, op: BitwiseOr, right, - }) if is_zero(&left) => *right, + }) if !info.nullable(&right)? && is_zero(&left) => *right, // !A | A -> -1 (if A not nullable) Expr::BinaryExpr(BinaryExpr { @@ -821,7 +821,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseOr, right, }) if is_negative_of(&left, &right) && !info.nullable(&right)? => { - Expr::Literal(ScalarValue::Int32(Some(-1))) + new_int_by_expr_data_type(-1, &info.get_data_type(&left)) } // A | !A -> -1 (if A not nullable) @@ -830,7 +830,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseOr, right, }) if is_negative_of(&right, &left) && !info.nullable(&left)? => { - Expr::Literal(ScalarValue::Int32(Some(-1))) + new_int_by_expr_data_type(-1, &info.get_data_type(&left)) } // (..A..) | A --> (..A..) @@ -845,7 +845,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { left, op: BitwiseOr, right, - }) if expr_contains(&left, &right, BitwiseOr) => *left, + }) if expr_contains(&right, &left, BitwiseOr) => *right, // A | (A & B) --> A (if B not null) Expr::BinaryExpr(BinaryExpr { @@ -883,19 +883,19 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { right: _, }) if is_null(&left) => *left, - // A ^ 0 -> A + // A ^ 0 -> A (if A not nullable) Expr::BinaryExpr(BinaryExpr { left, op: BitwiseXor, right, - }) if is_zero(&right) => *left, + }) if !info.nullable(&left)? && is_zero(&right) => *left, - // 0 ^ A -> A + // 0 ^ A -> A (if A not nullable) Expr::BinaryExpr(BinaryExpr { left, op: BitwiseXor, right, - }) if is_zero(&left) => *right, + }) if !info.nullable(&right)? && is_zero(&left) => *right, // !A ^ A -> -1 (if A not nullable) Expr::BinaryExpr(BinaryExpr { @@ -903,7 +903,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseXor, right, }) if is_negative_of(&left, &right) && !info.nullable(&right)? => { - Expr::Literal(ScalarValue::Int32(Some(-1))) + new_int_by_expr_data_type(-1, &info.get_data_type(&left)) } // A ^ !A -> -1 (if A not nullable) @@ -912,7 +912,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseXor, right, }) if is_negative_of(&right, &left) && !info.nullable(&left)? => { - Expr::Literal(ScalarValue::Int32(Some(-1))) + new_int_by_expr_data_type(-1, &info.get_data_type(&left)) } // (..A..) ^ A --> (the expression without A, if number of A is odd, otherwise one A) @@ -951,12 +951,12 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { right: _, }) if is_null(&left) => *left, - // A >> 0 -> A + // A >> 0 -> A (if A not nullable) Expr::BinaryExpr(BinaryExpr { left, op: BitwiseShiftRight, right, - }) if is_zero(&right) => *left, + }) if !info.nullable(&left)? && is_zero(&right) => *left, // // Rules for BitwiseShiftRight @@ -976,12 +976,12 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { right: _, }) if is_null(&left) => *left, - // A << 0 -> A + // A << 0 -> A (if A not nullable) Expr::BinaryExpr(BinaryExpr { left, op: BitwiseShiftLeft, right, - }) if is_zero(&right) => *left, + }) if !info.nullable(&left)? && is_zero(&right) => *left, // // Rules for Not @@ -1678,12 +1678,12 @@ mod tests { fn test_simplify_bitwise_and_by_zero() { // A & 0 --> 0 { - let expr = binary_expr(col("c2"), Operator::BitwiseAnd, lit(0)); + let expr = binary_expr(col("c2_non_null"), Operator::BitwiseAnd, lit(0)); assert_eq!(simplify(expr), lit(0)); } // 0 & A --> 0 { - let expr = binary_expr(lit(0), Operator::BitwiseAnd, col("c2")); + let expr = binary_expr(lit(0), Operator::BitwiseAnd, col("c2_non_null")); assert_eq!(simplify(expr), lit(0)); } } @@ -1692,13 +1692,13 @@ mod tests { fn test_simplify_bitwise_or_by_zero() { // A | 0 --> A { - let expr = binary_expr(col("c2"), Operator::BitwiseOr, lit(0)); - assert_eq!(simplify(expr), col("c2")); + let expr = binary_expr(col("c2_non_null"), Operator::BitwiseOr, lit(0)); + assert_eq!(simplify(expr), col("c2_non_null")); } // 0 | A --> A { - let expr = binary_expr(lit(0), Operator::BitwiseOr, col("c2")); - assert_eq!(simplify(expr), col("c2")); + let expr = binary_expr(lit(0), Operator::BitwiseOr, col("c2_non_null")); + assert_eq!(simplify(expr), col("c2_non_null")); } } @@ -1706,13 +1706,13 @@ mod tests { fn test_simplify_bitwise_xor_by_zero() { // A ^ 0 --> A { - let expr = binary_expr(col("c2"), Operator::BitwiseXor, lit(0)); - assert_eq!(simplify(expr), col("c2")); + let expr = binary_expr(col("c2_non_null"), Operator::BitwiseXor, lit(0)); + assert_eq!(simplify(expr), col("c2_non_null")); } // 0 ^ A --> A { - let expr = binary_expr(lit(0), Operator::BitwiseXor, col("c2")); - assert_eq!(simplify(expr), col("c2")); + let expr = binary_expr(lit(0), Operator::BitwiseXor, col("c2_non_null")); + assert_eq!(simplify(expr), col("c2_non_null")); } } @@ -1720,8 +1720,9 @@ mod tests { fn test_simplify_bitwise_bitwise_shift_right_by_zero() { // A >> 0 --> A { - let expr = binary_expr(col("c2"), Operator::BitwiseShiftRight, lit(0)); - assert_eq!(simplify(expr), col("c2")); + let expr = + binary_expr(col("c2_non_null"), Operator::BitwiseShiftRight, lit(0)); + assert_eq!(simplify(expr), col("c2_non_null")); } } @@ -1729,8 +1730,9 @@ mod tests { fn test_simplify_bitwise_bitwise_shift_left_by_zero() { // A << 0 --> A { - let expr = binary_expr(col("c2"), Operator::BitwiseShiftLeft, lit(0)); - assert_eq!(simplify(expr), col("c2")); + let expr = + binary_expr(col("c2_non_null"), Operator::BitwiseShiftLeft, lit(0)); + assert_eq!(simplify(expr), col("c2_non_null")); } } @@ -1751,7 +1753,7 @@ mod tests { #[test] fn test_simplify_composed_bitwise_and() { - // ((c > 5) & (c1 < 6)) & (c > 5) --> ((c > 5) & c1) + // ((c2 > 5) & (c1 < 6)) & (c2 > 5) --> (c2 > 5) & (c1 < 6) let expr = binary_expr( binary_expr( @@ -1769,11 +1771,29 @@ mod tests { ); assert_eq!(simplify(expr), expected); + + // (c2 > 5) & ((c2 > 5) & (c1 < 6)) --> (c2 > 5) & (c1 < 6) + + let expr = binary_expr( + col("c2").gt(lit(5)), + Operator::BitwiseAnd, + binary_expr( + col("c2").gt(lit(5)), + Operator::BitwiseAnd, + col("c1").lt(lit(6)), + ), + ); + let expected = binary_expr( + col("c2").gt(lit(5)), + Operator::BitwiseAnd, + col("c1").lt(lit(6)), + ); + assert_eq!(simplify(expr), expected); } #[test] fn test_simplify_composed_bitwise_or() { - // ((c > 5) | (c1 < 6)) | (c > 5) --> ((c > 5) | c1) + // ((c2 > 5) | (c1 < 6)) | (c2 > 5) --> (c2 > 5) | (c1 < 6) let expr = binary_expr( binary_expr( @@ -1791,12 +1811,31 @@ mod tests { ); assert_eq!(simplify(expr), expected); + + // (c2 > 5) | ((c2 > 5) | (c1 < 6)) --> (c2 > 5) | (c1 < 6) + + let expr = binary_expr( + col("c2").gt(lit(5)), + Operator::BitwiseOr, + binary_expr( + col("c2").gt(lit(5)), + Operator::BitwiseOr, + col("c1").lt(lit(6)), + ), + ); + let expected = binary_expr( + col("c2").gt(lit(5)), + Operator::BitwiseOr, + col("c1").lt(lit(6)), + ); + + assert_eq!(simplify(expr), expected); } #[test] fn test_simplify_composed_bitwise_xor() { - // with an even number of the column "c" - // c ^ ((c ^ (c | c1)) ^ (c1 & c)) --> (c | c1) ^ (c1 & c) + // with an even number of the column "c2" + // c2 ^ ((c2 ^ (c2 | c1)) ^ (c1 & c2)) --> (c2 | c1) ^ (c1 & c2) let expr = binary_expr( col("c2"), @@ -1820,8 +1859,8 @@ mod tests { assert_eq!(simplify(expr), expected); - // with an odd number of the column "c" - // c ^ (c ^ (c | c1)) ^ ((c1 & c) ^ c) --> c ^ ((c | c1) ^ (c1 & c)) + // with an odd number of the column "c2" + // c2 ^ (c2 ^ (c2 | c1)) ^ ((c1 & c2) ^ c2) --> c2 ^ ((c2 | c1) ^ (c1 & c2)) let expr = binary_expr( col("c2"), @@ -1856,76 +1895,134 @@ mod tests { #[test] fn test_simplify_negated_bitwise_and() { - // !(c > 5) & (c > 5) --> 0 + // !(c2 > 5) & (c2 > 5) --> 0 let expr = binary_expr( Expr::Negative(Box::new(col("c2_non_null").gt(lit(5)))), Operator::BitwiseAnd, col("c2_non_null").gt(lit(5)), ); - let expected = lit(0); + let expected = Expr::Literal(ScalarValue::Int32(Some(0))); assert_eq!(simplify(expr), expected); - - // (c > 5) & !(c > 5) --> 0 + // (c2 > 5) & !(c2 > 5) --> 0 let expr = binary_expr( col("c2_non_null").gt(lit(5)), Operator::BitwiseAnd, Expr::Negative(Box::new(col("c2_non_null").gt(lit(5)))), ); - let expected = lit(0); + let expected = Expr::Literal(ScalarValue::Int32(Some(0))); + + assert_eq!(simplify(expr), expected); + + // !c3 & c3 --> 0 + let expr = binary_expr( + Expr::Negative(Box::new(col("c3_non_null"))), + Operator::BitwiseAnd, + col("c3_non_null"), + ); + let expected = Expr::Literal(ScalarValue::Int64(Some(0))); + + assert_eq!(simplify(expr), expected); + // c3 & !c3 --> 0 + let expr = binary_expr( + col("c3_non_null"), + Operator::BitwiseAnd, + Expr::Negative(Box::new(col("c3_non_null"))), + ); + let expected = Expr::Literal(ScalarValue::Int64(Some(0))); assert_eq!(simplify(expr), expected); } #[test] fn test_simplify_negated_bitwise_or() { - // !(c > 5) | (c > 5) --> -1 + // !(c2 > 5) | (c2 > 5) --> -1 let expr = binary_expr( Expr::Negative(Box::new(col("c2_non_null").gt(lit(5)))), Operator::BitwiseOr, col("c2_non_null").gt(lit(5)), ); - let expected = lit(-1); + let expected = Expr::Literal(ScalarValue::Int32(Some(-1))); assert_eq!(simplify(expr), expected); - // (c > 5) | !(c > 5) --> -1 + // (c2 > 5) | !(c2 > 5) --> -1 let expr = binary_expr( col("c2_non_null").gt(lit(5)), Operator::BitwiseOr, Expr::Negative(Box::new(col("c2_non_null").gt(lit(5)))), ); - let expected = lit(-1); + let expected = Expr::Literal(ScalarValue::Int32(Some(-1))); + + assert_eq!(simplify(expr), expected); + + // !c3 | c3 --> -1 + let expr = binary_expr( + Expr::Negative(Box::new(col("c3_non_null"))), + Operator::BitwiseOr, + col("c3_non_null"), + ); + let expected = Expr::Literal(ScalarValue::Int64(Some(-1))); + + assert_eq!(simplify(expr), expected); + + // c3 | !c3 --> -1 + let expr = binary_expr( + col("c3_non_null"), + Operator::BitwiseOr, + Expr::Negative(Box::new(col("c3_non_null"))), + ); + let expected = Expr::Literal(ScalarValue::Int64(Some(-1))); assert_eq!(simplify(expr), expected); } #[test] fn test_simplify_negated_bitwise_xor() { - // !(c > 5) ^ (c > 5) --> -1 + // !(c2 > 5) ^ (c2 > 5) --> -1 let expr = binary_expr( Expr::Negative(Box::new(col("c2_non_null").gt(lit(5)))), Operator::BitwiseXor, col("c2_non_null").gt(lit(5)), ); - let expected = lit(-1); + let expected = Expr::Literal(ScalarValue::Int32(Some(-1))); assert_eq!(simplify(expr), expected); - // (c > 5) ^ !(c > 5) --> -1 + // (c2 > 5) ^ !(c2 > 5) --> -1 let expr = binary_expr( col("c2_non_null").gt(lit(5)), Operator::BitwiseXor, Expr::Negative(Box::new(col("c2_non_null").gt(lit(5)))), ); - let expected = lit(-1); + let expected = Expr::Literal(ScalarValue::Int32(Some(-1))); + + assert_eq!(simplify(expr), expected); + + // !c3 ^ c3 --> -1 + let expr = binary_expr( + Expr::Negative(Box::new(col("c3_non_null"))), + Operator::BitwiseXor, + col("c3_non_null"), + ); + let expected = Expr::Literal(ScalarValue::Int64(Some(-1))); + + assert_eq!(simplify(expr), expected); + + // c3 ^ !c3 --> -1 + let expr = binary_expr( + col("c3_non_null"), + Operator::BitwiseXor, + Expr::Negative(Box::new(col("c3_non_null"))), + ); + let expected = Expr::Literal(ScalarValue::Int64(Some(-1))); assert_eq!(simplify(expr), expected); } #[test] fn test_simplify_bitwise_and_or() { - // (c < 3) & ((c < 3) | c1) -> (c < 3) + // (c2 < 3) & ((c2 < 3) | c1) -> (c2 < 3) let expr = binary_expr( col("c2_non_null").lt(lit(3)), Operator::BitwiseAnd, @@ -1942,7 +2039,7 @@ mod tests { #[test] fn test_simplify_bitwise_or_and() { - // (c < 3) | ((c < 3) & c1) -> (c < 3) + // (c2 < 3) | ((c2 < 3) & c1) -> (c2 < 3) let expr = binary_expr( col("c2_non_null").lt(lit(3)), Operator::BitwiseOr, @@ -1959,7 +2056,7 @@ mod tests { #[test] fn test_simplify_simple_bitwise_and() { - // (c > 5) & (c > 5) -> (c > 5) + // (c2 > 5) & (c2 > 5) -> (c2 > 5) let expr = (col("c2").gt(lit(5))).bitwise_and(col("c2").gt(lit(5))); let expected = col("c2").gt(lit(5)); @@ -1968,7 +2065,7 @@ mod tests { #[test] fn test_simplify_simple_bitwise_or() { - // (c > 5) | (c > 5) -> (c > 5) + // (c2 > 5) | (c2 > 5) -> (c2 > 5) let expr = (col("c2").gt(lit(5))).bitwise_or(col("c2").gt(lit(5))); let expected = col("c2").gt(lit(5)); @@ -1977,7 +2074,7 @@ mod tests { #[test] fn test_simplify_simple_bitwise_xor() { - // (c > 5) ^ (c > 5) -> 0 + // (c2 > 5) ^ (c2 > 5) -> 0 let expr = (col("c2").gt(lit(5))).bitwise_xor(col("c2").gt(lit(5))); let expected = lit(0); @@ -1995,7 +2092,7 @@ mod tests { #[test] fn test_simplify_simple_and() { - // (c > 5) AND (c > 5) -> (c > 5) + // (c2 > 5) AND (c2 > 5) -> (c2 > 5) let expr = (col("c2").gt(lit(5))).and(col("c2").gt(lit(5))); let expected = col("c2").gt(lit(5)); @@ -2004,7 +2101,7 @@ mod tests { #[test] fn test_simplify_composed_and() { - // ((c > 5) AND (c1 < 6)) AND (c > 5) + // ((c2 > 5) AND (c1 < 6)) AND (c2 > 5) let expr = binary_expr( binary_expr(col("c2").gt(lit(5)), Operator::And, col("c1").lt(lit(6))), Operator::And, @@ -2018,7 +2115,7 @@ mod tests { #[test] fn test_simplify_negated_and() { - // (c > 5) AND !(c > 5) --> (c > 5) AND (c <= 5) + // (c2 > 5) AND !(c2 > 5) --> (c2 > 5) AND (c2 <= 5) let expr = binary_expr( col("c2").gt(lit(5)), Operator::And, @@ -2398,8 +2495,10 @@ mod tests { vec![ DFField::new(None, "c1", DataType::Utf8, true), DFField::new(None, "c2", DataType::Boolean, true), + DFField::new(None, "c3", DataType::Int64, true), DFField::new(None, "c1_non_null", DataType::Utf8, false), DFField::new(None, "c2_non_null", DataType::Boolean, false), + DFField::new(None, "c3_non_null", DataType::Int64, false), ], HashMap::new(), ) diff --git a/datafusion/optimizer/src/simplify_expressions/utils.rs b/datafusion/optimizer/src/simplify_expressions/utils.rs index 0e30e90213912..d6bb1fd8e3fa2 100644 --- a/datafusion/optimizer/src/simplify_expressions/utils.rs +++ b/datafusion/optimizer/src/simplify_expressions/utils.rs @@ -17,6 +17,7 @@ //! Utility functions for expression simplification +use arrow::datatypes::DataType; use datafusion_common::{DataFusionError, Result, ScalarValue}; use datafusion_expr::{ expr::{Between, BinaryExpr}, @@ -78,7 +79,7 @@ pub fn expr_contains(expr: &Expr, needle: &Expr, search_op: Operator) -> bool { } } -/// This enum is needed for the function "delete_expressions_in_complex_expr" for +/// This enum is needed for the function "delete_xor_in_complex_expr" for /// creating a postfix notation vector #[derive(PartialEq)] enum SyntaxTreeItem { @@ -239,6 +240,21 @@ pub fn is_negative_of(not_expr: &Expr, expr: &Expr) -> bool { matches!(not_expr, Expr::Negative(inner) if expr == inner.as_ref()) } +/// Returns a new int type based on data type of an expression +pub fn new_int_by_expr_data_type(val: i64, expr_data_type: &DataType) -> Expr { + match expr_data_type { + DataType::Int8 => Expr::Literal(ScalarValue::Int8(Some(val.try_into().unwrap()))), + DataType::Int16 => { + Expr::Literal(ScalarValue::Int16(Some(val.try_into().unwrap()))) + } + DataType::Int32 => { + Expr::Literal(ScalarValue::Int32(Some(val.try_into().unwrap()))) + } + DataType::Int64 => Expr::Literal(ScalarValue::Int64(Some(val))), + _ => Expr::Literal(ScalarValue::Int32(Some(val.try_into().unwrap()))), + } +} + /// returns the contained boolean value in `expr` as /// `Expr::Literal(ScalarValue::Boolean(v))`. pub fn as_bool_lit(expr: Expr) -> Result> { From ef983aea2b1204d81762c66a4ccf35fe958f36f4 Mon Sep 17 00:00:00 2001 From: Igor Izvekov Date: Thu, 2 Mar 2023 17:03:40 +0300 Subject: [PATCH 04/13] fix: add "get_data_type" to impl SimplifyInfo for MyInfo --- datafusion/core/tests/simplification.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/datafusion/core/tests/simplification.rs b/datafusion/core/tests/simplification.rs index 6e74fc0d9be85..f62c7c2ad7a5a 100644 --- a/datafusion/core/tests/simplification.rs +++ b/datafusion/core/tests/simplification.rs @@ -49,6 +49,8 @@ impl SimplifyInfo for MyInfo { fn execution_props(&self) -> &ExecutionProps { &self.execution_props } + + fn get_data_type(&self, expr: &Expr) -> DataType {} } impl From for MyInfo { From 35f741ea6d874df6fe6602feaa95faa5fc7e824e Mon Sep 17 00:00:00 2001 From: Igor Izvekov Date: Thu, 2 Mar 2023 17:23:10 +0300 Subject: [PATCH 05/13] fix: add behavior of the function "get_data_type" --- datafusion/core/tests/simplification.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/datafusion/core/tests/simplification.rs b/datafusion/core/tests/simplification.rs index f62c7c2ad7a5a..a20eced796bb8 100644 --- a/datafusion/core/tests/simplification.rs +++ b/datafusion/core/tests/simplification.rs @@ -50,7 +50,12 @@ impl SimplifyInfo for MyInfo { &self.execution_props } - fn get_data_type(&self, expr: &Expr) -> DataType {} + fn get_data_type(&self, expr: &Expr) -> DataType { + match expr.get_type(&self.schema) { + Ok(expr_data_type) => expr_data_type, + Err(_) => DataType::Int32, + } + } } impl From for MyInfo { From 59efc921f1d0ce44e2774e7cd166482ec0537e21 Mon Sep 17 00:00:00 2001 From: Igor Izvekov Date: Thu, 2 Mar 2023 18:02:12 +0300 Subject: [PATCH 06/13] add information about "get_data_type" to the docs --- .../optimizer/src/simplify_expressions/expr_simplifier.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 39538766cc083..dbaef9f2fdac0 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -92,6 +92,9 @@ impl ExprSimplifier { /// fn execution_props(&self) -> &ExecutionProps { /// &self.execution_props /// } + /// fn get_data_type(&self, expr: &Expr) -> DataType { + /// DataType::Int32 + /// } /// } /// /// // Create the simplifier From 34208c947c016538a070f5163b25e2226a180363 Mon Sep 17 00:00:00 2001 From: Igor Izvekov Date: Thu, 2 Mar 2023 18:37:55 +0300 Subject: [PATCH 07/13] fix: doc tests for "expr_simplifier.rs" --- datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index dbaef9f2fdac0..e41cff7e8ba78 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -70,6 +70,7 @@ impl ExprSimplifier { /// `b > 2` /// /// ``` + /// use arrow::datatypes::DataType; /// use datafusion_expr::{col, lit, Expr}; /// use datafusion_common::Result; /// use datafusion_physical_expr::execution_props::ExecutionProps; From f7923928f65e87f330a1ad9d9e3e953017ad4aae Mon Sep 17 00:00:00 2001 From: Igor Izvekov Date: Fri, 3 Mar 2023 23:32:26 +0300 Subject: [PATCH 08/13] fix: delete_xor_in_complex_expr --- .../simplify_expressions/expr_simplifier.rs | 80 ++++++++++++++++- .../src/simplify_expressions/utils.rs | 88 +++++++------------ 2 files changed, 111 insertions(+), 57 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index e41cff7e8ba78..0956a5d6808c8 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -925,7 +925,12 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseXor, right, }) if expr_contains(&left, &right, BitwiseXor) => { - delete_xor_in_complex_expr(&left, &right) + let expr = delete_xor_in_complex_expr(&left, &right, false); + if expr == *right { + new_int_by_expr_data_type(0, &info.get_data_type(&right)) + } else { + expr + } } // A ^ (..A..) --> (the expression without A, if number of A is odd, otherwise one A) @@ -934,7 +939,12 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseXor, right, }) if expr_contains(&right, &left, BitwiseXor) => { - delete_xor_in_complex_expr(&right, &left) + let expr = delete_xor_in_complex_expr(&right, &left, true); + if expr == *left { + new_int_by_expr_data_type(0, &info.get_data_type(&left)) + } else { + expr + } } // @@ -1895,6 +1905,64 @@ mod tests { ); assert_eq!(simplify(expr), expected); + + // with an even number of the column "c2" + // ((c2 ^ (c2 | c1)) ^ (c1 & c2)) ^ c2 --> (c2 | c1) ^ (c1 & c2) + + let expr = binary_expr( + binary_expr( + binary_expr( + col("c2"), + Operator::BitwiseXor, + binary_expr(col("c2"), Operator::BitwiseOr, col("c1")), + ), + Operator::BitwiseXor, + binary_expr(col("c1"), Operator::BitwiseAnd, col("c2")), + ), + Operator::BitwiseXor, + col("c2"), + ); + + let expected = binary_expr( + binary_expr(col("c2"), Operator::BitwiseOr, col("c1")), + Operator::BitwiseXor, + binary_expr(col("c1"), Operator::BitwiseAnd, col("c2")), + ); + + assert_eq!(simplify(expr), expected); + + // with an odd number of the column "c2" + // (c2 ^ (c2 | c1)) ^ ((c1 & c2) ^ c2) ^ c2 --> ((c2 | c1) ^ (c1 & c2)) ^ c2 + + let expr = binary_expr( + binary_expr( + binary_expr( + col("c2"), + Operator::BitwiseXor, + binary_expr(col("c2"), Operator::BitwiseOr, col("c1")), + ), + Operator::BitwiseXor, + binary_expr( + binary_expr(col("c1"), Operator::BitwiseAnd, col("c2")), + Operator::BitwiseXor, + col("c2"), + ), + ), + Operator::BitwiseXor, + col("c2"), + ); + + let expected = binary_expr( + binary_expr( + binary_expr(col("c2"), Operator::BitwiseOr, col("c1")), + Operator::BitwiseXor, + binary_expr(col("c1"), Operator::BitwiseAnd, col("c2")), + ), + Operator::BitwiseXor, + col("c2"), + ); + + assert_eq!(simplify(expr), expected); } #[test] @@ -2080,7 +2148,13 @@ mod tests { fn test_simplify_simple_bitwise_xor() { // (c2 > 5) ^ (c2 > 5) -> 0 let expr = (col("c2").gt(lit(5))).bitwise_xor(col("c2").gt(lit(5))); - let expected = lit(0); + let expected = Expr::Literal(ScalarValue::Int32(Some(0))); + + assert_eq!(simplify(expr), expected); + + // c3 ^ c3 -> 0 + let expr = col("c3").bitwise_xor(col("c3")); + let expected = Expr::Literal(ScalarValue::Int64(Some(0))); assert_eq!(simplify(expr), expected); } diff --git a/datafusion/optimizer/src/simplify_expressions/utils.rs b/datafusion/optimizer/src/simplify_expressions/utils.rs index d6bb1fd8e3fa2..49f6a5ac78caa 100644 --- a/datafusion/optimizer/src/simplify_expressions/utils.rs +++ b/datafusion/optimizer/src/simplify_expressions/utils.rs @@ -24,7 +24,6 @@ use datafusion_expr::{ expr_fn::{and, concat_ws, or}, lit, BuiltinScalarFunction, Expr, Like, Operator, }; -use std::collections::VecDeque; pub static POWS_OF_TEN: [i128; 38] = [ 1, @@ -79,76 +78,57 @@ pub fn expr_contains(expr: &Expr, needle: &Expr, search_op: Operator) -> bool { } } -/// This enum is needed for the function "delete_xor_in_complex_expr" for -/// creating a postfix notation vector -#[derive(PartialEq)] -enum SyntaxTreeItem { - Operator(Operator), - Expr(Expr), -} - -/// Deletes all 'needles' or remain one 'needle' that are found in a chain of search_op -/// expressions for XOR optimization. Such as: A ^ (A ^ (B ^ A)) -pub fn delete_xor_in_complex_expr(expr: &Expr, needle: &Expr) -> Expr { - let mut postfix_notation: VecDeque = VecDeque::new(); - /// Creates postfix notation - fn create_postfix_notation( - postfix_notation: &mut VecDeque, +/// Deletes all 'needles' or remains one 'needle' that are found in a chain of xor +/// expressions. Such as: A ^ (A ^ (B ^ A)) +pub fn delete_xor_in_complex_expr(expr: &Expr, needle: &Expr, is_left: bool) -> Expr { + /// Deletes recursively 'needles' in a chain of xor expressions + fn recursive_delete_xor_in_expr( expr: &Expr, - ) { + needle: &Expr, + xor_counter: &mut i32, + ) -> Expr { match expr { Expr::BinaryExpr(BinaryExpr { left, op, right }) if *op == Operator::BitwiseXor => { - postfix_notation.push_back(SyntaxTreeItem::Operator(*op)); - create_postfix_notation(postfix_notation, left); - create_postfix_notation(postfix_notation, right); - } - _ => postfix_notation.push_back(SyntaxTreeItem::Expr(expr.clone())), - } - } - let mut xor_counter: i32 = 0; - create_postfix_notation(&mut postfix_notation, expr); - - /// Restores expression from postfix notation - fn restore_expr( - postfix_notation: &mut VecDeque, - xor_counter: &mut i32, - needle: &Expr, - ) -> Expr { - let item = postfix_notation.pop_front().unwrap(); - - match item { - SyntaxTreeItem::Operator(operator) => { - let left = restore_expr(postfix_notation, xor_counter, needle); - let right = restore_expr(postfix_notation, xor_counter, needle); - if left == *needle { + let left_expr = recursive_delete_xor_in_expr(left, needle, xor_counter); + let right_expr = recursive_delete_xor_in_expr(right, needle, xor_counter); + if left_expr == *needle { *xor_counter += 1; - return right; - } else if right == *needle { + return right_expr; + } else if right_expr == *needle { *xor_counter += 1; - return left; + return left_expr; } Expr::BinaryExpr(BinaryExpr::new( - Box::new(left), - operator, - Box::new(right), + Box::new(left_expr), + *op, + Box::new(right_expr), )) } - SyntaxTreeItem::Expr(expr) => expr, + _ => expr.clone(), } } - let result_expr = restore_expr(&mut postfix_notation, &mut xor_counter, needle); + let mut xor_counter: i32 = 0; + let result_expr = recursive_delete_xor_in_expr(expr, needle, &mut xor_counter); if result_expr == *needle { - return Expr::Literal(ScalarValue::Int32(Some(0))); + return needle.clone(); } else if xor_counter % 2 == 0 { - return Expr::BinaryExpr(BinaryExpr::new( - Box::new(needle.clone()), - Operator::BitwiseXor, - Box::new(result_expr), - )); + if is_left { + return Expr::BinaryExpr(BinaryExpr::new( + Box::new(needle.clone()), + Operator::BitwiseXor, + Box::new(result_expr), + )); + } else { + return Expr::BinaryExpr(BinaryExpr::new( + Box::new(result_expr), + Operator::BitwiseXor, + Box::new(needle.clone()), + )); + } } result_expr } From c7312a530b574662bb7202daccf0957bf21f1ca7 Mon Sep 17 00:00:00 2001 From: Igor Izvekov Date: Sun, 5 Mar 2023 18:25:00 +0300 Subject: [PATCH 09/13] fix: some bitwise optimization rules, that can work even if an expr is nullable --- .../src/simplify_expressions/expr_simplifier.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 0956a5d6808c8..2d7bd6479e703 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -805,19 +805,19 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { right: _, }) if is_null(&left) => *left, - // A | 0 -> A (if A not nullable) + // A | 0 -> A (even if A is null) Expr::BinaryExpr(BinaryExpr { left, op: BitwiseOr, right, - }) if !info.nullable(&left)? && is_zero(&right) => *left, + }) if is_zero(&right) => *left, - // 0 | A -> A (if A not nullable) + // 0 | A -> A (even if A is null) Expr::BinaryExpr(BinaryExpr { left, op: BitwiseOr, right, - }) if !info.nullable(&right)? && is_zero(&left) => *right, + }) if is_zero(&left) => *right, // !A | A -> -1 (if A not nullable) Expr::BinaryExpr(BinaryExpr { @@ -965,12 +965,12 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { right: _, }) if is_null(&left) => *left, - // A >> 0 -> A (if A not nullable) + // A >> 0 -> A (even if A is null) Expr::BinaryExpr(BinaryExpr { left, op: BitwiseShiftRight, right, - }) if !info.nullable(&left)? && is_zero(&right) => *left, + }) if is_zero(&right) => *left, // // Rules for BitwiseShiftRight @@ -990,12 +990,12 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { right: _, }) if is_null(&left) => *left, - // A << 0 -> A (if A not nullable) + // A << 0 -> A (even if A is null) Expr::BinaryExpr(BinaryExpr { left, op: BitwiseShiftLeft, right, - }) if !info.nullable(&left)? && is_zero(&right) => *left, + }) if is_zero(&right) => *left, // // Rules for Not From d2779df07fef43b76747a9321fc595b4fe425704 Mon Sep 17 00:00:00 2001 From: Igor Izvekov Date: Sun, 5 Mar 2023 18:47:20 +0300 Subject: [PATCH 10/13] fix: prefer ScalarValue methods over "new_int_by_expr_data_type" --- datafusion/common/src/scalar.rs | 20 +++++++++++++++++++ .../simplify_expressions/expr_simplifier.rs | 16 +++++++-------- .../src/simplify_expressions/utils.rs | 15 -------------- 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 66c1f3f125526..f024d9101e7cf 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -1019,6 +1019,26 @@ impl ScalarValue { Self::List(scalars, Box::new(Field::new("item", child_type, true))) } + pub fn new_negative_one(datatype: &DataType) -> Result { + assert!(datatype.is_primitive()); + Ok(match datatype { + DataType::Int8 => ScalarValue::Int8(Some(-1)), + DataType::Int16 => ScalarValue::Int16(Some(-1)), + DataType::Int32 => ScalarValue::Int32(Some(-1)), + DataType::Int64 => ScalarValue::Int64(Some(-1)), + DataType::UInt8 => ScalarValue::UInt8(Some(-1)), + DataType::UInt16 => ScalarValue::UInt16(Some(-1)), + DataType::UInt32 => ScalarValue::UInt32(Some(-1)), + DataType::UInt64 => ScalarValue::UInt64(Some(-1)), + DataType::Float32 => ScalarValue::Float32(Some(-1.0)), + DataType::Float64 => ScalarValue::Float64(Some(-1.0)), + _ => { + return Err(DataFusionError::NotImplemented(format!( + "Can't create a negative one scalar from data_type \"{datatype:?}\"" + ))); + } + }) + } /// Getter for the `DataType` of the value pub fn get_datatype(&self) -> DataType { match self { diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 2d7bd6479e703..8136a88fc3eba 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -743,7 +743,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseAnd, right, }) if is_negative_of(&left, &right) && !info.nullable(&right)? => { - new_int_by_expr_data_type(0, &info.get_data_type(&left)) + ScalarValue::new_zero(&info.get_data_type(&left)) } // A & !A -> 0 (if A not nullable) @@ -752,7 +752,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseAnd, right, }) if is_negative_of(&right, &left) && !info.nullable(&left)? => { - new_int_by_expr_data_type(0, &info.get_data_type(&left)) + ScalarValue::new_zero(&info.get_data_type(&left)) } // (..A..) & A --> (..A..) @@ -825,7 +825,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseOr, right, }) if is_negative_of(&left, &right) && !info.nullable(&right)? => { - new_int_by_expr_data_type(-1, &info.get_data_type(&left)) + ScalarValue::new_negative_one(&info.get_data_type(&left)) } // A | !A -> -1 (if A not nullable) @@ -834,7 +834,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseOr, right, }) if is_negative_of(&right, &left) && !info.nullable(&left)? => { - new_int_by_expr_data_type(-1, &info.get_data_type(&left)) + ScalarValue::new_negative_one(&info.get_data_type(&left)) } // (..A..) | A --> (..A..) @@ -907,7 +907,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseXor, right, }) if is_negative_of(&left, &right) && !info.nullable(&right)? => { - new_int_by_expr_data_type(-1, &info.get_data_type(&left)) + ScalarValue::new_negative_one(&info.get_data_type(&left)) } // A ^ !A -> -1 (if A not nullable) @@ -916,7 +916,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseXor, right, }) if is_negative_of(&right, &left) && !info.nullable(&left)? => { - new_int_by_expr_data_type(-1, &info.get_data_type(&left)) + ScalarValue::new_negative_one(&info.get_data_type(&left)) } // (..A..) ^ A --> (the expression without A, if number of A is odd, otherwise one A) @@ -927,7 +927,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { }) if expr_contains(&left, &right, BitwiseXor) => { let expr = delete_xor_in_complex_expr(&left, &right, false); if expr == *right { - new_int_by_expr_data_type(0, &info.get_data_type(&right)) + ScalarValue::new_zero(&info.get_data_type(&right)) } else { expr } @@ -941,7 +941,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { }) if expr_contains(&right, &left, BitwiseXor) => { let expr = delete_xor_in_complex_expr(&right, &left, true); if expr == *left { - new_int_by_expr_data_type(0, &info.get_data_type(&left)) + ScalarValue::new_zero(&info.get_data_type(&left)) } else { expr } diff --git a/datafusion/optimizer/src/simplify_expressions/utils.rs b/datafusion/optimizer/src/simplify_expressions/utils.rs index 49f6a5ac78caa..605c5553a8601 100644 --- a/datafusion/optimizer/src/simplify_expressions/utils.rs +++ b/datafusion/optimizer/src/simplify_expressions/utils.rs @@ -220,21 +220,6 @@ pub fn is_negative_of(not_expr: &Expr, expr: &Expr) -> bool { matches!(not_expr, Expr::Negative(inner) if expr == inner.as_ref()) } -/// Returns a new int type based on data type of an expression -pub fn new_int_by_expr_data_type(val: i64, expr_data_type: &DataType) -> Expr { - match expr_data_type { - DataType::Int8 => Expr::Literal(ScalarValue::Int8(Some(val.try_into().unwrap()))), - DataType::Int16 => { - Expr::Literal(ScalarValue::Int16(Some(val.try_into().unwrap()))) - } - DataType::Int32 => { - Expr::Literal(ScalarValue::Int32(Some(val.try_into().unwrap()))) - } - DataType::Int64 => Expr::Literal(ScalarValue::Int64(Some(val))), - _ => Expr::Literal(ScalarValue::Int32(Some(val.try_into().unwrap()))), - } -} - /// returns the contained boolean value in `expr` as /// `Expr::Literal(ScalarValue::Boolean(v))`. pub fn as_bool_lit(expr: Expr) -> Result> { From 336aa4b4c57b2a017cde9d59ce89f286192c4047 Mon Sep 17 00:00:00 2001 From: Igor Izvekov Date: Sun, 5 Mar 2023 19:03:38 +0300 Subject: [PATCH 11/13] fix: ScalarValue::new_negative_one --- datafusion/common/src/scalar.rs | 15 ++++++--------- .../src/simplify_expressions/expr_simplifier.rs | 16 ++++++++-------- .../optimizer/src/simplify_expressions/utils.rs | 1 - 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 0dc282e79ac37..4ad6952166e1d 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -1019,7 +1019,7 @@ impl ScalarValue { Self::List(scalars, Box::new(Field::new("item", child_type, true))) } - // Create a zero value in the given type. + /// Create a zero value in the given type. pub fn new_zero(datatype: &DataType) -> Result { assert!(datatype.is_primitive()); Ok(match datatype { @@ -1042,17 +1042,14 @@ impl ScalarValue { }) } + /// Create a negative one value in the given type. pub fn new_negative_one(datatype: &DataType) -> Result { assert!(datatype.is_primitive()); Ok(match datatype { - DataType::Int8 => ScalarValue::Int8(Some(-1)), - DataType::Int16 => ScalarValue::Int16(Some(-1)), - DataType::Int32 => ScalarValue::Int32(Some(-1)), - DataType::Int64 => ScalarValue::Int64(Some(-1)), - DataType::UInt8 => ScalarValue::UInt8(Some(-1)), - DataType::UInt16 => ScalarValue::UInt16(Some(-1)), - DataType::UInt32 => ScalarValue::UInt32(Some(-1)), - DataType::UInt64 => ScalarValue::UInt64(Some(-1)), + DataType::Int8 | DataType::UInt8 => ScalarValue::Int8(Some(-1)), + DataType::Int16 | DataType::UInt16 => ScalarValue::Int16(Some(-1)), + DataType::Int32 | DataType::UInt32 => ScalarValue::Int32(Some(-1)), + DataType::Int64 | DataType::UInt64 => ScalarValue::Int64(Some(-1)), DataType::Float32 => ScalarValue::Float32(Some(-1.0)), DataType::Float64 => ScalarValue::Float64(Some(-1.0)), _ => { diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 8136a88fc3eba..08cd60e938499 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -743,7 +743,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseAnd, right, }) if is_negative_of(&left, &right) && !info.nullable(&right)? => { - ScalarValue::new_zero(&info.get_data_type(&left)) + Expr::Literal(ScalarValue::new_zero(&info.get_data_type(&left))?) } // A & !A -> 0 (if A not nullable) @@ -752,7 +752,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseAnd, right, }) if is_negative_of(&right, &left) && !info.nullable(&left)? => { - ScalarValue::new_zero(&info.get_data_type(&left)) + Expr::Literal(ScalarValue::new_zero(&info.get_data_type(&left))?) } // (..A..) & A --> (..A..) @@ -825,7 +825,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseOr, right, }) if is_negative_of(&left, &right) && !info.nullable(&right)? => { - ScalarValue::new_negative_one(&info.get_data_type(&left)) + Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left))?) } // A | !A -> -1 (if A not nullable) @@ -834,7 +834,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseOr, right, }) if is_negative_of(&right, &left) && !info.nullable(&left)? => { - ScalarValue::new_negative_one(&info.get_data_type(&left)) + Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left))?) } // (..A..) | A --> (..A..) @@ -907,7 +907,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseXor, right, }) if is_negative_of(&left, &right) && !info.nullable(&right)? => { - ScalarValue::new_negative_one(&info.get_data_type(&left)) + Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left))?) } // A ^ !A -> -1 (if A not nullable) @@ -916,7 +916,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseXor, right, }) if is_negative_of(&right, &left) && !info.nullable(&left)? => { - ScalarValue::new_negative_one(&info.get_data_type(&left)) + Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left))?) } // (..A..) ^ A --> (the expression without A, if number of A is odd, otherwise one A) @@ -927,7 +927,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { }) if expr_contains(&left, &right, BitwiseXor) => { let expr = delete_xor_in_complex_expr(&left, &right, false); if expr == *right { - ScalarValue::new_zero(&info.get_data_type(&right)) + Expr::Literal(ScalarValue::new_zero(&info.get_data_type(&right))?) } else { expr } @@ -941,7 +941,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { }) if expr_contains(&right, &left, BitwiseXor) => { let expr = delete_xor_in_complex_expr(&right, &left, true); if expr == *left { - ScalarValue::new_zero(&info.get_data_type(&left)) + Expr::Literal(ScalarValue::new_zero(&info.get_data_type(&left))?) } else { expr } diff --git a/datafusion/optimizer/src/simplify_expressions/utils.rs b/datafusion/optimizer/src/simplify_expressions/utils.rs index 605c5553a8601..352674c3a68e8 100644 --- a/datafusion/optimizer/src/simplify_expressions/utils.rs +++ b/datafusion/optimizer/src/simplify_expressions/utils.rs @@ -17,7 +17,6 @@ //! Utility functions for expression simplification -use arrow::datatypes::DataType; use datafusion_common::{DataFusionError, Result, ScalarValue}; use datafusion_expr::{ expr::{Between, BinaryExpr}, From a2a6706e1e411f2de4573c9663f211b251f16504 Mon Sep 17 00:00:00 2001 From: Igor Izvekov Date: Sun, 5 Mar 2023 19:55:56 +0300 Subject: [PATCH 12/13] fix: get_data_type --- datafusion/core/tests/simplification.rs | 6 +- .../src/simplify_expressions/context.rs | 12 ++-- .../simplify_expressions/expr_simplifier.rs | 68 ++++++++++--------- 3 files changed, 45 insertions(+), 41 deletions(-) diff --git a/datafusion/core/tests/simplification.rs b/datafusion/core/tests/simplification.rs index a20eced796bb8..f6b944b50448e 100644 --- a/datafusion/core/tests/simplification.rs +++ b/datafusion/core/tests/simplification.rs @@ -50,10 +50,10 @@ impl SimplifyInfo for MyInfo { &self.execution_props } - fn get_data_type(&self, expr: &Expr) -> DataType { + fn get_data_type(&self, expr: &Expr) -> Result { match expr.get_type(&self.schema) { - Ok(expr_data_type) => expr_data_type, - Err(_) => DataType::Int32, + Ok(expr_data_type) => Ok(expr_data_type), + Err(e) => Err(e), } } } diff --git a/datafusion/optimizer/src/simplify_expressions/context.rs b/datafusion/optimizer/src/simplify_expressions/context.rs index caafdbcbcb65b..32edfb6affb65 100644 --- a/datafusion/optimizer/src/simplify_expressions/context.rs +++ b/datafusion/optimizer/src/simplify_expressions/context.rs @@ -40,7 +40,7 @@ pub trait SimplifyInfo { fn execution_props(&self) -> &ExecutionProps; /// Returns data type of this expr needed for determining optimized int type of a value - fn get_data_type(&self, expr: &Expr) -> DataType; + fn get_data_type(&self, expr: &Expr) -> Result; } /// Provides simplification information based on DFSchema and @@ -127,14 +127,16 @@ impl<'a> SimplifyInfo for SimplifyContext<'a> { } /// Returns data type of this expr needed for determining optimized int type of a value - fn get_data_type(&self, expr: &Expr) -> DataType { + fn get_data_type(&self, expr: &Expr) -> Result { if self.schemas.len() == 1 { match expr.get_type(&self.schemas[0]) { - Ok(expr_data_type) => expr_data_type, - Err(_) => DataType::Int32, + Ok(expr_data_type) => Ok(expr_data_type), + Err(e) => Err(e), } } else { - DataType::Int32 + Err(DataFusionError::Internal(format!( + "The expr has more than one schema, could not determine data type" + ))) } } diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 08cd60e938499..220d532b03b14 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -93,8 +93,8 @@ impl ExprSimplifier { /// fn execution_props(&self) -> &ExecutionProps { /// &self.execution_props /// } - /// fn get_data_type(&self, expr: &Expr) -> DataType { - /// DataType::Int32 + /// fn get_data_type(&self, expr: &Expr) -> Result { + /// Ok(DataType::Int32) /// } /// } /// @@ -743,7 +743,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseAnd, right, }) if is_negative_of(&left, &right) && !info.nullable(&right)? => { - Expr::Literal(ScalarValue::new_zero(&info.get_data_type(&left))?) + Expr::Literal(ScalarValue::new_zero(&info.get_data_type(&left)?)?) } // A & !A -> 0 (if A not nullable) @@ -752,7 +752,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseAnd, right, }) if is_negative_of(&right, &left) && !info.nullable(&left)? => { - Expr::Literal(ScalarValue::new_zero(&info.get_data_type(&left))?) + Expr::Literal(ScalarValue::new_zero(&info.get_data_type(&left)?)?) } // (..A..) & A --> (..A..) @@ -825,7 +825,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseOr, right, }) if is_negative_of(&left, &right) && !info.nullable(&right)? => { - Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left))?) + Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left)?)?) } // A | !A -> -1 (if A not nullable) @@ -834,7 +834,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseOr, right, }) if is_negative_of(&right, &left) && !info.nullable(&left)? => { - Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left))?) + Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left)?)?) } // (..A..) | A --> (..A..) @@ -907,7 +907,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseXor, right, }) if is_negative_of(&left, &right) && !info.nullable(&right)? => { - Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left))?) + Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left)?)?) } // A ^ !A -> -1 (if A not nullable) @@ -916,7 +916,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { op: BitwiseXor, right, }) if is_negative_of(&right, &left) && !info.nullable(&left)? => { - Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left))?) + Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left)?)?) } // (..A..) ^ A --> (the expression without A, if number of A is odd, otherwise one A) @@ -927,7 +927,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { }) if expr_contains(&left, &right, BitwiseXor) => { let expr = delete_xor_in_complex_expr(&left, &right, false); if expr == *right { - Expr::Literal(ScalarValue::new_zero(&info.get_data_type(&right))?) + Expr::Literal(ScalarValue::new_zero(&info.get_data_type(&right)?)?) } else { expr } @@ -941,7 +941,7 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { }) if expr_contains(&right, &left, BitwiseXor) => { let expr = delete_xor_in_complex_expr(&right, &left, true); if expr == *left { - Expr::Literal(ScalarValue::new_zero(&info.get_data_type(&left))?) + Expr::Literal(ScalarValue::new_zero(&info.get_data_type(&left)?)?) } else { expr } @@ -1967,22 +1967,22 @@ mod tests { #[test] fn test_simplify_negated_bitwise_and() { - // !(c2 > 5) & (c2 > 5) --> 0 + // !c4 & c4 --> 0 let expr = binary_expr( - Expr::Negative(Box::new(col("c2_non_null").gt(lit(5)))), + Expr::Negative(Box::new(col("c4_non_null"))), Operator::BitwiseAnd, - col("c2_non_null").gt(lit(5)), + col("c4_non_null"), ); - let expected = Expr::Literal(ScalarValue::Int32(Some(0))); + let expected = Expr::Literal(ScalarValue::UInt32(Some(0))); assert_eq!(simplify(expr), expected); - // (c2 > 5) & !(c2 > 5) --> 0 + // c4 & !c4 --> 0 let expr = binary_expr( - col("c2_non_null").gt(lit(5)), + col("c4_non_null"), Operator::BitwiseAnd, - Expr::Negative(Box::new(col("c2_non_null").gt(lit(5)))), + Expr::Negative(Box::new(col("c4_non_null"))), ); - let expected = Expr::Literal(ScalarValue::Int32(Some(0))); + let expected = Expr::Literal(ScalarValue::UInt32(Some(0))); assert_eq!(simplify(expr), expected); @@ -2008,21 +2008,21 @@ mod tests { #[test] fn test_simplify_negated_bitwise_or() { - // !(c2 > 5) | (c2 > 5) --> -1 + // !c4 | c4 --> -1 let expr = binary_expr( - Expr::Negative(Box::new(col("c2_non_null").gt(lit(5)))), + Expr::Negative(Box::new(col("c4_non_null"))), Operator::BitwiseOr, - col("c2_non_null").gt(lit(5)), + col("c4_non_null"), ); let expected = Expr::Literal(ScalarValue::Int32(Some(-1))); assert_eq!(simplify(expr), expected); - // (c2 > 5) | !(c2 > 5) --> -1 + // c4 | !c4 --> -1 let expr = binary_expr( - col("c2_non_null").gt(lit(5)), + col("c4_non_null"), Operator::BitwiseOr, - Expr::Negative(Box::new(col("c2_non_null").gt(lit(5)))), + Expr::Negative(Box::new(col("c4_non_null"))), ); let expected = Expr::Literal(ScalarValue::Int32(Some(-1))); @@ -2051,21 +2051,21 @@ mod tests { #[test] fn test_simplify_negated_bitwise_xor() { - // !(c2 > 5) ^ (c2 > 5) --> -1 + // !c4 ^ c4 --> -1 let expr = binary_expr( - Expr::Negative(Box::new(col("c2_non_null").gt(lit(5)))), + Expr::Negative(Box::new(col("c4_non_null"))), Operator::BitwiseXor, - col("c2_non_null").gt(lit(5)), + col("c4_non_null"), ); let expected = Expr::Literal(ScalarValue::Int32(Some(-1))); assert_eq!(simplify(expr), expected); - // (c2 > 5) ^ !(c2 > 5) --> -1 + // c4 ^ !c4 --> -1 let expr = binary_expr( - col("c2_non_null").gt(lit(5)), + col("c4_non_null"), Operator::BitwiseXor, - Expr::Negative(Box::new(col("c2_non_null").gt(lit(5)))), + Expr::Negative(Box::new(col("c4_non_null"))), ); let expected = Expr::Literal(ScalarValue::Int32(Some(-1))); @@ -2146,9 +2146,9 @@ mod tests { #[test] fn test_simplify_simple_bitwise_xor() { - // (c2 > 5) ^ (c2 > 5) -> 0 - let expr = (col("c2").gt(lit(5))).bitwise_xor(col("c2").gt(lit(5))); - let expected = Expr::Literal(ScalarValue::Int32(Some(0))); + // c4 ^ c4 -> 0 + let expr = (col("c4")).bitwise_xor(col("c4")); + let expected = Expr::Literal(ScalarValue::UInt32(Some(0))); assert_eq!(simplify(expr), expected); @@ -2574,9 +2574,11 @@ mod tests { DFField::new(None, "c1", DataType::Utf8, true), DFField::new(None, "c2", DataType::Boolean, true), DFField::new(None, "c3", DataType::Int64, true), + DFField::new(None, "c4", DataType::UInt32, true), DFField::new(None, "c1_non_null", DataType::Utf8, false), DFField::new(None, "c2_non_null", DataType::Boolean, false), DFField::new(None, "c3_non_null", DataType::Int64, false), + DFField::new(None, "c4_non_null", DataType::UInt32, false), ], HashMap::new(), ) From 5ee2f5bc8a0dc05f718685a891cb42339f986ab3 Mon Sep 17 00:00:00 2001 From: Igor Izvekov Date: Sun, 5 Mar 2023 20:15:41 +0300 Subject: [PATCH 13/13] fix: cargo clippy and fmt style --- datafusion/optimizer/src/simplify_expressions/context.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/context.rs b/datafusion/optimizer/src/simplify_expressions/context.rs index 32edfb6affb65..b6e3f07b29b33 100644 --- a/datafusion/optimizer/src/simplify_expressions/context.rs +++ b/datafusion/optimizer/src/simplify_expressions/context.rs @@ -134,9 +134,10 @@ impl<'a> SimplifyInfo for SimplifyContext<'a> { Err(e) => Err(e), } } else { - Err(DataFusionError::Internal(format!( + Err(DataFusionError::Internal( "The expr has more than one schema, could not determine data type" - ))) + .to_string(), + )) } }