From c9fd96ed0fa9519b3cbd51c4c2724050ce32c70f Mon Sep 17 00:00:00 2001 From: Thomas Tanon Date: Tue, 30 Sep 2025 18:51:14 +0200 Subject: [PATCH] feat: makes Expr::not normalize more negations Adds IN and EXISTS --- datafusion/expr/src/operation.rs | 15 +- .../simplify_expressions/simplify_exprs.rs | 148 ++++++++++++++++++ 2 files changed, 162 insertions(+), 1 deletion(-) diff --git a/datafusion/expr/src/operation.rs b/datafusion/expr/src/operation.rs index 6b79a8248b293..3158a19dce449 100644 --- a/datafusion/expr/src/operation.rs +++ b/datafusion/expr/src/operation.rs @@ -17,8 +17,8 @@ //! This module contains implementations of operations (unary, binary etc.) for DataFusion expressions. +use crate::expr::{Exists, Expr, InList, InSubquery, Like}; use crate::expr_fn::binary_expr; -use crate::{Expr, Like}; use datafusion_expr_common::operator::Operator; use std::ops::{self, Not}; @@ -153,6 +153,19 @@ impl Not for Expr { escape_char, case_insensitive, )), + Expr::InList(InList { + expr, + list, + negated, + }) => Expr::InList(InList::new(expr, list, !negated)), + Expr::Exists(Exists { subquery, negated }) => { + Expr::Exists(Exists::new(subquery, !negated)) + } + Expr::InSubquery(InSubquery { + expr, + subquery, + negated, + }) => Expr::InSubquery(InSubquery::new(expr, subquery, !negated)), _ => Expr::Not(Box::new(self)), } } diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs index b828888305d28..4faf9389cfac4 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs @@ -926,4 +926,152 @@ mod tests { "# ) } + + #[test] + fn simplify_not_in_list() -> Result<()> { + let schema = Schema::new(vec![Field::new("a", DataType::Utf8, false)]); + let table_scan = table_scan(Some("test"), &schema, None)?.build()?; + + let plan = LogicalPlanBuilder::from(table_scan) + .filter(col("a").in_list(vec![lit("a"), lit("b")], false).not())? + .build()?; + + assert_optimized_plan_equal!( + plan, + @ r#" + Filter: test.a != Utf8("a") AND test.a != Utf8("b") + TableScan: test + "# + ) + } + + #[test] + fn simplify_not_not_in_list() -> Result<()> { + let schema = Schema::new(vec![Field::new("a", DataType::Utf8, false)]); + let table_scan = table_scan(Some("test"), &schema, None)?.build()?; + + let plan = LogicalPlanBuilder::from(table_scan) + .filter( + col("a") + .in_list(vec![lit("a"), lit("b")], false) + .not() + .not(), + )? + .build()?; + + assert_optimized_plan_equal!( + plan, + @ r#" + Filter: test.a = Utf8("a") OR test.a = Utf8("b") + TableScan: test + "# + ) + } + + #[test] + fn simplify_not_exists() -> Result<()> { + let schema = Schema::new(vec![Field::new("a", DataType::Utf8, false)]); + let table_scan = table_scan(Some("test"), &schema, None)?.build()?; + let table_scan2 = + datafusion_expr::table_scan(Some("test2"), &schema, None)?.build()?; + + let plan = LogicalPlanBuilder::from(table_scan) + .filter( + exists(Arc::new(LogicalPlanBuilder::from(table_scan2).build()?)).not(), + )? + .build()?; + + assert_optimized_plan_equal!( + plan, + @ r" + Filter: NOT EXISTS () + Subquery: + TableScan: test2 + TableScan: test + " + ) + } + + #[test] + fn simplify_not_not_exists() -> Result<()> { + let schema = Schema::new(vec![Field::new("a", DataType::Utf8, false)]); + let table_scan = table_scan(Some("test"), &schema, None)?.build()?; + let table_scan2 = + datafusion_expr::table_scan(Some("test2"), &schema, None)?.build()?; + + let plan = LogicalPlanBuilder::from(table_scan) + .filter( + exists(Arc::new(LogicalPlanBuilder::from(table_scan2).build()?)) + .not() + .not(), + )? + .build()?; + + assert_optimized_plan_equal!( + plan, + @ r" + Filter: EXISTS () + Subquery: + TableScan: test2 + TableScan: test + " + ) + } + + #[test] + fn simplify_not_in_subquery() -> Result<()> { + let schema = Schema::new(vec![Field::new("a", DataType::Utf8, false)]); + let table_scan = table_scan(Some("test"), &schema, None)?.build()?; + let table_scan2 = + datafusion_expr::table_scan(Some("test2"), &schema, None)?.build()?; + + let plan = LogicalPlanBuilder::from(table_scan) + .filter( + in_subquery( + col("a"), + Arc::new(LogicalPlanBuilder::from(table_scan2).build()?), + ) + .not(), + )? + .build()?; + + assert_optimized_plan_equal!( + plan, + @ r" + Filter: test.a NOT IN () + Subquery: + TableScan: test2 + TableScan: test + " + ) + } + + #[test] + fn simplify_not_not_in_subquery() -> Result<()> { + let schema = Schema::new(vec![Field::new("a", DataType::Utf8, false)]); + let table_scan = table_scan(Some("test"), &schema, None)?.build()?; + let table_scan2 = + datafusion_expr::table_scan(Some("test2"), &schema, None)?.build()?; + + let plan = LogicalPlanBuilder::from(table_scan) + .filter( + in_subquery( + col("a"), + Arc::new(LogicalPlanBuilder::from(table_scan2).build()?), + ) + .not() + .not(), + )? + .build()?; + + assert_optimized_plan_equal!( + plan, + @ r" + Filter: test.a IN () + Subquery: + TableScan: test2 + TableScan: test + " + ) + } }