From cf87aa1e646ae61318a326cc4947a004721663ad Mon Sep 17 00:00:00 2001 From: AssHero Date: Mon, 26 Sep 2022 23:48:40 +0800 Subject: [PATCH 1/2] support cast/try_cast expr in reduceOuterJoin --- datafusion/optimizer/src/reduce_outer_join.rs | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/datafusion/optimizer/src/reduce_outer_join.rs b/datafusion/optimizer/src/reduce_outer_join.rs index 0553b6cf61df8..05284891d966e 100644 --- a/datafusion/optimizer/src/reduce_outer_join.rs +++ b/datafusion/optimizer/src/reduce_outer_join.rs @@ -350,6 +350,15 @@ fn extract_nonnullable_columns( false, ) } + Expr::Cast { expr, data_type: _ } | Expr::TryCast { expr, data_type: _ } => { + extract_nonnullable_columns( + expr, + nonnullable_cols, + left_schema, + right_schema, + false, + ) + } _ => Ok(()), } } @@ -358,9 +367,11 @@ fn extract_nonnullable_columns( mod tests { use super::*; use crate::test::*; + use arrow::datatypes::DataType; use datafusion_expr::{ - binary_expr, col, lit, + binary_expr, cast, col, lit, logical_plan::builder::LogicalPlanBuilder, + try_cast, Operator::{And, Or}, }; @@ -438,13 +449,13 @@ mod tests { None, )? .filter(binary_expr( - col("t1.b").gt(lit(10u32)), + cast(col("t1.b"), DataType::Int64).gt(lit(10u32)), Or, col("t1.c").lt(lit(20u32)), ))? .build()?; let expected = "\ - Filter: #t1.b > UInt32(10) OR #t1.c < UInt32(20)\ + Filter: CAST(#t1.b AS Int64) > UInt32(10) OR #t1.c < UInt32(20)\ \n Inner Join: #t1.a = #t2.a\ \n TableScan: t1\ \n TableScan: t2"; @@ -467,13 +478,13 @@ mod tests { None, )? .filter(binary_expr( - col("t1.b").gt(lit(10u32)), + try_cast(col("t1.b"), DataType::Int64).gt(lit(10u32)), And, col("t2.c").lt(lit(20u32)), ))? .build()?; let expected = "\ - Filter: #t1.b > UInt32(10) AND #t2.c < UInt32(20)\ + Filter: TRY_CAST(#t1.b AS Int64) > UInt32(10) AND #t2.c < UInt32(20)\ \n Inner Join: #t1.a = #t2.a\ \n TableScan: t1\ \n TableScan: t2"; From 9e948c0da0a82aba9c6538cb0d3364a294e23253 Mon Sep 17 00:00:00 2001 From: AssHero Date: Thu, 29 Sep 2022 11:18:05 +0800 Subject: [PATCH 2/2] add new test case for cast/try_cast expr in reduceOuterJoin --- datafusion/optimizer/src/reduce_outer_join.rs | 37 +++++++++++++++++-- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/datafusion/optimizer/src/reduce_outer_join.rs b/datafusion/optimizer/src/reduce_outer_join.rs index 05284891d966e..e0b0a2ed04a31 100644 --- a/datafusion/optimizer/src/reduce_outer_join.rs +++ b/datafusion/optimizer/src/reduce_outer_join.rs @@ -449,13 +449,13 @@ mod tests { None, )? .filter(binary_expr( - cast(col("t1.b"), DataType::Int64).gt(lit(10u32)), + col("t1.b").gt(lit(10u32)), Or, col("t1.c").lt(lit(20u32)), ))? .build()?; let expected = "\ - Filter: CAST(#t1.b AS Int64) > UInt32(10) OR #t1.c < UInt32(20)\ + Filter: #t1.b > UInt32(10) OR #t1.c < UInt32(20)\ \n Inner Join: #t1.a = #t2.a\ \n TableScan: t1\ \n TableScan: t2"; @@ -478,13 +478,42 @@ mod tests { None, )? .filter(binary_expr( - try_cast(col("t1.b"), DataType::Int64).gt(lit(10u32)), + col("t1.b").gt(lit(10u32)), And, col("t2.c").lt(lit(20u32)), ))? .build()?; let expected = "\ - Filter: TRY_CAST(#t1.b AS Int64) > UInt32(10) AND #t2.c < UInt32(20)\ + Filter: #t1.b > UInt32(10) AND #t2.c < UInt32(20)\ + \n Inner Join: #t1.a = #t2.a\ + \n TableScan: t1\ + \n TableScan: t2"; + assert_optimized_plan_eq(&plan, expected); + + Ok(()) + } + + #[test] + fn reduce_full_with_type_cast() -> Result<()> { + let t1 = test_table_scan_with_name("t1")?; + let t2 = test_table_scan_with_name("t2")?; + + // reduce to inner join + let plan = LogicalPlanBuilder::from(t1) + .join( + &t2, + JoinType::Full, + (vec![Column::from_name("a")], vec![Column::from_name("a")]), + None, + )? + .filter(binary_expr( + cast(col("t1.b"), DataType::Int64).gt(lit(10u32)), + And, + try_cast(col("t2.c"), DataType::Int64).lt(lit(20u32)), + ))? + .build()?; + let expected = "\ + Filter: CAST(#t1.b AS Int64) > UInt32(10) AND TRY_CAST(#t2.c AS Int64) < UInt32(20)\ \n Inner Join: #t1.a = #t2.a\ \n TableScan: t1\ \n TableScan: t2";