From 549f10b59f78f2586bf39b3985030342853e3d0e Mon Sep 17 00:00:00 2001 From: Jason Tianyi Wang Date: Fri, 29 Oct 2021 23:37:53 +0900 Subject: [PATCH 1/5] Fix `between` in select query resolves #1196 --- datafusion/src/logical_plan/expr.rs | 26 +++++++++++++++++++++++-- datafusion/src/physical_plan/planner.rs | 15 ++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/datafusion/src/logical_plan/expr.rs b/datafusion/src/logical_plan/expr.rs index 011068d0e18b8..71767cc0e23aa 100644 --- a/datafusion/src/logical_plan/expr.rs +++ b/datafusion/src/logical_plan/expr.rs @@ -1935,10 +1935,32 @@ fn create_name(e: &Expr, input_schema: &DFSchema) -> Result { Ok(format!("{} IN ({:?})", expr, list)) } } - other => Err(DataFusionError::NotImplemented(format!( + Expr::Between { + expr, + negated, + low, + high, + } => { + let expr = create_name(expr, input_schema)?; + let low = create_name(low, input_schema)?; + let high = create_name(high, input_schema)?; + if *negated { + Ok(format!("{} NOT BETWEEN {} AND {}", expr, low, high)) + } else { + Ok(format!("{} BETWEEN {} AND {}", expr, low, high)) + } + } + Expr::Sort { + expr, + asc: _, + nulls_first: _, + } => Err(DataFusionError::Internal(format!( "Create name does not support logical expression {:?}", - other + expr ))), + Expr::Wildcard => Err(DataFusionError::Internal( + "Create name does not support wildcard".to_string(), + )), } } diff --git a/datafusion/src/physical_plan/planner.rs b/datafusion/src/physical_plan/planner.rs index 8cfb907350b52..f0ed63ec57f9a 100644 --- a/datafusion/src/physical_plan/planner.rs +++ b/datafusion/src/physical_plan/planner.rs @@ -176,6 +176,21 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { Ok(format!("{} IN ({:?})", expr, list)) } } + Expr::Between { + expr, + negated, + low, + high, + } => { + let expr = create_physical_name(expr, false)?; + let low = create_physical_name(low, false)?; + let high = create_physical_name(high, false)?; + if *negated { + Ok(format!("{} NOT BETWEEN {} AND {}", expr, low, high)) + } else { + Ok(format!("{} BETWEEN {} AND {}", expr, low, high)) + } + } other => Err(DataFusionError::NotImplemented(format!( "Cannot derive physical field name for logical expression {:?}", other From fde099a251beddb798fa81a29e71df99d7ee6675 Mon Sep 17 00:00:00 2001 From: Jason Tianyi Wang Date: Wed, 3 Nov 2021 12:47:40 +0900 Subject: [PATCH 2/5] add tests --- datafusion/tests/sql.rs | 54 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs index f3dba3fc2ad17..00b8d39d659db 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -5305,3 +5305,57 @@ async fn case_with_bool_type_result() -> Result<()> { assert_eq!(expected, actual); Ok(()) } + +#[tokio::test] +async fn use_between_experssion_in_select_query() -> Result<()> { + let mut ctx = ExecutionContext::new(); + + let sql = "SELECT 1 NOT BETWEEN 3 AND 5"; + let actual = execute_to_batches(&mut ctx, sql).await; + let expected = vec![ + "+---------------+", + "| Boolean(true) |", + "+---------------+", + "| true |", + "+---------------+", + ]; + assert_batches_eq!(expected, &actual); + + let input = Int64Array::from(vec![1, 2, 3, 4]); + let batch = RecordBatch::try_from_iter(vec![("c1", Arc::new(input) as _)]).unwrap(); + let table = MemTable::try_new(batch.schema(), vec![vec![batch]])?; + ctx.register_table("test", Arc::new(table))?; + + let sql = "SELECT abs(c1) BETWEEN 0 AND LoG(c1 * 100 ) FROM test"; + let actual = execute_to_batches(&mut ctx, sql).await; + // Expect field name to be correctly converted for expr, low and high. + let expected = vec![ + "+--------------------------------------------------------------------+", + "| abs(test.c1) BETWEEN Int64(0) AND log(test.c1 Multiply Int64(100)) |", + "+--------------------------------------------------------------------+", + "| true |", + "| true |", + "| false |", + "| false |", + "+--------------------------------------------------------------------+", + ]; + assert_batches_eq!(expected, &actual); + + let sql = "EXPLAIN SELECT c1 BETWEEN 2 AND 3 FROM test"; + let actual = execute_to_batches(&mut ctx, sql).await; + let expected = vec![ + "+---------------+-----------------------------------------------------------------------------------------+", + "| plan_type | plan |", + "+---------------+-----------------------------------------------------------------------------------------+", + "| logical_plan | Projection: #test.c1 BETWEEN Int64(2) AND Int64(3) |", + "| | TableScan: test projection=Some([0]) |", + "| physical_plan | ProjectionExec: expr=[c1@0 >= 2 AND c1@0 <= 3 as test.c1 BETWEEN Int64(2) AND Int64(3)] |", + "| | RepartitionExec: partitioning=RoundRobinBatch(8) |", + "| | MemoryExec: partitions=1, partition_sizes=[1] |", + "| | |", + "+---------------+-----------------------------------------------------------------------------------------+", + ]; + assert_batches_eq!(expected, &actual); + + Ok(()) +} From 8eff3dec094ed0293dd30adb35324617993fb6c7 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 3 Nov 2021 14:25:01 -0400 Subject: [PATCH 3/5] Make test not depend on number of cores --- datafusion/tests/sql.rs | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs index 8dc4ff8a2f24b..fe894d1761de2 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -5398,7 +5398,7 @@ async fn case_with_bool_type_result() -> Result<()> { } #[tokio::test] -async fn use_between_experssion_in_select_query() -> Result<()> { +async fn use_between_expression_in_select_query() -> Result<()> { let mut ctx = ExecutionContext::new(); let sql = "SELECT 1 NOT BETWEEN 3 AND 5"; @@ -5434,19 +5434,14 @@ async fn use_between_experssion_in_select_query() -> Result<()> { let sql = "EXPLAIN SELECT c1 BETWEEN 2 AND 3 FROM test"; let actual = execute_to_batches(&mut ctx, sql).await; - let expected = vec![ - "+---------------+-----------------------------------------------------------------------------------------+", - "| plan_type | plan |", - "+---------------+-----------------------------------------------------------------------------------------+", - "| logical_plan | Projection: #test.c1 BETWEEN Int64(2) AND Int64(3) |", - "| | TableScan: test projection=Some([0]) |", - "| physical_plan | ProjectionExec: expr=[c1@0 >= 2 AND c1@0 <= 3 as test.c1 BETWEEN Int64(2) AND Int64(3)] |", - "| | RepartitionExec: partitioning=RoundRobinBatch(8) |", - "| | MemoryExec: partitions=1, partition_sizes=[1] |", - "| | |", - "+---------------+-----------------------------------------------------------------------------------------+", - ]; - assert_batches_eq!(expected, &actual); + let formatted = arrow::util::pretty::pretty_format_batches(&actual).unwrap(); + + // Only test that the projection exprs arecorrect, rather than entire output + let needle = "ProjectionExec: expr=[c1@0 >= 2 AND c1@0 <= 3 as test.c1 BETWEEN Int64(2) AND Int64(3)]"; + assert_contains!(&formatted, needle); + let needle = "Projection: #test.c1 BETWEEN Int64(2) AND Int64(3)"; + assert_contains!(&formatted, needle); + Ok(()) } From 15785ee1eebd55238a77686a7745fa600b0d3caf Mon Sep 17 00:00:00 2001 From: Jason Tianyi Wang Date: Thu, 4 Nov 2021 21:50:57 +0900 Subject: [PATCH 4/5] remove the default match arm in `create_physical_name` --- datafusion/src/logical_plan/expr.rs | 9 ++++----- datafusion/src/physical_plan/planner.rs | 14 ++++++++++---- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/datafusion/src/logical_plan/expr.rs b/datafusion/src/logical_plan/expr.rs index 9cf9722b32fd4..4cbed2abfa929 100644 --- a/datafusion/src/logical_plan/expr.rs +++ b/datafusion/src/logical_plan/expr.rs @@ -1988,13 +1988,12 @@ fn create_name(e: &Expr, input_schema: &DFSchema) -> Result { } } Expr::Sort { - expr, + expr: _, asc: _, nulls_first: _, - } => Err(DataFusionError::Internal(format!( - "Create name does not support logical expression {:?}", - expr - ))), + } => Err(DataFusionError::Internal( + "Create name does not support sort expression".to_string(), + )), Expr::Wildcard => Err(DataFusionError::Internal( "Create name does not support wildcard".to_string(), )), diff --git a/datafusion/src/physical_plan/planner.rs b/datafusion/src/physical_plan/planner.rs index 506aea0c3bfd3..95e184f295f8b 100644 --- a/datafusion/src/physical_plan/planner.rs +++ b/datafusion/src/physical_plan/planner.rs @@ -197,10 +197,16 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { Ok(format!("{} BETWEEN {} AND {}", expr, low, high)) } } - other => Err(DataFusionError::NotImplemented(format!( - "Cannot derive physical field name for logical expression {:?}", - other - ))), + Expr::Sort { + expr: _, + asc: _, + nulls_first: _, + } => Err(DataFusionError::Internal( + "Create physical name does not support sort expression".to_string(), + )), + Expr::Wildcard => Err(DataFusionError::Internal( + "Create physical name does not support wildcard".to_string(), + )), } } From 67f524dec87c87d3a60a604d0bcdf20867e65db5 Mon Sep 17 00:00:00 2001 From: Jason Tianyi Wang Date: Thu, 4 Nov 2021 23:22:09 +0900 Subject: [PATCH 5/5] make destructuring in match block simpler --- datafusion/src/logical_plan/expr.rs | 6 +----- datafusion/src/physical_plan/planner.rs | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/datafusion/src/logical_plan/expr.rs b/datafusion/src/logical_plan/expr.rs index 4cbed2abfa929..04e95e73a297e 100644 --- a/datafusion/src/logical_plan/expr.rs +++ b/datafusion/src/logical_plan/expr.rs @@ -1987,11 +1987,7 @@ fn create_name(e: &Expr, input_schema: &DFSchema) -> Result { Ok(format!("{} BETWEEN {} AND {}", expr, low, high)) } } - Expr::Sort { - expr: _, - asc: _, - nulls_first: _, - } => Err(DataFusionError::Internal( + Expr::Sort { .. } => Err(DataFusionError::Internal( "Create name does not support sort expression".to_string(), )), Expr::Wildcard => Err(DataFusionError::Internal( diff --git a/datafusion/src/physical_plan/planner.rs b/datafusion/src/physical_plan/planner.rs index 95e184f295f8b..006d86c42ab94 100644 --- a/datafusion/src/physical_plan/planner.rs +++ b/datafusion/src/physical_plan/planner.rs @@ -197,11 +197,7 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { Ok(format!("{} BETWEEN {} AND {}", expr, low, high)) } } - Expr::Sort { - expr: _, - asc: _, - nulls_first: _, - } => Err(DataFusionError::Internal( + Expr::Sort { .. } => Err(DataFusionError::Internal( "Create physical name does not support sort expression".to_string(), )), Expr::Wildcard => Err(DataFusionError::Internal(