From a060b40f14f6a28945d0e15c2ae893136a9d55b9 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Fri, 4 Mar 2022 09:42:54 -0500 Subject: [PATCH 1/4] Add test that fails with ambiguous reference to column --- datafusion/tests/dataframe.rs | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/datafusion/tests/dataframe.rs b/datafusion/tests/dataframe.rs index 6520e749350c2..116315e9b9b27 100644 --- a/datafusion/tests/dataframe.rs +++ b/datafusion/tests/dataframe.rs @@ -28,6 +28,7 @@ use datafusion::error::Result; use datafusion::execution::context::ExecutionContext; use datafusion::logical_plan::{col, Expr}; use datafusion::{datasource::MemTable, prelude::JoinType}; +use datafusion_expr::lit; #[tokio::test] async fn join() -> Result<()> { @@ -120,3 +121,34 @@ async fn sort_on_unprojected_columns() -> Result<()> { Ok(()) } + +#[tokio::test] +async fn filter_with_alias_overwrite() -> Result<()> { + let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); + + let batch = RecordBatch::try_new( + Arc::new(schema.clone()), + vec![Arc::new(Int32Array::from_slice(&[1, 10, 10, 100]))], + ) + .unwrap(); + + let mut ctx = ExecutionContext::new(); + let provider = MemTable::try_new(Arc::new(schema), vec![vec![batch]]).unwrap(); + ctx.register_table("t", Arc::new(provider)).unwrap(); + + let df = ctx + .table("t") + .unwrap() + .select(vec![(col("a").eq(lit(10))).alias("a")]) + .unwrap() + .filter(col("a")) + .unwrap(); + let results = df.collect().await.unwrap(); + + let expected = vec![ + "+------+", "| a |", "+------+", "| true |", "| true |", "+------+", + ]; + assert_batches_eq!(expected, &results); + + Ok(()) +} From 2042fbb8e16cd5908563c0f8faafe000f02ec029 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Fri, 4 Mar 2022 10:11:44 -0500 Subject: [PATCH 2/4] Use plan schema rather than combined merged schema in filter optimization --- datafusion/src/optimizer/common_subexpr_eliminate.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/datafusion/src/optimizer/common_subexpr_eliminate.rs b/datafusion/src/optimizer/common_subexpr_eliminate.rs index 2ed45be25bc17..ae9de3dcb71fc 100644 --- a/datafusion/src/optimizer/common_subexpr_eliminate.rs +++ b/datafusion/src/optimizer/common_subexpr_eliminate.rs @@ -111,13 +111,8 @@ fn optimize(plan: &LogicalPlan, execution_props: &ExecutionProps) -> Result { - let schemas = plan.all_schemas(); - let all_schema = - schemas.into_iter().fold(DFSchema::empty(), |mut lhs, rhs| { - lhs.merge(rhs); - lhs - }); - let data_type = predicate.get_type(&all_schema)?; + let schema = plan.schema().as_ref().clone(); + let data_type = predicate.get_type(&schema)?; let mut id_array = vec![]; expr_to_identifier(predicate, &mut expr_set, &mut id_array, data_type)?; From 410a43cba143184a238c3d97ed4297c5d3d3bef3 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Tue, 8 Mar 2022 14:46:19 -0500 Subject: [PATCH 3/4] First try to resolve predicate using plan's schema then fall back to using all schemas --- .../src/optimizer/common_subexpr_eliminate.rs | 13 ++++++++++++- testing | 2 +- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/datafusion/src/optimizer/common_subexpr_eliminate.rs b/datafusion/src/optimizer/common_subexpr_eliminate.rs index ae9de3dcb71fc..895bcf4f5978d 100644 --- a/datafusion/src/optimizer/common_subexpr_eliminate.rs +++ b/datafusion/src/optimizer/common_subexpr_eliminate.rs @@ -112,7 +112,18 @@ fn optimize(plan: &LogicalPlan, execution_props: &ExecutionProps) -> Result { let schema = plan.schema().as_ref().clone(); - let data_type = predicate.get_type(&schema)?; + let data_type = if let Ok(data_type) = predicate.get_type(&schema) { + data_type + } else { + // predicate type could not be resolved in schema, fall back to all schemas + let schemas = plan.all_schemas(); + let all_schema = + schemas.into_iter().fold(DFSchema::empty(), |mut lhs, rhs| { + lhs.merge(rhs); + lhs + }); + predicate.get_type(&all_schema)? + }; let mut id_array = vec![]; expr_to_identifier(predicate, &mut expr_set, &mut id_array, data_type)?; diff --git a/testing b/testing index a8f7be3805317..b658b087767b0 160000 --- a/testing +++ b/testing @@ -1 +1 @@ -Subproject commit a8f7be380531758eb7962542a5eb020d8795aa20 +Subproject commit b658b087767b041b2081766814655b4dd5a9a439 From 412a6c98bbd4797e7b867991f918911e6539d7ab Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Tue, 8 Mar 2022 15:18:37 -0500 Subject: [PATCH 4/4] Revert testing submodule change --- testing | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing b/testing index b658b087767b0..a8f7be3805317 160000 --- a/testing +++ b/testing @@ -1 +1 @@ -Subproject commit b658b087767b041b2081766814655b4dd5a9a439 +Subproject commit a8f7be380531758eb7962542a5eb020d8795aa20