From 49aba06f601433dd7d7b26202637a5cf998e8d32 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 14 Jul 2022 16:01:58 -0600 Subject: [PATCH 1/3] unit test to reproduce the bug --- .../optimizer/src/common_subexpr_eliminate.rs | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 3964bee6b72b1..076622ce5cabb 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -699,6 +699,7 @@ fn replace_common_expr( mod test { use super::*; use crate::test::*; + use datafusion_expr::logical_plan::JoinType; use datafusion_expr::{ avg, binary_expr, col, lit, logical_plan::builder::LogicalPlanBuilder, sum, Operator, @@ -890,4 +891,30 @@ mod test { assert!(field_set.insert(field.qualified_name())); } } + + #[test] + fn redundant_project_fields_join_input() { + let table_scan_1 = test_table_scan_with_name("test1").unwrap(); + let table_scan_2 = test_table_scan_with_name("test2").unwrap(); + let join = LogicalPlanBuilder::from(table_scan_1) + .join(&table_scan_2, JoinType::Inner, (vec!["a"], vec!["a"]), None) + .unwrap() + .build() + .unwrap(); + let affected_id: HashSet = + ["c+a".to_string(), "d+a".to_string()].into_iter().collect(); + let expr_set = [ + ("c+a".to_string(), (col("c+a"), 1, DataType::UInt32)), + ("d+a".to_string(), (col("d+a"), 1, DataType::UInt32)), + ] + .into_iter() + .collect(); + let project = build_project_plan(join, affected_id.clone(), &expr_set).unwrap(); + let project_2 = build_project_plan(project, affected_id, &expr_set).unwrap(); + + let mut field_set = HashSet::new(); + for field in project_2.schema().fields() { + assert!(field_set.insert(field.qualified_name())); + } + } } From fe2baa26f8ab6d376fe0d2830ea3af398a8504b1 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 14 Jul 2022 16:08:51 -0600 Subject: [PATCH 2/3] fix the bug --- datafusion/optimizer/src/common_subexpr_eliminate.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 076622ce5cabb..141c8813f2775 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -282,8 +282,7 @@ fn build_project_plan( } for field in input.schema().fields() { - if !fields_set.contains(field.name()) { - fields_set.insert(field.name().to_owned()); + if fields_set.insert(field.qualified_name()) { fields.push(field.clone()); project_exprs.push(Expr::Column(field.qualified_column())); } From d9f18fa216a5f3be40cca410eacbace4adf67efd Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 14 Jul 2022 16:19:13 -0600 Subject: [PATCH 3/3] remove redundant merge of input schema --- datafusion/optimizer/src/common_subexpr_eliminate.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 141c8813f2775..8627b404dce85 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -288,8 +288,7 @@ fn build_project_plan( } } - let mut schema = DFSchema::new_with_metadata(fields, HashMap::new())?; - schema.merge(input.schema()); + let schema = DFSchema::new_with_metadata(fields, HashMap::new())?; Ok(LogicalPlan::Projection(Projection::try_new_with_schema( project_exprs,