From 43bbeb3d6a7fff2b3085024927d089399db51399 Mon Sep 17 00:00:00 2001 From: jackwener Date: Sun, 19 Feb 2023 16:13:16 +0800 Subject: [PATCH 1/2] bugfix: fix tpcds_logical_q8 ambiguous name. --- datafusion/common/src/dfschema.rs | 29 +++++++++++++++++++++++------ datafusion/optimizer/src/utils.rs | 2 +- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index 982459ac658b9..b49f073105c4d 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -284,12 +284,29 @@ impl DFSchema { match matches.len() { 0 => Err(field_not_found(None, name, self)), 1 => Ok(matches[0]), - _ => Err(DataFusionError::SchemaError( - SchemaError::AmbiguousReference { - qualifier: None, - name: name.to_string(), - }, - )), + _ => { + // When `matches` size > 1, it doesn't necessarily mean an `ambiguous name` problem. + // Because name may generate from Alias/... . It means that it don't own qualifier. + // For example: + // Join on id = b.id + // Project a.id as id TableScan b id + // In this case, there isn't `ambiguous name` problem. When `matches` just contains + // one field without qualifier, we should return it. + let fields_without_qualifier = matches + .iter() + .filter(|f| f.qualifier.is_none()) + .collect::>(); + if fields_without_qualifier.len() == 1 { + Ok(fields_without_qualifier[0]) + } else { + Err(DataFusionError::SchemaError( + SchemaError::AmbiguousReference { + qualifier: None, + name: name.to_string(), + }, + )) + } + } } } diff --git a/datafusion/optimizer/src/utils.rs b/datafusion/optimizer/src/utils.rs index 747fa62089eda..765be46912d5e 100644 --- a/datafusion/optimizer/src/utils.rs +++ b/datafusion/optimizer/src/utils.rs @@ -416,7 +416,7 @@ pub fn only_or_err(slice: &[T]) -> Result<&T> { /// Rewrites `expr` using `rewriter`, ensuring that the output has the /// same name as `expr` prior to rewrite, adding an alias if necessary. /// -/// This is important when optimzing plans to ensure the the output +/// This is important when optimizing plans to ensure the the output /// schema of plan nodes don't change after optimization pub fn rewrite_preserving_name(expr: Expr, rewriter: &mut R) -> Result where From 945a96e26cf10d738022d382b39574a34a774fbe Mon Sep 17 00:00:00 2001 From: jackwener Date: Tue, 21 Feb 2023 20:55:50 +0800 Subject: [PATCH 2/2] add integration-test --- .../optimizer/tests/integration-test.rs | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/datafusion/optimizer/tests/integration-test.rs b/datafusion/optimizer/tests/integration-test.rs index eac849e347811..4702c8efd1e5d 100644 --- a/datafusion/optimizer/tests/integration-test.rs +++ b/datafusion/optimizer/tests/integration-test.rs @@ -320,6 +320,22 @@ fn push_down_filter_groupby_expr_contains_alias() { assert_eq!(expected, format!("{plan:?}")); } +#[test] +// issue: https://github.com/apache/arrow-datafusion/issues/5334 +fn test_same_name_but_not_ambiguous() { + let sql = "SELECT t1.col_int32 AS col_int32 FROM test t1 intersect SELECT col_int32 FROM test t2"; + let plan = test_sql(sql).unwrap(); + let expected = "LeftSemi Join: col_int32 = t2.col_int32\ + \n Distinct:\ + \n Projection: t1.col_int32 AS col_int32\ + \n SubqueryAlias: t1\ + \n TableScan: test projection=[col_int32]\ + \n Projection: t2.col_int32\ + \n SubqueryAlias: t2\ + \n TableScan: test projection=[col_int32]"; + assert_eq!(expected, format!("{plan:?}")); +} + fn test_sql(sql: &str) -> Result { // parse the SQL let dialect = GenericDialect {}; // or AnsiDialect, or your own dialect ... @@ -348,10 +364,7 @@ struct MySchemaProvider { } impl ContextProvider for MySchemaProvider { - fn get_table_provider( - &self, - name: TableReference, - ) -> datafusion_common::Result> { + fn get_table_provider(&self, name: TableReference) -> Result> { let table_name = name.table(); if table_name.starts_with("test") { let schema = Schema::new_with_metadata(