From ef5603ff9fca5a6011429dad6308a72c7c8e4708 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Fri, 16 Jul 2021 18:18:11 +0200 Subject: [PATCH 1/8] Support column alias in FROM --- benchmarks/src/bin/tpch.rs | 5 ++ datafusion/src/sql/planner.rs | 101 ++++++++++++++++++++++++---------- 2 files changed, 76 insertions(+), 30 deletions(-) diff --git a/benchmarks/src/bin/tpch.rs b/benchmarks/src/bin/tpch.rs index 169319d30beef..aee3a377ab252 100644 --- a/benchmarks/src/bin/tpch.rs +++ b/benchmarks/src/bin/tpch.rs @@ -751,6 +751,11 @@ mod tests { run_query(12).await } + // #[tokio::test] + // async fn run_q13() -> Result<()> { + // run_query(13).await + // } + #[tokio::test] async fn run_q14() -> Result<()> { run_query(14).await diff --git a/datafusion/src/sql/planner.rs b/datafusion/src/sql/planner.rs index 41b4e20f15f3d..37a9f4cf110ef 100644 --- a/datafusion/src/sql/planner.rs +++ b/datafusion/src/sql/planner.rs @@ -424,46 +424,77 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { relation: &TableFactor, ctes: &mut HashMap, ) -> Result { - match relation { + let (plan, columns_alias) = match relation { TableFactor::Table { name, alias, .. } => { let table_name = name.to_string(); let cte = ctes.get(&table_name); - match ( - cte, - self.schema_provider.get_table_provider(name.try_into()?), - ) { - (Some(cte_plan), _) => Ok(cte_plan.clone()), - (_, Some(provider)) => LogicalPlanBuilder::scan( - // take alias into account to support `JOIN table1 as table2` - alias - .as_ref() - .map(|a| a.name.value.as_str()) - .unwrap_or(&table_name), - provider, - None, - )? - .build(), - (None, None) => Err(DataFusionError::Plan(format!( - "Table or CTE with name '{}' not found", - name - ))), - } + let columns_alias = alias.clone().map(|x| x.columns); + ( + match ( + cte, + self.schema_provider.get_table_provider(name.try_into()?), + ) { + (Some(cte_plan), _) => Ok(cte_plan.clone()), + (_, Some(provider)) => LogicalPlanBuilder::scan( + // take alias into account to support `JOIN table1 as table2` + alias + .as_ref() + .map(|a| a.name.value.as_str()) + .unwrap_or(&table_name), + provider, + None, + )? + .build(), + (None, None) => Err(DataFusionError::Plan(format!( + "Table or CTE with name '{}' not found", + name + ))), + }?, + columns_alias, + ) } TableFactor::Derived { subquery, alias, .. - } => self.query_to_plan_with_alias( - subquery, - alias.as_ref().map(|a| a.name.value.to_string()), - ctes, + } => ( + self.query_to_plan_with_alias( + subquery, + alias.as_ref().map(|a| a.name.value.to_string()), + ctes, + )?, + alias.clone().map(|x| x.columns), ), TableFactor::NestedJoin(table_with_joins) => { - self.plan_table_with_joins(table_with_joins, ctes) + (self.plan_table_with_joins(table_with_joins, ctes)?, None) } // @todo Support TableFactory::TableFunction? - _ => Err(DataFusionError::NotImplemented(format!( - "Unsupported ast node {:?} in create_relation", - relation - ))), + _ => { + return Err(DataFusionError::NotImplemented(format!( + "Unsupported ast node {:?} in create_relation", + relation + ))) + } + }; + + if let Some(columns_alias) = columns_alias { + if columns_alias.len() != plan.schema().fields().len() { + return Err(DataFusionError::Plan(format!( + "Source table contains {} fields but column alias {}", + plan.schema().fields().len(), + columns_alias.len(), + ))); + } else { + let fields = plan.schema().fields().clone(); + LogicalPlanBuilder::from(plan) + .project( + fields + .iter() + .zip(columns_alias.iter()) + .map(|(field, ident)| col(field.name()).alias(&ident.value)), + )? + .build() + } + } else { + Ok(plan) } } @@ -2063,6 +2094,16 @@ mod tests { quick_test(sql, expected); } + #[test] + fn table_with_column_alias() { + let sql = "SELECT a, b, c + FROM lineitem l (a, b, c)"; + let expected = "Projection: #a, #b, #c\ + \n Projection: #l.l_item_id AS a, #l.l_description AS b, #l.price AS c\ + \n TableScan: lineitem projection=None"; + quick_test(sql, expected); + } + #[test] fn select_aggregate_with_group_by_with_having_that_reuses_aggregate() { let sql = "SELECT first_name, MAX(age) From 237d485865dc2950489ecafd637913694647310e Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Fri, 16 Jul 2021 18:22:23 +0200 Subject: [PATCH 2/8] Remove q13 --- benchmarks/src/bin/tpch.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/benchmarks/src/bin/tpch.rs b/benchmarks/src/bin/tpch.rs index aee3a377ab252..9c1157b91c59a 100644 --- a/benchmarks/src/bin/tpch.rs +++ b/benchmarks/src/bin/tpch.rs @@ -751,10 +751,6 @@ mod tests { run_query(12).await } - // #[tokio::test] - // async fn run_q13() -> Result<()> { - // run_query(13).await - // } #[tokio::test] async fn run_q14() -> Result<()> { From c8c6ed6c5665b5ec819fcbfb501ab8a65a22736e Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Sat, 17 Jul 2021 08:40:33 +0200 Subject: [PATCH 3/8] Test on error --- benchmarks/src/bin/tpch.rs | 1 - datafusion/src/sql/planner.rs | 31 +++++++++++++++++++++---------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/benchmarks/src/bin/tpch.rs b/benchmarks/src/bin/tpch.rs index 9c1157b91c59a..169319d30beef 100644 --- a/benchmarks/src/bin/tpch.rs +++ b/benchmarks/src/bin/tpch.rs @@ -751,7 +751,6 @@ mod tests { run_query(12).await } - #[tokio::test] async fn run_q14() -> Result<()> { run_query(14).await diff --git a/datafusion/src/sql/planner.rs b/datafusion/src/sql/planner.rs index 37a9f4cf110ef..16af805e81e9b 100644 --- a/datafusion/src/sql/planner.rs +++ b/datafusion/src/sql/planner.rs @@ -1915,6 +1915,27 @@ mod tests { quick_test(sql, expected); } + #[test] + fn table_with_column_alias() { + let sql = "SELECT a, b, c + FROM lineitem l (a, b, c)"; + let expected = "Projection: #a, #b, #c\ + \n Projection: #l.l_item_id AS a, #l.l_description AS b, #l.price AS c\ + \n TableScan: lineitem projection=None"; + quick_test(sql, expected); + } + + #[test] + fn table_with_column_alias_number_cols() { + let sql = "SELECT a, b, c + FROM lineitem l (a, b)"; + let err = logical_plan(sql).expect_err("query should have failed"); + assert_eq!( + "Plan(\"Source table contains 3 fields but column alias 2\")", + format!("{:?}", err) + ); + } + #[test] fn select_with_having() { let sql = "SELECT id, age @@ -2094,16 +2115,6 @@ mod tests { quick_test(sql, expected); } - #[test] - fn table_with_column_alias() { - let sql = "SELECT a, b, c - FROM lineitem l (a, b, c)"; - let expected = "Projection: #a, #b, #c\ - \n Projection: #l.l_item_id AS a, #l.l_description AS b, #l.price AS c\ - \n TableScan: lineitem projection=None"; - quick_test(sql, expected); - } - #[test] fn select_aggregate_with_group_by_with_having_that_reuses_aggregate() { let sql = "SELECT first_name, MAX(age) From 7bc96f1f65a1f7c9ca69fe88c0cf89e7e1fdedeb Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Sat, 17 Jul 2021 14:16:09 +0200 Subject: [PATCH 4/8] Don't convert empty list --- datafusion/src/sql/planner.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datafusion/src/sql/planner.rs b/datafusion/src/sql/planner.rs index 16af805e81e9b..2afd04e66b3c7 100644 --- a/datafusion/src/sql/planner.rs +++ b/datafusion/src/sql/planner.rs @@ -482,6 +482,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { plan.schema().fields().len(), columns_alias.len(), ))); + } else if columns_alias.is_empty() { + // sqlparser-rs encodes AS t as an empty list of column alias + Ok(plan) } else { let fields = plan.schema().fields().clone(); LogicalPlanBuilder::from(plan) From 4b118bef23c10fcf009ef2d517b15d72f7d48c30 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Sat, 17 Jul 2021 14:24:39 +0200 Subject: [PATCH 5/8] Don't convert empty list --- datafusion/src/sql/planner.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/src/sql/planner.rs b/datafusion/src/sql/planner.rs index 2afd04e66b3c7..1f0d974663382 100644 --- a/datafusion/src/sql/planner.rs +++ b/datafusion/src/sql/planner.rs @@ -476,15 +476,15 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { }; if let Some(columns_alias) = columns_alias { - if columns_alias.len() != plan.schema().fields().len() { + if columns_alias.is_empty() { + // sqlparser-rs encodes AS t as an empty list of column alias + Ok(plan) + } else if columns_alias.len() != plan.schema().fields().len() { return Err(DataFusionError::Plan(format!( "Source table contains {} fields but column alias {}", plan.schema().fields().len(), columns_alias.len(), ))); - } else if columns_alias.is_empty() { - // sqlparser-rs encodes AS t as an empty list of column alias - Ok(plan) } else { let fields = plan.schema().fields().clone(); LogicalPlanBuilder::from(plan) From 37a98bbef92e2750a6bfcb507e5f6220065cc5a8 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Sat, 17 Jul 2021 14:35:18 +0200 Subject: [PATCH 6/8] Fix test --- datafusion/src/sql/planner.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/src/sql/planner.rs b/datafusion/src/sql/planner.rs index 1f0d974663382..2261ba8032d95 100644 --- a/datafusion/src/sql/planner.rs +++ b/datafusion/src/sql/planner.rs @@ -1924,7 +1924,7 @@ mod tests { FROM lineitem l (a, b, c)"; let expected = "Projection: #a, #b, #c\ \n Projection: #l.l_item_id AS a, #l.l_description AS b, #l.price AS c\ - \n TableScan: lineitem projection=None"; + \n TableScan: l projection=None"; quick_test(sql, expected); } From 1693ce3a50a074c721e7517b9fbaca38d19fcd7e Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Sat, 17 Jul 2021 14:51:33 +0200 Subject: [PATCH 7/8] Improve error message --- datafusion/src/sql/planner.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/src/sql/planner.rs b/datafusion/src/sql/planner.rs index 2261ba8032d95..75a5552568520 100644 --- a/datafusion/src/sql/planner.rs +++ b/datafusion/src/sql/planner.rs @@ -481,7 +481,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Ok(plan) } else if columns_alias.len() != plan.schema().fields().len() { return Err(DataFusionError::Plan(format!( - "Source table contains {} fields but column alias {}", + "Source table contains {} columns but only {} names given as column alias", plan.schema().fields().len(), columns_alias.len(), ))); @@ -1934,7 +1934,7 @@ mod tests { FROM lineitem l (a, b)"; let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "Plan(\"Source table contains 3 fields but column alias 2\")", + "Plan(\"Source table contains 2 columns but only 3 names given as column alias\")", format!("{:?}", err) ); } From 6e179d02ce7fcb05e24acd6f696de0ad9abe217b Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Sat, 17 Jul 2021 15:05:43 +0200 Subject: [PATCH 8/8] Improve error message --- datafusion/src/sql/planner.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/src/sql/planner.rs b/datafusion/src/sql/planner.rs index 75a5552568520..1437346fccd3a 100644 --- a/datafusion/src/sql/planner.rs +++ b/datafusion/src/sql/planner.rs @@ -1934,7 +1934,7 @@ mod tests { FROM lineitem l (a, b)"; let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "Plan(\"Source table contains 2 columns but only 3 names given as column alias\")", + "Plan(\"Source table contains 3 columns but only 2 names given as column alias\")", format!("{:?}", err) ); }