From 2293cb01182727a8fedbdf2cd770ff5105446496 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 27 Apr 2022 06:59:23 -0600 Subject: [PATCH 1/2] Add SQL query planner support for IN subqueries --- datafusion/core/src/sql/planner.rs | 59 ++++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/datafusion/core/src/sql/planner.rs b/datafusion/core/src/sql/planner.rs index e693d7b209c87..c283da4b369e1 100644 --- a/datafusion/core/src/sql/planner.rs +++ b/datafusion/core/src/sql/planner.rs @@ -1882,9 +1882,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SQLExpr::Exists(subquery) => self.parse_exists_subquery(&subquery, false, schema), - SQLExpr::InSubquery { .. } => Err(DataFusionError::NotImplemented( - "IN subqueries are not supported yet".to_owned(), - )), + SQLExpr::InSubquery { expr, subquery, negated } => self.parse_in_subquery(expr, &subquery, negated, schema), SQLExpr::Subquery(_) => Err(DataFusionError::NotImplemented( "Scalar subqueries are not supported yet".to_owned(), @@ -1913,6 +1911,24 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { }) } + fn parse_in_subquery( + &self, + expr: Box, + subquery: &Query, + negated: bool, + input_schema: &DFSchema, + ) -> Result { + Ok(Expr::InSubquery { + expr: Box::new(self.sql_to_rex(*expr.to_owned(), input_schema)?), + subquery: Subquery { + subquery: Arc::new( + self.subquery_to_plan(subquery.clone(), input_schema)?, + ), + }, + negated, + }) + } + fn function_args_to_expr( &self, args: Vec, @@ -4311,4 +4327,41 @@ mod tests { ); quick_test(sql, &expected); } + + #[test] + fn in_subquery_uncorrelated() { + let sql = "SELECT id FROM person p WHERE id IN \ + (SELECT id FROM person)"; + + let subquery_expected = "Subquery: Projection: #person.id\ + \n TableScan: person projection=None"; + + let expected = format!( + "Projection: #p.id\ + \n Filter: #p.id IN ({})\ + \n SubqueryAlias: p\ + \n TableScan: person projection=None", + subquery_expected + ); + quick_test(sql, &expected); + } + + #[test] + fn not_in_subquery_correlated() { + let sql = "SELECT id FROM person p WHERE id NOT IN \ + (SELECT id FROM person WHERE last_name = p.last_name AND state = 'CO')"; + + let subquery_expected = "Subquery: Projection: #person.id\ + \n Filter: #person.last_name = #p.last_name AND #person.state = Utf8(\"CO\")\ + \n TableScan: person projection=None"; + + let expected = format!( + "Projection: #p.id\ + \n Filter: #p.id NOT IN ({})\ + \n SubqueryAlias: p\ + \n TableScan: person projection=None", + subquery_expected + ); + quick_test(sql, &expected); + } } From dee8f621ae01fcf626454d13735a55303e8e4be9 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 27 Apr 2022 07:12:56 -0600 Subject: [PATCH 2/2] clippy --- datafusion/core/src/sql/planner.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/core/src/sql/planner.rs b/datafusion/core/src/sql/planner.rs index c283da4b369e1..a1727fcaa075b 100644 --- a/datafusion/core/src/sql/planner.rs +++ b/datafusion/core/src/sql/planner.rs @@ -1882,7 +1882,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SQLExpr::Exists(subquery) => self.parse_exists_subquery(&subquery, false, schema), - SQLExpr::InSubquery { expr, subquery, negated } => self.parse_in_subquery(expr, &subquery, negated, schema), + SQLExpr::InSubquery { expr, subquery, negated } => self.parse_in_subquery(&expr, &subquery, negated, schema), SQLExpr::Subquery(_) => Err(DataFusionError::NotImplemented( "Scalar subqueries are not supported yet".to_owned(), @@ -1913,13 +1913,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { fn parse_in_subquery( &self, - expr: Box, + expr: &SQLExpr, subquery: &Query, negated: bool, input_schema: &DFSchema, ) -> Result { Ok(Expr::InSubquery { - expr: Box::new(self.sql_to_rex(*expr.to_owned(), input_schema)?), + expr: Box::new(self.sql_to_rex(expr.clone(), input_schema)?), subquery: Subquery { subquery: Arc::new( self.subquery_to_plan(subquery.clone(), input_schema)?,