-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Support non-tuple expression for in-subquery to join #4725
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -95,10 +95,9 @@ impl OptimizerRule for SubqueryFilterToJoin { | |
| subquery, | ||
| negated, | ||
| } => { | ||
| let right_input = self.try_optimize( | ||
| &subquery.subquery, | ||
| _config | ||
| )?.unwrap_or_else(||subquery.subquery.as_ref().clone()); | ||
| let right_input = self | ||
| .try_optimize(&subquery.subquery, _config)? | ||
| .unwrap_or_else(|| subquery.subquery.as_ref().clone()); | ||
| let right_schema = right_input.schema(); | ||
| if right_schema.fields().len() != 1 { | ||
| return Err(DataFusionError::Plan( | ||
|
|
@@ -108,13 +107,19 @@ impl OptimizerRule for SubqueryFilterToJoin { | |
| }; | ||
|
|
||
| let right_key = right_schema.field(0).qualified_column(); | ||
| let left_key = match *expr.clone() { | ||
| Expr::Column(col) => col, | ||
| _ => return Err(DataFusionError::NotImplemented( | ||
| "Filtering by expression not implemented for InSubquery" | ||
| .to_string(), | ||
| )), | ||
| }; | ||
| let left_key = *expr.clone(); | ||
| // TODO: save the predicate to join-filter and let the other rule decide it is | ||
| // a equi or non-equi predicate. | ||
| let (on, filter) = | ||
| // When left is a constant expression, like 1, | ||
| // the join predicate will be `1 = right_key`, it is better to add it to filter. | ||
| if left_key.to_columns()?.is_empty() { | ||
| let equi_expr = | ||
| Expr::eq(*expr.clone(), Expr::Column(right_key)); | ||
| (vec![], Some(equi_expr)) | ||
| } else { | ||
|
Comment on lines
+113
to
+120
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. IMHO, it's a good improvement, but it may not be suitable to put in this rule. 🤔 Just a thought: we could also add a TODO and handle it in a future ticket.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Agreed. There is another PR, #4711, which will separate equi and non-equi predicates from the filter. |
||
| (vec![(left_key, Expr::Column(right_key))], None) | ||
| }; | ||
|
|
||
| let join_type = if *negated { | ||
| JoinType::LeftAnti | ||
|
|
@@ -131,8 +136,8 @@ impl OptimizerRule for SubqueryFilterToJoin { | |
| Ok(LogicalPlan::Join(Join { | ||
| left: Arc::new(input), | ||
| right: Arc::new(right_input), | ||
| on: vec![(Expr::Column(left_key), Expr::Column(right_key))], | ||
| filter: None, | ||
| on, | ||
| filter, | ||
| join_type, | ||
| join_constraint: JoinConstraint::On, | ||
| schema: Arc::new(schema), | ||
|
|
@@ -143,7 +148,7 @@ impl OptimizerRule for SubqueryFilterToJoin { | |
| "Unknown expression while rewriting subquery to joins" | ||
| .to_string(), | ||
| )), | ||
| } | ||
| }, | ||
| ); | ||
|
|
||
| // In case of expressions which could not be rewritten | ||
|
|
@@ -418,4 +423,43 @@ mod tests { | |
|
|
||
| assert_optimized_plan_equal(&plan, expected) | ||
| } | ||
|
|
||
| /// Test for single IN subquery filter with expr equijoin | ||
| #[test] | ||
| fn in_subquery_to_expr_equijoin() -> Result<()> { | ||
| let table_scan = test_table_scan()?; | ||
| let plan = LogicalPlanBuilder::from(table_scan) | ||
| .filter(in_subquery( | ||
| col("c") + lit(10i32), | ||
| test_subquery_with_name("sq")?, | ||
| ))? | ||
| .project(vec![col("test.b")])? | ||
| .build()?; | ||
|
|
||
| let expected = "Projection: test.b [b:UInt32]\ | ||
| \n LeftSemi Join: test.c + Int32(10) = sq.c [a:UInt32, b:UInt32, c:UInt32]\ | ||
| \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]\ | ||
| \n Projection: sq.c [c:UInt32]\ | ||
| \n TableScan: sq [a:UInt32, b:UInt32, c:UInt32]"; | ||
|
|
||
| assert_optimized_plan_equal(&plan, expected) | ||
| } | ||
|
|
||
| /// Test for single IN subquery filter with non equijoin | ||
| #[test] | ||
| fn in_subquery_to_non_equijoin() -> Result<()> { | ||
| let table_scan = test_table_scan()?; | ||
| let plan = LogicalPlanBuilder::from(table_scan) | ||
| .filter(in_subquery(lit(10i32), test_subquery_with_name("sq")?))? | ||
| .project(vec![col("test.b")])? | ||
| .build()?; | ||
|
|
||
| let expected = "Projection: test.b [b:UInt32]\ | ||
| \n LeftSemi Join: Filter: Int32(10) = sq.c [a:UInt32, b:UInt32, c:UInt32]\ | ||
| \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]\ | ||
| \n Projection: sq.c [c:UInt32]\ | ||
| \n TableScan: sq [a:UInt32, b:UInt32, c:UInt32]"; | ||
|
|
||
| assert_optimized_plan_equal(&plan, expected) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍