From b088d1e2e890538459ff2693f96d7baafbaef4f1 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 4 Jun 2024 15:40:19 -0700 Subject: [PATCH] allow outer references in un-referenced CTE relations --- .../sql/catalyst/analysis/CheckAnalysis.scala | 7 ++++ .../plans/logical/basicLogicalOperators.scala | 4 +++ .../analyzer-results/cte-legacy.sql.out | 24 +++++++++++++ .../analyzer-results/cte-nested.sql.out | 34 +++++++++++++++++++ .../analyzer-results/cte-nonlegacy.sql.out | 34 +++++++++++++++++++ .../resources/sql-tests/inputs/cte-nested.sql | 12 +++++++ .../sql-tests/results/cte-legacy.sql.out | 22 ++++++++++++ .../sql-tests/results/cte-nested.sql.out | 22 ++++++++++++ .../sql-tests/results/cte-nonlegacy.sql.out | 22 ++++++++++++ 9 files changed, 181 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 8c380a7228c6b..f4408220ac939 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -1371,6 +1371,13 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB aggregated, canContainOuter && SQLConf.get.getConf(SQLConf.DECORRELATE_OFFSET_ENABLED)) + // We always inline CTE relations before analysis check, and only un-referenced CTE + // relations will be kept in the plan. Here we should simply skip them and check the + // children, as un-referenced CTE relations won't be executed anyway and don't need to + // be restricted by the current subquery correlation limitations. 
+ case _: WithCTE | _: CTERelationDef => + plan.children.foreach(p => checkPlan(p, aggregated, canContainOuter)) + // Category 4: Any other operators not in the above 3 categories // cannot be on a correlation path, that is they are allowed only // under a correlation point but they and their descendant operators diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 9242a06cf1d6e..0135fcfb3cc8c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -911,6 +911,10 @@ case class WithCTE(plan: LogicalPlan, cteDefs: Seq[CTERelationDef]) extends Logi def withNewPlan(newPlan: LogicalPlan): WithCTE = { withNewChildren(children.init :+ newPlan).asInstanceOf[WithCTE] } + + override def maxRows: Option[Long] = plan.maxRows + + override def maxRowsPerPartition: Option[Long] = plan.maxRowsPerPartition } /** diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-legacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-legacy.sql.out index 594a30b054edd..f9b78e94236fb 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-legacy.sql.out @@ -43,6 +43,30 @@ Project [scalar-subquery#x [] AS scalarsubquery()#x] +- OneRowRelation +-- !query +SELECT ( + WITH unreferenced AS (SELECT id) + SELECT 1 +) FROM range(1) +-- !query analysis +Project [scalar-subquery#x [] AS scalarsubquery()#x] +: +- Project [1 AS 1#x] +: +- OneRowRelation ++- Range (0, 1, step=1) + + +-- !query +SELECT ( + WITH unreferenced AS (SELECT 1) + SELECT id +) FROM range(1) +-- !query analysis +Project [scalar-subquery#x [id#xL] AS scalarsubquery(id)#xL] +: 
+- Project [outer(id#xL)] +: +- OneRowRelation ++- Range (0, 1, step=1) + + -- !query SELECT * FROM ( diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nested.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nested.sql.out index f1a302b06f2a8..3a9fc5ea1297f 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nested.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nested.sql.out @@ -58,6 +58,40 @@ Project [scalar-subquery#x [] AS scalarsubquery()#x] +- OneRowRelation +-- !query +SELECT ( + WITH unreferenced AS (SELECT id) + SELECT 1 +) FROM range(1) +-- !query analysis +Project [scalar-subquery#x [id#xL] AS scalarsubquery(id)#x] +: +- WithCTE +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias unreferenced +: : +- Project [outer(id#xL)] +: : +- OneRowRelation +: +- Project [1 AS 1#x] +: +- OneRowRelation ++- Range (0, 1, step=1) + + +-- !query +SELECT ( + WITH unreferenced AS (SELECT 1) + SELECT id +) FROM range(1) +-- !query analysis +Project [scalar-subquery#x [id#xL] AS scalarsubquery(id)#xL] +: +- WithCTE +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias unreferenced +: : +- Project [1 AS 1#x] +: : +- OneRowRelation +: +- Project [outer(id#xL)] +: +- OneRowRelation ++- Range (0, 1, step=1) + + -- !query SELECT * FROM ( diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nonlegacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nonlegacy.sql.out index 6e55c6fa83cd9..e8640c3cbb6bd 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nonlegacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nonlegacy.sql.out @@ -58,6 +58,40 @@ Project [scalar-subquery#x [] AS scalarsubquery()#x] +- OneRowRelation +-- !query +SELECT ( + WITH unreferenced AS (SELECT id) + SELECT 1 +) FROM range(1) +-- !query analysis +Project [scalar-subquery#x [id#xL] AS scalarsubquery(id)#x] +: +- WithCTE +: :- 
CTERelationDef xxxx, false +: : +- SubqueryAlias unreferenced +: : +- Project [outer(id#xL)] +: : +- OneRowRelation +: +- Project [1 AS 1#x] +: +- OneRowRelation ++- Range (0, 1, step=1) + + +-- !query +SELECT ( + WITH unreferenced AS (SELECT 1) + SELECT id +) FROM range(1) +-- !query analysis +Project [scalar-subquery#x [id#xL] AS scalarsubquery(id)#xL] +: +- WithCTE +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias unreferenced +: : +- Project [1 AS 1#x] +: : +- OneRowRelation +: +- Project [outer(id#xL)] +: +- OneRowRelation ++- Range (0, 1, step=1) + + -- !query SELECT * FROM ( diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte-nested.sql b/sql/core/src/test/resources/sql-tests/inputs/cte-nested.sql index e5ef244341751..3b2ba1fcdd66e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cte-nested.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cte-nested.sql @@ -17,6 +17,18 @@ SELECT ( SELECT * FROM t ); +-- un-referenced CTE in subquery expression: outer reference in CTE relation +SELECT ( + WITH unreferenced AS (SELECT id) + SELECT 1 +) FROM range(1); + +-- un-referenced CTE in subquery expression: outer reference in CTE main query +SELECT ( + WITH unreferenced AS (SELECT 1) + SELECT id +) FROM range(1); + -- Make sure CTE in subquery is scoped to that subquery rather than global -- the 2nd half of the union should fail because the cte is scoped to the first half SELECT * FROM diff --git a/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out index b79d8b1afb0d4..1255e8b51f301 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out @@ -33,6 +33,28 @@ struct 1 +-- !query +SELECT ( + WITH unreferenced AS (SELECT id) + SELECT 1 +) FROM range(1) +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT ( + WITH unreferenced AS (SELECT 1) + SELECT id +) 
FROM range(1) +-- !query schema +struct +-- !query output +0 + + -- !query SELECT * FROM ( diff --git a/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out b/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out index a93bcb7593768..7cf488ce8cad4 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out @@ -33,6 +33,28 @@ struct 1 +-- !query +SELECT ( + WITH unreferenced AS (SELECT id) + SELECT 1 +) FROM range(1) +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT ( + WITH unreferenced AS (SELECT 1) + SELECT id +) FROM range(1) +-- !query schema +struct +-- !query output +0 + + -- !query SELECT * FROM ( diff --git a/sql/core/src/test/resources/sql-tests/results/cte-nonlegacy.sql.out b/sql/core/src/test/resources/sql-tests/results/cte-nonlegacy.sql.out index ba311c0253ab1..94ef47397eff1 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte-nonlegacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte-nonlegacy.sql.out @@ -33,6 +33,28 @@ struct 1 +-- !query +SELECT ( + WITH unreferenced AS (SELECT id) + SELECT 1 +) FROM range(1) +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT ( + WITH unreferenced AS (SELECT 1) + SELECT id +) FROM range(1) +-- !query schema +struct +-- !query output +0 + + -- !query SELECT * FROM (