From be23cef02e8c93c4e6a2419f09cfa0d46f4e47ea Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Mon, 24 Jun 2019 09:56:00 +0200 Subject: [PATCH 1/4] [SPARK-28002][SQL][FOLLOWUP] Support WITH clause column aliases --- .../test/resources/sql-tests/inputs/cte.sql | 14 ++ .../resources/sql-tests/results/cte.sql.out | 129 ++++++++++++------ 2 files changed, 101 insertions(+), 42 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte.sql b/sql/core/src/test/resources/sql-tests/inputs/cte.sql index ac448eb2b27b7..db4db6c3f8c0a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cte.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cte.sql @@ -31,6 +31,20 @@ FROM CTE1 t1 WITH t(x) AS (SELECT 1) SELECT * FROM t WHERE x = 1; +-- CTE with multiple column aliases +WITH t(x, y) AS (SELECT 1, 2) +SELECT * FROM t WHERE x = 1 AND y = 2; + +-- CTE with empty column alias list is not allowed +WITH t() AS (SELECT 1) +SELECT * FROM t; + +-- CTE with duplicate name is not allowed +WITH + t(x) AS (SELECT 1), + t(x) AS (SELECT 2) +SELECT * FROM t; + -- CTE in CTE definition WITH t as ( WITH t2 AS (SELECT 1) diff --git a/sql/core/src/test/resources/sql-tests/results/cte.sql.out b/sql/core/src/test/resources/sql-tests/results/cte.sql.out index b89e29fd08dc4..c15da01d61195 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 23 +-- Number of queries: 26 -- !query 0 @@ -108,40 +108,85 @@ struct -- !query 9 +WITH t(x, y) AS (SELECT 1, 2) +SELECT * FROM t WHERE x = 1 AND y = 2 +-- !query 9 schema +struct +-- !query 9 output +1 2 + + +-- !query 10 +WITH t() AS (SELECT 1) +SELECT * FROM t +-- !query 10 schema +struct<> +-- !query 10 output +org.apache.spark.sql.catalyst.parser.ParseException + +no viable alternative at input 'WITH t()'(line 1, pos 7) + +== SQL == +WITH t() AS (SELECT 1) +-------^^^ +SELECT * FROM t + + +-- !query 11 +WITH + t(x) AS (SELECT 1), + t(x) AS (SELECT 2) +SELECT * FROM t +-- !query 11 schema +struct<> +-- !query 11 output +org.apache.spark.sql.catalyst.parser.ParseException + +Found duplicate keys 't'.(line 1, pos 0) + +== SQL == +WITH +^^^ + t(x) AS (SELECT 1), + t(x) AS (SELECT 2) +SELECT * FROM t + + +-- !query 12 WITH t as ( WITH t2 AS (SELECT 1) SELECT * FROM t2 ) SELECT * FROM t --- !query 9 schema +-- !query 12 schema struct<1:int> --- !query 9 output +-- !query 12 output 1 --- !query 10 +-- !query 13 SELECT max(c) FROM ( WITH t(c) AS (SELECT 1) SELECT * FROM t ) --- !query 10 schema +-- !query 13 schema struct --- !query 10 output +-- !query 13 output 1 --- !query 11 +-- !query 14 SELECT ( WITH t AS (SELECT 1) SELECT * FROM t ) --- !query 11 schema +-- !query 14 schema struct --- !query 11 output +-- !query 14 output 1 --- !query 12 +-- !query 15 WITH t AS (SELECT 1), t2 AS ( @@ -149,13 +194,13 @@ WITH SELECT * FROM t ) SELECT * FROM t2 --- !query 12 schema +-- !query 15 schema struct<1:int> --- !query 12 output +-- !query 15 output 1 --- !query 13 +-- !query 16 WITH t(c) AS (SELECT 1), t2 AS ( @@ -167,13 +212,13 @@ WITH ) ) SELECT * FROM t2 --- !query 13 schema +-- !query 16 schema struct --- !query 13 output +-- !query 16 output 1 --- !query 14 +-- !query 17 WITH t AS (SELECT 1), t2 AS ( @@ -185,25 +230,25 @@ WITH SELECT * FROM t2 ) SELECT * FROM t2 --- !query 14 schema +-- !query 17 schema struct<2:int> --- !query 14 output +-- !query 17 output 2 --- !query 15 +-- !query 18 WITH t(c) AS (SELECT 1) SELECT max(c) FROM ( WITH t(c) AS (SELECT 2) SELECT * FROM t ) --- !query 15 schema +-- !query 18 schema struct --- !query 15 output +-- !query 18 output 2 --- !query 16 +-- !query 19 WITH t(c) AS (SELECT 1) SELECT sum(c) FROM ( SELECT max(c) AS c FROM ( @@ -211,13 +256,13 @@ SELECT sum(c) FROM ( SELECT * FROM t ) ) --- !query 16 schema +-- !query 19 schema struct --- !query 16 output +-- !query 19 output 2 --- !query 17 +-- !query 20 WITH t(c) AS (SELECT 1) SELECT sum(c) FROM ( WITH t(c) AS (SELECT 2) @@ -226,25 +271,25 @@ SELECT sum(c) FROM ( SELECT * FROM t ) ) --- !query 17 schema +-- !query 20 schema struct --- !query 17 output +-- !query 20 output 3 --- !query 18 +-- !query 21 WITH t AS (SELECT 1) SELECT ( WITH t AS (SELECT 2) SELECT * FROM t ) --- !query 18 schema +-- !query 21 schema struct --- !query 18 output +-- !query 21 output 1 --- !query 19 +-- !query 22 WITH t AS (SELECT 1) SELECT ( SELECT ( @@ -252,13 +297,13 @@ SELECT ( SELECT * FROM t ) ) --- !query 19 schema +-- !query 22 schema struct --- !query 19 output +-- !query 22 output 1 --- !query 20 +-- !query 23 WITH t AS (SELECT 1) SELECT ( WITH t AS (SELECT 2) @@ -267,23 +312,23 @@ SELECT ( SELECT * FROM t ) ) --- !query 20 schema +-- !query 23 schema struct --- !query 20 output +-- !query 23 output 1 --- !query 21 +-- !query 24 DROP VIEW IF EXISTS t --- !query 21 schema +-- !query 24 schema struct<> --- !query 21 output +-- !query 24 output --- !query 22 +-- !query 25 DROP VIEW IF EXISTS t2 --- !query 22 schema +-- !query 25 schema struct<> --- !query 22 output +-- !query 25 output From 2250c5b492154c3bd504c62dadd78a746a648bfc Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Mon, 24 Jun 2019 20:57:20 +0200 Subject: [PATCH 2/4] fix review findings --- .../sql/catalyst/parser/AstBuilder.scala | 7 +++- .../test/resources/sql-tests/inputs/cte.sql | 6 +++- .../resources/sql-tests/results/cte.sql.out | 36 ++++++++++++++++++- 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 5eef8dbdfbffc..6c5ad55e88bea 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -129,7 +129,12 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging (namedQuery.alias, namedQuery) } // Check for duplicate names. - checkDuplicateKeys(ctes, ctx) + val duplicates = ctes.groupBy(_._1).filter(_._2.size > 1).keys + if (duplicates.nonEmpty) { + throw new ParseException( + s"CTE definition can't have duplicate names: ${duplicates.mkString("'", "', '", "'")}.", + ctx) + } With(plan, ctes) } diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte.sql b/sql/core/src/test/resources/sql-tests/inputs/cte.sql index db4db6c3f8c0a..1a64e7a1c409c 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cte.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cte.sql @@ -35,11 +35,15 @@ SELECT * FROM t WHERE x = 1; WITH t(x, y) AS (SELECT 1, 2) SELECT * FROM t WHERE x = 1 AND y = 2; +-- CTE with duplicate column aliases +WITH t(x, x) AS (SELECT 1, 2) +SELECT * FROM t; + -- CTE with empty column alias list is not allowed WITH t() AS (SELECT 1) SELECT * FROM t; --- CTE with duplicate name is not allowed +-- CTE with duplicate names is not allowed WITH t(x) AS (SELECT 1), t(x) AS (SELECT 2) diff --git a/sql/core/src/test/resources/sql-tests/results/cte.sql.out b/sql/core/src/test/resources/sql-tests/results/cte.sql.out index c15da01d61195..05f4e75b3ae65 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte.sql.out @@ -1,5 +1,9 @@ -- Automatically generated by SQLQueryTestSuite +<<<<<<< HEAD -- Number of queries: 26 +======= +-- Number of queries: 14 +>>>>>>> fix review findings -- !query 0 @@ -116,9 +120,24 @@ struct 1 2 +<<<<<<< HEAD -- !query 10 WITH t() AS (SELECT 1) SELECT * FROM t +======= +-- !query 9 +WITH t(x, x) AS (SELECT 1, 2) +SELECT * FROM t +-- !query 9 schema +struct +-- !query 9 output +1 2 + + +-- !query 10 +WITH t() AS (SELECT 1) +SELECT * FROM t +>>>>>>> fix review findings -- !query 10 schema struct<> -- !query 10 output @@ -142,7 +161,7 @@ struct<> -- !query 11 output org.apache.spark.sql.catalyst.parser.ParseException -Found duplicate keys 't'.(line 1, pos 0) +CTE definition can't have duplicate names: 't'.(line 1, pos 0) == SQL == WITH @@ -153,6 +172,7 @@ SELECT * FROM t -- !query 12 +<<<<<<< HEAD WITH t as ( WITH t2 AS (SELECT 1) SELECT * FROM t2 @@ -331,4 +351,18 @@ DROP VIEW IF EXISTS t2 -- !query 25 schema struct<> -- !query 25 output +======= +DROP VIEW IF EXISTS t +-- !query 12 schema +struct<> +-- !query 12 output + + + +-- !query 13 +DROP VIEW IF EXISTS t2 +-- !query 13 schema +struct<> +-- !query 13 output +>>>>>>> fix review findings From 56ab43496e291a7eeecb736cd5abedabdbf5eb24 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Tue, 25 Jun 2019 08:40:24 +0200 Subject: [PATCH 3/4] fix UT failure, better comment --- .../org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala | 2 +- sql/core/src/test/resources/sql-tests/inputs/cte.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index d48da4ab32168..fb245eef5e4be 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -100,7 +100,7 @@ class PlanParserSuite extends AnalysisTest { "cte2" -> ((table("cte1").select(star()), Seq.empty)))) intercept( "with cte1 (select 1), cte1 as (select 1 from cte1) select * from cte1", - "Found duplicate keys 'cte1'") + "CTE definition can't have duplicate names: 'cte1'.") } test("simple select query") { diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte.sql b/sql/core/src/test/resources/sql-tests/inputs/cte.sql index 1a64e7a1c409c..d0e145c35a9fe 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cte.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cte.sql @@ -43,7 +43,7 @@ SELECT * FROM t; WITH t() AS (SELECT 1) SELECT * FROM t; --- CTE with duplicate names is not allowed +-- CTEs with duplicate names are not allowed WITH t(x) AS (SELECT 1), t(x) AS (SELECT 2) From 0c39fb2d449c91952cdeae759d4bbaaf2d35c1ee Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Fri, 5 Jul 2019 12:45:08 +0200 Subject: [PATCH 4/4] rebase on master --- .../resources/sql-tests/results/cte.sql.out | 123 +++++++----------- 1 file changed, 49 insertions(+), 74 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/cte.sql.out b/sql/core/src/test/resources/sql-tests/results/cte.sql.out index 05f4e75b3ae65..9e90908d92faf 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte.sql.out @@ -1,9 +1,5 @@ -- Automatically generated by SQLQueryTestSuite -<<<<<<< HEAD --- Number of queries: 26 -======= --- Number of queries: 14 ->>>>>>> fix review findings +-- Number of queries: 27 -- !query 0 @@ -120,27 +116,21 @@ struct 1 2 -<<<<<<< HEAD -- !query 10 -WITH t() AS (SELECT 1) -SELECT * FROM t -======= --- !query 9 WITH t(x, x) AS (SELECT 1, 2) SELECT * FROM t --- !query 9 schema +-- !query 10 schema struct --- !query 9 output +-- !query 10 output 1 2 --- !query 10 +-- !query 11 WITH t() AS (SELECT 1) SELECT * FROM t ->>>>>>> fix review findings --- !query 10 schema +-- !query 11 schema struct<> --- !query 10 output +-- !query 11 output org.apache.spark.sql.catalyst.parser.ParseException no viable alternative at input 'WITH t()'(line 1, pos 7) @@ -151,14 +141,14 @@ WITH t() AS (SELECT 1) SELECT * FROM t --- !query 11 +-- !query 12 WITH t(x) AS (SELECT 1), t(x) AS (SELECT 2) SELECT * FROM t --- !query 11 schema +-- !query 12 schema struct<> --- !query 11 output +-- !query 12 output org.apache.spark.sql.catalyst.parser.ParseException CTE definition can't have duplicate names: 't'.(line 1, pos 0) @@ -171,42 +161,41 @@ WITH SELECT * FROM t --- !query 12 -<<<<<<< HEAD +-- !query 13 WITH t as ( WITH t2 AS (SELECT 1) SELECT * FROM t2 ) SELECT * FROM t --- !query 12 schema +-- !query 13 schema struct<1:int> --- !query 12 output +-- !query 13 output 1 --- !query 13 +-- !query 14 SELECT max(c) FROM ( WITH t(c) AS (SELECT 1) SELECT * FROM t ) --- !query 13 schema +-- !query 14 schema struct --- !query 13 output +-- !query 14 output 1 --- !query 14 +-- !query 15 SELECT ( WITH t AS (SELECT 1) SELECT * FROM t ) --- !query 14 schema +-- !query 15 schema struct --- !query 14 output +-- !query 15 output 1 --- !query 15 +-- !query 16 WITH t AS (SELECT 1), t2 AS ( @@ -214,13 +203,13 @@ WITH SELECT * FROM t ) SELECT * FROM t2 --- !query 15 schema +-- !query 16 schema struct<1:int> --- !query 15 output +-- !query 16 output 1 --- !query 16 +-- !query 17 WITH t(c) AS (SELECT 1), t2 AS ( @@ -232,13 +221,13 @@ WITH ) ) SELECT * FROM t2 --- !query 16 schema +-- !query 17 schema struct --- !query 16 output +-- !query 17 output 1 --- !query 17 +-- !query 18 WITH t AS (SELECT 1), t2 AS ( @@ -250,25 +239,25 @@ WITH SELECT * FROM t2 ) SELECT * FROM t2 --- !query 17 schema +-- !query 18 schema struct<2:int> --- !query 17 output +-- !query 18 output 2 --- !query 18 +-- !query 19 WITH t(c) AS (SELECT 1) SELECT max(c) FROM ( WITH t(c) AS (SELECT 2) SELECT * FROM t ) --- !query 18 schema +-- !query 19 schema struct --- !query 18 output +-- !query 19 output 2 --- !query 19 +-- !query 20 WITH t(c) AS (SELECT 1) SELECT sum(c) FROM ( SELECT max(c) AS c FROM ( @@ -276,13 +265,13 @@ SELECT sum(c) FROM ( SELECT * FROM t ) ) --- !query 19 schema +-- !query 20 schema struct --- !query 19 output +-- !query 20 output 2 --- !query 20 +-- !query 21 WITH t(c) AS (SELECT 1) SELECT sum(c) FROM ( WITH t(c) AS (SELECT 2) @@ -291,25 +280,25 @@ SELECT sum(c) FROM ( SELECT * FROM t ) ) --- !query 20 schema +-- !query 21 schema struct --- !query 20 output +-- !query 21 output 3 --- !query 21 +-- !query 22 WITH t AS (SELECT 1) SELECT ( WITH t AS (SELECT 2) SELECT * FROM t ) --- !query 21 schema +-- !query 22 schema struct --- !query 21 output +-- !query 22 output 1 --- !query 22 +-- !query 23 WITH t AS (SELECT 1) SELECT ( SELECT ( @@ -317,13 +306,13 @@ SELECT ( SELECT * FROM t ) ) --- !query 22 schema +-- !query 23 schema struct --- !query 22 output +-- !query 23 output 1 --- !query 23 +-- !query 24 WITH t AS (SELECT 1) SELECT ( WITH t AS (SELECT 2) @@ -332,37 +321,23 @@ SELECT ( SELECT * FROM t ) ) --- !query 23 schema -struct --- !query 23 output -1 - - --- !query 24 -DROP VIEW IF EXISTS t -- !query 24 schema -struct<> +struct -- !query 24 output - +1 -- !query 25 -DROP VIEW IF EXISTS t2 +DROP VIEW IF EXISTS t -- !query 25 schema struct<> -- !query 25 output -======= -DROP VIEW IF EXISTS t --- !query 12 schema -struct<> --- !query 12 output --- !query 13 +-- !query 26 DROP VIEW IF EXISTS t2 --- !query 13 schema +-- !query 26 schema struct<> --- !query 13 output ->>>>>>> fix review findings +-- !query 26 output