From 91a5fdaa6a00b4010aa41df00f0bc2d479c74845 Mon Sep 17 00:00:00 2001 From: Mauro Palsgraaf Date: Wed, 18 Jul 2018 18:54:58 +0200 Subject: [PATCH 1/6] Validate that limit clause cannot have a nullable expression --- .../sql/catalyst/analysis/CheckAnalysis.scala | 3 ++ .../analysis/AnalysisErrorSuite.scala | 6 ++++ .../test/resources/sql-tests/inputs/limit.sql | 3 ++ .../resources/sql-tests/results/limit.sql.out | 33 ++++++++++++------- 4 files changed, 33 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index af256b98b34f3..24a8ddf0a697e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -62,6 +62,9 @@ trait CheckAnalysis extends PredicateHelper { private def checkLimitClause(limitExpr: Expression): Unit = { limitExpr match { + case e if e.nullable => failAnalysis( + "The limit expression must not be nullable, but got " + + limitExpr.sql) case e if !e.foldable => failAnalysis( "The limit expression must evaluate to a constant value, but got " + limitExpr.sql) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 0ce94d39e994a..bc404d5c1f949 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -399,6 +399,12 @@ class AnalysisErrorSuite extends AnalysisTest { "Generators are not supported outside the SELECT clause, but got: Sort" :: Nil ) + errorTest( + "limit clause must not be nullable", + testRelation.limit(Literal(null, IntegerType)), + "The limit expression must not be nullable, but got " :: Nil + ) + errorTest( "num_rows in limit clause must be equal to or greater than 0", listRelation.limit(-1), diff --git a/sql/core/src/test/resources/sql-tests/inputs/limit.sql b/sql/core/src/test/resources/sql-tests/inputs/limit.sql index f21912a042716..0eb5080ef6d07 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/limit.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/limit.sql @@ -13,6 +13,9 @@ SELECT * FROM testdata LIMIT CAST(1 AS int); SELECT * FROM testdata LIMIT -1; SELECT * FROM testData TABLESAMPLE (-1 ROWS); +-- limit may not be nullable +SELECT * FROM testdata LIMIT CAST(NULL AS INT); + -- limit must be foldable SELECT * FROM testdata LIMIT key > 3; diff --git a/sql/core/src/test/resources/sql-tests/results/limit.sql.out b/sql/core/src/test/resources/sql-tests/results/limit.sql.out index 146abe6cbd058..7affa2b886fbc 100644 --- a/sql/core/src/test/resources/sql-tests/results/limit.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/limit.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 12 +-- Number of queries: 13 -- !query 0 @@ -66,44 +66,53 @@ The limit expression must be equal to or greater than 0, but got -1; -- !query 7 -SELECT * FROM testdata LIMIT key > 3 +SELECT * FROM testdata LIMIT CAST(NULL AS INT) -- !query 7 schema struct<> -- !query 7 output org.apache.spark.sql.AnalysisException -The limit expression must evaluate to a constant value, but got (testdata.`key` > 3); +The limit expression must not be nullable, but got CAST(NULL AS INT); -- !query 8 -SELECT * FROM testdata LIMIT true +SELECT * FROM testdata LIMIT key > 3 -- !query 8 schema struct<> -- !query 8 output org.apache.spark.sql.AnalysisException -The limit expression must be integer type, but got boolean; +The limit expression must evaluate to a constant value, but got (testdata.`key` > 3); -- !query 9 -SELECT * FROM testdata LIMIT 'a' +SELECT * FROM testdata LIMIT true -- !query 9 schema struct<> -- !query 9 output org.apache.spark.sql.AnalysisException -The limit expression must be integer type, but got string; +The limit expression must be integer type, but got boolean; -- !query 10 -SELECT * FROM (SELECT * FROM range(10) LIMIT 5) WHERE id > 3 +SELECT * FROM testdata LIMIT 'a' -- !query 10 schema -struct +struct<> -- !query 10 output -4 +org.apache.spark.sql.AnalysisException +The limit expression must be integer type, but got string; -- !query 11 -SELECT * FROM testdata WHERE key < 3 LIMIT ALL +SELECT * FROM (SELECT * FROM range(10) LIMIT 5) WHERE id > 3 -- !query 11 schema -struct +struct -- !query 11 output +4 + + +-- !query 12 +SELECT * FROM testdata WHERE key < 3 LIMIT ALL +-- !query 12 schema +struct +-- !query 12 output 1 1 2 2 From f84ff237cb83cff1dfb65d21eca5d64d1778bebb Mon Sep 17 00:00:00 2001 From: Mauro Palsgraaf Date: Sat, 28 Jul 2018 13:55:23 +0200 Subject: [PATCH 2/6] Make sure that evaluated limit expressions cannot be null --- .../sql/catalyst/analysis/CheckAnalysis.scala | 18 ++++++--- .../analysis/AnalysisErrorSuite.scala | 4 +- .../test/resources/sql-tests/inputs/limit.sql | 4 +- .../resources/sql-tests/results/limit.sql.out | 40 +++++++++++-------- 4 files changed, 41 insertions(+), 25 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 24a8ddf0a697e..a4144dc85326a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -62,19 +62,25 @@ trait CheckAnalysis extends PredicateHelper { private def checkLimitClause(limitExpr: Expression): Unit = { limitExpr match { - case e if e.nullable => failAnalysis( - "The limit expression must not be nullable, but got " + - limitExpr.sql) case e if !e.foldable => failAnalysis( "The limit expression must evaluate to a constant value, but got " + limitExpr.sql) case e if e.dataType != IntegerType => failAnalysis( s"The limit expression must be integer type, but got " + e.dataType.simpleString) - case e if e.eval().asInstanceOf[Int] < 0 => failAnalysis( + case _ => // OK + } + + val evalledExpression = limitExpr.eval() + + evalledExpression match { + case null => failAnalysis( + "The evaluated limit expression must not be null, but got " + + limitExpr.sql) + case e if e.asInstanceOf[Int] < 0 => failAnalysis( "The limit expression must be equal to or greater than 0, but got " + - e.eval().asInstanceOf[Int]) - case e => // OK + evalledExpression.asInstanceOf[Int]) + case _ => // OK } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index bc404d5c1f949..6c02ef5bb8a0d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -400,9 +400,9 @@ class AnalysisErrorSuite extends AnalysisTest { ) errorTest( - "limit clause must not be nullable", + "an evaluated limit class must not be null", testRelation.limit(Literal(null, IntegerType)), - "The limit expression must not be nullable, but got " :: Nil + "The evaluated limit expression must not be null, but got " :: Nil ) errorTest( diff --git a/sql/core/src/test/resources/sql-tests/inputs/limit.sql b/sql/core/src/test/resources/sql-tests/inputs/limit.sql index 0eb5080ef6d07..b4c73cf33e53a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/limit.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/limit.sql @@ -13,7 +13,9 @@ SELECT * FROM testdata LIMIT CAST(1 AS int); SELECT * FROM testdata LIMIT -1; SELECT * FROM testData TABLESAMPLE (-1 ROWS); --- limit may not be nullable + +SELECT * FROM testdata LIMIT CAST(1 AS INT); +-- evaluated limit must not be null SELECT * FROM testdata LIMIT CAST(NULL AS INT); -- limit must be foldable diff --git a/sql/core/src/test/resources/sql-tests/results/limit.sql.out b/sql/core/src/test/resources/sql-tests/results/limit.sql.out index 7affa2b886fbc..02fe1de84f753 100644 --- a/sql/core/src/test/resources/sql-tests/results/limit.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/limit.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 13 +-- Number of queries: 14 -- !query 0 @@ -66,53 +66,61 @@ The limit expression must be equal to or greater than 0, but got -1; -- !query 7 -SELECT * FROM testdata LIMIT CAST(NULL AS INT) +SELECT * FROM testdata LIMIT CAST(1 AS INT) -- !query 7 schema -struct<> +struct -- !query 7 output -org.apache.spark.sql.AnalysisException -The limit expression must not be nullable, but got CAST(NULL AS INT); +1 1 -- !query 8 -SELECT * FROM testdata LIMIT key > 3 +SELECT * FROM testdata LIMIT CAST(NULL AS INT) -- !query 8 schema struct<> -- !query 8 output org.apache.spark.sql.AnalysisException -The limit expression must evaluate to a constant value, but got (testdata.`key` > 3); +The evaluated limit expression must not be null, but got CAST(NULL AS INT); -- !query 9 -SELECT * FROM testdata LIMIT true +SELECT * FROM testdata LIMIT key > 3 -- !query 9 schema struct<> -- !query 9 output org.apache.spark.sql.AnalysisException -The limit expression must be integer type, but got boolean; +The limit expression must evaluate to a constant value, but got (testdata.`key` > 3); -- !query 10 -SELECT * FROM testdata LIMIT 'a' +SELECT * FROM testdata LIMIT true -- !query 10 schema struct<> -- !query 10 output org.apache.spark.sql.AnalysisException -The limit expression must be integer type, but got string; +The limit expression must be integer type, but got boolean; -- !query 11 -SELECT * FROM (SELECT * FROM range(10) LIMIT 5) WHERE id > 3 +SELECT * FROM testdata LIMIT 'a' -- !query 11 schema -struct +struct<> -- !query 11 output -4 +org.apache.spark.sql.AnalysisException +The limit expression must be integer type, but got string; -- !query 12 -SELECT * FROM testdata WHERE key < 3 LIMIT ALL +SELECT * FROM (SELECT * FROM range(10) LIMIT 5) WHERE id > 3 -- !query 12 schema -struct +struct -- !query 12 output +4 + + +-- !query 13 +SELECT * FROM testdata WHERE key < 3 LIMIT ALL +-- !query 13 schema +struct +-- !query 13 output 1 1 2 2 From 5ce97b43cc653f1dbdb6335e77e71b0101fd12af Mon Sep 17 00:00:00 2001 From: Mauro Palsgraaf Date: Sat, 28 Jul 2018 13:58:02 +0200 Subject: [PATCH 3/6] Changed evalledExpression to alias e --- .../org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index a4144dc85326a..b2db9cd99593d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -79,7 +79,7 @@ trait CheckAnalysis extends PredicateHelper { limitExpr.sql) case e if e.asInstanceOf[Int] < 0 => failAnalysis( "The limit expression must be equal to or greater than 0, but got " + - evalledExpression.asInstanceOf[Int]) + e.asInstanceOf[Int]) case _ => // OK } } From 5235f321704b335ba90b6f6655141bf06cb344e6 Mon Sep 17 00:00:00 2001 From: Mauro Palsgraaf Date: Sat, 28 Jul 2018 20:44:33 +0200 Subject: [PATCH 4/6] Remove merge conflict missing bracket --- .../apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 25c764112fff4..2f21841c5cc96 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -80,7 +80,7 @@ trait CheckAnalysis extends PredicateHelper { case e if e.asInstanceOf[Int] < 0 => failAnalysis( "The limit expression must be equal to or greater than 0, but got " + e.asInstanceOf[Int]) - case _ => // OK + case e => // OK } } @@ -647,6 +647,7 @@ trait CheckAnalysis extends PredicateHelper { // are not allowed to have any correlated expressions. case p => failOnOuterReferenceInSubTree(p) - } + }} } } + From c0eb69037a9e63284edc2b2868b929de3c20c38f Mon Sep 17 00:00:00 2001 From: Mauro Palsgraaf Date: Sat, 28 Jul 2018 20:45:32 +0200 Subject: [PATCH 5/6] Remove double white line --- .../org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 2f21841c5cc96..9f5449a2a99ea 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -650,4 +650,3 @@ trait CheckAnalysis extends PredicateHelper { }} } } - From 60b9af3a8dbb9fe75f53ceae36e71c273a991db4 Mon Sep 17 00:00:00 2001 From: Mauro Palsgraaf Date: Mon, 30 Jul 2018 23:07:14 +0200 Subject: [PATCH 6/6] Include match statement inline of checking the limit --- .../sql/catalyst/analysis/CheckAnalysis.scala | 21 +++++++------------ 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 9f5449a2a99ea..4addc83add3e0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -68,19 +68,14 @@ trait CheckAnalysis extends PredicateHelper { case e if e.dataType != IntegerType => failAnalysis( s"The limit expression must be integer type, but got " + e.dataType.catalogString) - case _ => // OK - } - - val evalledExpression = limitExpr.eval() - - evalledExpression match { - case null => failAnalysis( - "The evaluated limit expression must not be null, but got " + - limitExpr.sql) - case e if e.asInstanceOf[Int] < 0 => failAnalysis( - "The limit expression must be equal to or greater than 0, but got " + - e.asInstanceOf[Int]) - case e => // OK + case e => + e.eval() match { + case null => failAnalysis( + s"The evaluated limit expression must not be null, but got ${limitExpr.sql}") + case v: Int if v < 0 => failAnalysis( + s"The limit expression must be equal to or greater than 0, but got $v") + case _ => // OK + } } }