From 943c8d895d693ef5de5552150fe7b37c7eec0060 Mon Sep 17 00:00:00 2001 From: Javier Date: Wed, 22 Jan 2020 14:36:28 -0300 Subject: [PATCH 1/4] added fix and UT --- .../hive/thriftserver/SparkSQLCLIDriver.scala | 24 +++++++++++++++---- .../sql/hive/thriftserver/CliSuite.scala | 17 +++++++++++++ 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index b665d4a31b9b1..026c5d7299c0a 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -509,24 +509,40 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { private def splitSemiColon(line: String): JList[String] = { var insideSingleQuote = false var insideDoubleQuote = false + var insideComment = false var escape = false var beginIndex = 0 + var endIndex = line.length val ret = new JArrayList[String] + for (index <- 0 until line.length) { - if (line.charAt(index) == '\'') { + if (line.charAt(index) == '\'' && !insideComment) { // take a look to see if it is escaped if (!escape) { // flip the boolean variable insideSingleQuote = !insideSingleQuote } - } else if (line.charAt(index) == '\"') { + } else if (line.charAt(index) == '\"' && !insideComment) { // take a look to see if it is escaped if (!escape) { // flip the boolean variable insideDoubleQuote = !insideDoubleQuote } + } else if (line.charAt(index) == '-') { + val hasNext: Boolean = index + 1 < line.length + if (insideDoubleQuote || insideSingleQuote || insideComment) { + // ignore + } else if (hasNext && line.charAt(index+1) == '-') { + // ignore quotes and ; + insideComment = true + // ignore eol + endIndex = index + } else { + // continue + } + } else if (line.charAt(index) == ';') { - if (insideSingleQuote || insideDoubleQuote) { + if (insideSingleQuote || insideDoubleQuote || insideComment) { // do not split } else { // split, do not include ; itself @@ -543,7 +559,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { escape = true } } - ret.add(line.substring(beginIndex)) + ret.add(line.substring(beginIndex, endIndex)) ret } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 6609701be0ede..f003cc2a30022 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -400,4 +400,21 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { -> "1.000000000000000000" ) } + + test("SPARK-30049 Should not complaint for quotes in commented lines") { + runCliWithin(1.minute)( + """SELECT concat('test', 'comment') -- someone's comment here + |;""".stripMargin -> "testcomment" + ) + } + + test("SPARK-30049 Should not complaint for quotes in commented with multi-lines") { + runCliWithin(1.minute)( + """SELECT concat('test', 'comment') -- someone's comment here \\ + |comment continues here with single ' quote + | extra ' + |;""".stripMargin -> "testcomment" + ) + } + } From 6695496439ccff65683158e175350b22408332b6 Mon Sep 17 00:00:00 2001 From: Javier Date: Wed, 22 Jan 2020 15:50:18 -0300 Subject: [PATCH 2/4] linter fix --- .../apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 026c5d7299c0a..ffcc44707c82d 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -532,7 +532,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { val hasNext: Boolean = index + 1 < line.length if (insideDoubleQuote || insideSingleQuote || insideComment) { // ignore - } else if (hasNext && line.charAt(index+1) == '-') { + } else if (hasNext && line.charAt(index + 1) == '-') { // ignore quotes and ; insideComment = true // ignore eol From 074a943b223aa36ab33ed270e6eb506a45c45180 Mon Sep 17 00:00:00 2001 From: Javier Date: Thu, 6 Feb 2020 11:09:10 -0300 Subject: [PATCH 3/4] fixes --- .../sql/hive/thriftserver/SparkSQLCLIDriver.scala | 7 +++++-- .../apache/spark/sql/hive/thriftserver/CliSuite.scala | 11 ++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index ffcc44707c82d..930de79109415 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -529,9 +529,12 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { insideDoubleQuote = !insideDoubleQuote } } else if (line.charAt(index) == '-') { - val hasNext: Boolean = index + 1 < line.length + val hasNext = index + 1 < line.length if (insideDoubleQuote || insideSingleQuote || insideComment) { - // ignore + // Ignores '-' in any case of quotes or comment. + // Avoids to start a comment(--) within a quoted segment or already in a comment. + // Sample query: select "quoted value --" + // ^^ avoids starting a comment if it's inside quotes. } else if (hasNext && line.charAt(index + 1) == '-') { // ignore quotes and ; insideComment = true diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index f003cc2a30022..8c8784fa986d6 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -411,10 +411,15 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { test("SPARK-30049 Should not complaint for quotes in commented with multi-lines") { runCliWithin(1.minute)( """SELECT concat('test', 'comment') -- someone's comment here \\ - |comment continues here with single ' quote - | extra ' + | comment continues here with single ' quote \\ + | extra ' \\ |;""".stripMargin -> "testcomment" ) + runCliWithin(1.minute)( + """SELECT concat('test', 'comment') -- someone's comment here \\ + | comment continues here with single ' quote \\ + | extra ' \\ + | ;""".stripMargin -> "testcomment" + ) } - } From c4924a73aa73d92c33397299c098954d749726ed Mon Sep 17 00:00:00 2001 From: Javier Fuentes Date: Wed, 26 Feb 2020 07:19:58 -0300 Subject: [PATCH 4/4] Fixes --- .../spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala | 3 --- .../org/apache/spark/sql/hive/thriftserver/CliSuite.scala | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 930de79109415..19f7ea85c6cf0 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -540,10 +540,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { insideComment = true // ignore eol endIndex = index - } else { - // continue } - } else if (line.charAt(index) == ';') { if (insideSingleQuote || insideDoubleQuote || insideComment) { // do not split diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 8c8784fa986d6..43aafc3c8590c 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -401,14 +401,14 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { ) } - test("SPARK-30049 Should not complaint for quotes in commented lines") { + test("SPARK-30049 Should not complain for quotes in commented lines") { runCliWithin(1.minute)( """SELECT concat('test', 'comment') -- someone's comment here |;""".stripMargin -> "testcomment" ) } - test("SPARK-30049 Should not complaint for quotes in commented with multi-lines") { + test("SPARK-30049 Should not complain for quotes in commented with multi-lines") { runCliWithin(1.minute)( """SELECT concat('test', 'comment') -- someone's comment here \\ | comment continues here with single ' quote \\