From 1d4b0d4f2e3d1cad9da3d73bb6206bf8759fc9ac Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Thu, 23 Aug 2018 12:09:05 -0700 Subject: [PATCH 1/7] [SPARK-25221][DEPLOY] Consistent trailing whitespace treatment of conf values --- .../scala/org/apache/spark/util/Utils.scala | 6 +++-- .../spark/deploy/SparkSubmitSuite.scala | 27 +++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 4593b057fc634..55f851385d25a 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -2062,8 +2062,10 @@ private[spark] object Utils extends Logging { try { val properties = new Properties() properties.load(inReader) - properties.stringPropertyNames().asScala.map( - k => (k, properties.getProperty(k).trim)).toMap + properties.stringPropertyNames().asScala + .map(k => (k, properties.getProperty(k))) + .toMap + } catch { case e: IOException => throw new SparkException(s"Failed when loading Spark properties from $filename", e) diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index f829fecc30840..b1854af77bb31 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -1144,6 +1144,33 @@ class SparkSubmitSuite conf1.get(PY_FILES.key) should be (s"s3a://${pyFile.getAbsolutePath}") conf1.get("spark.submit.pyFiles") should (startWith("/")) } + + test("handles white space in --properties-file and --conf uniformly") { + val delimKey = "spark.my.delimiter" + val delimKeyFromFile = s"${delimKey}FromFile" + val newLine = "\n" + val props = new java.util.Properties() + val propsFile = File.createTempFile("test-spark-conf", ".properties", Utils.createTempDir()) + val propsOutputStream = new FileOutputStream(propsFile) + try { + props.put(delimKeyFromFile, newLine) + props.store(propsOutputStream, "test whitespace") + } finally { + propsOutputStream.close() + } + + val clArgs = Seq( + "--class", "org.SomeClass", + "--conf", s"${delimKey}=$newLine", + "--conf", "spark.master=yarn", + "--properties-file", propsFile.getPath, + "thejar.jar") + + val appArgs = new SparkSubmitArguments(clArgs) + val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs) + conf.get(delimKey) should be (newLine) + conf.get(delimKeyFromFile) should be (newLine) + } } object SparkSubmitSuite extends SparkFunSuite with TimeLimits { From 60a41dc2ff86560381faba472ee2a43ac6804439 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Wed, 29 Aug 2018 11:37:38 -0700 Subject: [PATCH 2/7] Preserve natural line delimiters --- .../scala/org/apache/spark/util/Utils.scala | 25 ++++++++++++++-- .../spark/deploy/SparkSubmitSuite.scala | 29 ++++++++++++++----- 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 55f851385d25a..3189961b19020 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -19,7 +19,6 @@ package org.apache.spark.util import java.io._ import java.lang.{Byte => JByte} -import java.lang.InternalError import java.lang.management.{LockInfo, ManagementFactory, MonitorInfo, ThreadInfo} import java.lang.reflect.InvocationTargetException import java.math.{MathContext, RoundingMode} @@ -2052,6 +2051,28 @@ private[spark] object Utils extends Logging { } } + + private[this] val nonSpaceOrNaturalLineDelimiter: Char => Boolean = { ch => ch > ' ' || ch == '\r' || ch == '\n' } + + /** + * Implements the same logic as JDK java.lang.String#trim by removing leading and trailing non-printable characters + * less or equal to '\u0020' (SPACE) but preserves natural line delimiters according to + * [[java.util.Properties]] load method. The natural line delimiters are removed by JDK during load. + * Therefore any remaining ones have been specifically provided and escaped by the user, and must not be ignored + * + * @param str + * @return the trimmed value of str + */ + def trimExceptCRLF(str: String): String = { + val firstPos = str.indexWhere(nonSpaceOrNaturalLineDelimiter) + val lastPos = str.lastIndexWhere(nonSpaceOrNaturalLineDelimiter) + if (firstPos >= 0 && lastPos >= 0) { + str.substring(firstPos, lastPos + 1) + } else { + "" + } + } + /** Load properties present in the given file. */ def getPropertiesFromFile(filename: String): Map[String, String] = { val file = new File(filename) @@ -2063,7 +2084,7 @@ private[spark] object Utils extends Logging { val properties = new Properties() properties.load(inReader) properties.stringPropertyNames().asScala - .map(k => (k, properties.getProperty(k))) + .map(k => (k, trimExceptCRLF(properties.getProperty(k)))) .toMap } catch { diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index b1854af77bb31..7cd614907fd55 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -1145,15 +1145,23 @@ class SparkSubmitSuite conf1.get("spark.submit.pyFiles") should (startWith("/")) } - test("handles white space in --properties-file and --conf uniformly") { - val delimKey = "spark.my.delimiter" - val delimKeyFromFile = s"${delimKey}FromFile" - val newLine = "\n" + test("handles natural line delimiters in --properties-file and --conf uniformly") { + val delimKey = "spark.my.delimiter." + val LF = "\n" + val CR = "\r" + + val leadingDelimKeyFromFile = s"${delimKey}leadingDelimKeyFromFile" -> s"${LF}blah" + val trailingDelimKeyFromFile = s"${delimKey}trailingDelimKeyFromFile" -> s"blah${CR}" + val infixDelimFromFile = s"${delimKey}infixDelimFromFile" -> s"${CR}blah${LF}" + val nonDelimSpaceFromFile = s"${delimKey}nonDelimSpaceFromFile" -> " blah\f" + + val testProps = Seq(leadingDelimKeyFromFile, trailingDelimKeyFromFile, infixDelimFromFile, nonDelimSpaceFromFile) + val props = new java.util.Properties() val propsFile = File.createTempFile("test-spark-conf", ".properties", Utils.createTempDir()) val propsOutputStream = new FileOutputStream(propsFile) try { - props.put(delimKeyFromFile, newLine) + testProps.foreach { case (k, v) => props.put(k, v) } props.store(propsOutputStream, "test whitespace") } finally { propsOutputStream.close() @@ -1161,15 +1169,20 @@ class SparkSubmitSuite val clArgs = Seq( "--class", "org.SomeClass", - "--conf", s"${delimKey}=$newLine", + "--conf", s"${delimKey}=$LF", "--conf", "spark.master=yarn", "--properties-file", propsFile.getPath, "thejar.jar") val appArgs = new SparkSubmitArguments(clArgs) val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs) - conf.get(delimKey) should be (newLine) - conf.get(delimKeyFromFile) should be (newLine) + + Seq((delimKey -> LF), leadingDelimKeyFromFile, trailingDelimKeyFromFile, infixDelimFromFile) + .foreach { + case (k, v) => conf.get(k) should be (v) + } + + conf.get(nonDelimSpaceFromFile._1) should be ("blah") } } From 9e4ac104dd524e19f1675a383d84a89751da0af7 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Sat, 1 Sep 2018 17:27:35 -0700 Subject: [PATCH 3/7] Unit test for trimExceptCRLF @jerryshao @HyukjinKwon @steveloughran --- .../scala/org/apache/spark/util/Utils.scala | 2 +- .../org/apache/spark/util/UtilsSuite.scala | 33 +++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 3189961b19020..fd2cb8fdfaef8 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -2063,7 +2063,7 @@ private[spark] object Utils extends Logging { * @param str * @return the trimmed value of str */ - def trimExceptCRLF(str: String): String = { + private[util] def trimExceptCRLF(str: String): String = { val firstPos = str.indexWhere(nonSpaceOrNaturalLineDelimiter) val lastPos = str.lastIndexWhere(nonSpaceOrNaturalLineDelimiter) if (firstPos >= 0 && lastPos >= 0) { diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 943b53522d64e..1dac6946ff5f7 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -1205,6 +1205,39 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { assert(Utils.stringHalfWidth("\u0967\u0968\u0969") == 3) // scalastyle:on nonascii } + + test("trimExceptCRLF standalone") { + val crlfSet = Set("\r", "\n") + val nonPrintableButCRLF = + (0 to 32).map(_.toChar.toString).toSet -- crlfSet + + // identity for CRLF + crlfSet + .foreach(s => Utils.trimExceptCRLF(s) === s) + + // empty for other non-printables + nonPrintableButCRLF + .foreach(s => assert(Utils.trimExceptCRLF(s) === "")) + + // identity for a printable string + assert(Utils.trimExceptCRLF("a") === "a") + + // identity for strings with CRLF + crlfSet + .foreach { s => + assert(Utils.trimExceptCRLF(s"${s}a") === s"${s}a") + assert(Utils.trimExceptCRLF(s"a${s}") === s"a${s}") + assert(Utils.trimExceptCRLF(s"b${s}b") === s"b${s}b") + } + + // trim nonPrintableButCRLF except when inside a string + nonPrintableButCRLF + .foreach { s => + assert(Utils.trimExceptCRLF(s"${s}a") === "a") + assert(Utils.trimExceptCRLF(s"a${s}") === "a") + assert(Utils.trimExceptCRLF(s"b${s}b") === s"b${s}b") + } + } } private class SimpleExtension From 01d0904542de3487fb65abfb513c15410a6a2b7b Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Fri, 7 Sep 2018 00:32:51 -0700 Subject: [PATCH 4/7] scalastyle --- .../main/scala/org/apache/spark/util/Utils.scala | 13 ++++++++----- .../org/apache/spark/deploy/SparkSubmitSuite.scala | 6 ++++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index fd2cb8fdfaef8..f9c6e0ce00bca 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -2052,13 +2052,16 @@ private[spark] object Utils extends Logging { } - private[this] val nonSpaceOrNaturalLineDelimiter: Char => Boolean = { ch => ch > ' ' || ch == '\r' || ch == '\n' } + private[this] val nonSpaceOrNaturalLineDelimiter: Char => Boolean = { + ch => ch > ' ' || ch == '\r' || ch == '\n' + } /** - * Implements the same logic as JDK java.lang.String#trim by removing leading and trailing non-printable characters - * less or equal to '\u0020' (SPACE) but preserves natural line delimiters according to - * [[java.util.Properties]] load method. The natural line delimiters are removed by JDK during load. - * Therefore any remaining ones have been specifically provided and escaped by the user, and must not be ignored + * Implements the same logic as JDK java.lang.String#trim by removing leading and trailing + * non-printable characters less or equal to '\u0020' (SPACE) but preserves natural line + * delimiters according to [[java.util.Properties]] load method. The natural line delimiters are + * removed by JDK during load. Therefore any remaining ones have been specifically provided and + * escaped by the user, and must not be ignored * * @param str * @return the trimmed value of str diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index 7cd614907fd55..e625a9d6a4a4f 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -1155,10 +1155,12 @@ class SparkSubmitSuite val infixDelimFromFile = s"${delimKey}infixDelimFromFile" -> s"${CR}blah${LF}" val nonDelimSpaceFromFile = s"${delimKey}nonDelimSpaceFromFile" -> " blah\f" - val testProps = Seq(leadingDelimKeyFromFile, trailingDelimKeyFromFile, infixDelimFromFile, nonDelimSpaceFromFile) + val testProps = Seq(leadingDelimKeyFromFile, trailingDelimKeyFromFile, infixDelimFromFile, + nonDelimSpaceFromFile) val props = new java.util.Properties() - val propsFile = File.createTempFile("test-spark-conf", ".properties", Utils.createTempDir()) + val propsFile = File.createTempFile("test-spark-conf", ".properties", + Utils.createTempDir()) val propsOutputStream = new FileOutputStream(propsFile) try { testProps.foreach { case (k, v) => props.put(k, v) } From d341dae0511fd9456ab3e0bd686e11749d43aeb7 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Fri, 7 Sep 2018 19:50:53 -0700 Subject: [PATCH 5/7] Feedback @vanzin 1 --- .../scala/org/apache/spark/util/Utils.scala | 11 ++++---- .../spark/deploy/SparkSubmitSuite.scala | 15 ++++++---- .../org/apache/spark/util/UtilsSuite.scala | 28 ++++++++----------- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index f9c6e0ce00bca..b112c4abbe617 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -2051,11 +2051,6 @@ private[spark] object Utils extends Logging { } } - - private[this] val nonSpaceOrNaturalLineDelimiter: Char => Boolean = { - ch => ch > ' ' || ch == '\r' || ch == '\n' - } - /** * Implements the same logic as JDK java.lang.String#trim by removing leading and trailing * non-printable characters less or equal to '\u0020' (SPACE) but preserves natural line @@ -2067,6 +2062,10 @@ private[spark] object Utils extends Logging { * @return the trimmed value of str */ private[util] def trimExceptCRLF(str: String): String = { + val nonSpaceOrNaturalLineDelimiter: Char => Boolean = { ch => + ch > ' ' || ch == '\r' || ch == '\n' + } + val firstPos = str.indexWhere(nonSpaceOrNaturalLineDelimiter) val lastPos = str.lastIndexWhere(nonSpaceOrNaturalLineDelimiter) if (firstPos >= 0 && lastPos >= 0) { @@ -2087,7 +2086,7 @@ private[spark] object Utils extends Logging { val properties = new Properties() properties.load(inReader) properties.stringPropertyNames().asScala - .map(k => (k, trimExceptCRLF(properties.getProperty(k)))) + .map { k => (k, trimExceptCRLF(properties.getProperty(k))) } .toMap } catch { diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index e625a9d6a4a4f..9eae3605d0738 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -1150,6 +1150,7 @@ class SparkSubmitSuite val LF = "\n" val CR = "\r" + val lineFeedFromCommandLine = s"${delimKey}lineFeedFromCommandLine" -> LF val leadingDelimKeyFromFile = s"${delimKey}leadingDelimKeyFromFile" -> s"${LF}blah" val trailingDelimKeyFromFile = s"${delimKey}trailingDelimKeyFromFile" -> s"blah${CR}" val infixDelimFromFile = s"${delimKey}infixDelimFromFile" -> s"${CR}blah${LF}" @@ -1171,7 +1172,7 @@ class SparkSubmitSuite val clArgs = Seq( "--class", "org.SomeClass", - "--conf", s"${delimKey}=$LF", + "--conf", s"${lineFeedFromCommandLine._1}=${lineFeedFromCommandLine._2}", "--conf", "spark.master=yarn", "--properties-file", propsFile.getPath, "thejar.jar") @@ -1179,10 +1180,14 @@ class SparkSubmitSuite val appArgs = new SparkSubmitArguments(clArgs) val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs) - Seq((delimKey -> LF), leadingDelimKeyFromFile, trailingDelimKeyFromFile, infixDelimFromFile) - .foreach { - case (k, v) => conf.get(k) should be (v) - } + Seq( + lineFeedFromCommandLine, + leadingDelimKeyFromFile, + trailingDelimKeyFromFile, + infixDelimFromFile + ).foreach { case (k, v) => + conf.get(k) should be (v) + } conf.get(nonDelimSpaceFromFile._1) should be ("blah") } diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 1dac6946ff5f7..037ecb7cd00de 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -1212,31 +1212,27 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { (0 to 32).map(_.toChar.toString).toSet -- crlfSet // identity for CRLF - crlfSet - .foreach(s => Utils.trimExceptCRLF(s) === s) + crlfSet.foreach { s => Utils.trimExceptCRLF(s) === s } // empty for other non-printables - nonPrintableButCRLF - .foreach(s => assert(Utils.trimExceptCRLF(s) === "")) + nonPrintableButCRLF.foreach { s => assert(Utils.trimExceptCRLF(s) === "") } // identity for a printable string assert(Utils.trimExceptCRLF("a") === "a") // identity for strings with CRLF - crlfSet - .foreach { s => - assert(Utils.trimExceptCRLF(s"${s}a") === s"${s}a") - assert(Utils.trimExceptCRLF(s"a${s}") === s"a${s}") - assert(Utils.trimExceptCRLF(s"b${s}b") === s"b${s}b") - } + crlfSet.foreach { s => + assert(Utils.trimExceptCRLF(s"${s}a") === s"${s}a") + assert(Utils.trimExceptCRLF(s"a${s}") === s"a${s}") + assert(Utils.trimExceptCRLF(s"b${s}b") === s"b${s}b") + } // trim nonPrintableButCRLF except when inside a string - nonPrintableButCRLF - .foreach { s => - assert(Utils.trimExceptCRLF(s"${s}a") === "a") - assert(Utils.trimExceptCRLF(s"a${s}") === "a") - assert(Utils.trimExceptCRLF(s"b${s}b") === s"b${s}b") - } + nonPrintableButCRLF.foreach { s => + assert(Utils.trimExceptCRLF(s"${s}a") === "a") + assert(Utils.trimExceptCRLF(s"a${s}") === "a") + assert(Utils.trimExceptCRLF(s"b${s}b") === s"b${s}b") + } } } From 603fdc9c148e2a36393140054c559f5a0a380e5d Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Fri, 7 Sep 2018 20:04:03 -0700 Subject: [PATCH 6/7] nonPrintableButCRLF in one line --- core/src/test/scala/org/apache/spark/util/UtilsSuite.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 037ecb7cd00de..39f4fba78583f 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -1208,8 +1208,7 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { test("trimExceptCRLF standalone") { val crlfSet = Set("\r", "\n") - val nonPrintableButCRLF = - (0 to 32).map(_.toChar.toString).toSet -- crlfSet + val nonPrintableButCRLF = (0 to 32).map(_.toChar.toString).toSet -- crlfSet // identity for CRLF crlfSet.foreach { s => Utils.trimExceptCRLF(s) === s } From 76635cef7eab1fceef20fe0f4397460879cb0a43 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Mon, 10 Sep 2018 23:08:30 -0700 Subject: [PATCH 7/7] Backticks per @HyukjinKwon --- core/src/main/scala/org/apache/spark/util/Utils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index b112c4abbe617..14f68cd6f3509 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -2052,7 +2052,7 @@ private[spark] object Utils extends Logging { } /** - * Implements the same logic as JDK java.lang.String#trim by removing leading and trailing + * Implements the same logic as JDK `java.lang.String#trim` by removing leading and trailing * non-printable characters less or equal to '\u0020' (SPACE) but preserves natural line * delimiters according to [[java.util.Properties]] load method. The natural line delimiters are * removed by JDK during load. Therefore any remaining ones have been specifically provided and