From ef57fdd5b0a6f7f0b6343c91c6983d20bc67fb5b Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Tue, 7 Aug 2018 13:23:43 +0800
Subject: [PATCH] Empty string for double and float types should be nulls in JSON

---
 .../org/apache/spark/sql/catalyst/json/JacksonParser.scala | 4 ++--
 .../spark/sql/execution/datasources/json/JsonSuite.scala   | 6 ++++++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
index 4d409caddd33d..3b3bb3f8220b9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
@@ -137,7 +137,7 @@ class JacksonParser(
         case VALUE_NUMBER_INT | VALUE_NUMBER_FLOAT =>
           parser.getFloatValue

-        case VALUE_STRING =>
+        case VALUE_STRING if parser.getTextLength >= 1 =>
           // Special case handling for NaN and Infinity.
           parser.getText match {
             case "NaN" => Float.NaN
@@ -153,7 +153,7 @@ class JacksonParser(
         case VALUE_NUMBER_INT | VALUE_NUMBER_FLOAT =>
           parser.getDoubleValue

-        case VALUE_STRING =>
+        case VALUE_STRING if parser.getTextLength >= 1 =>
           // Special case handling for NaN and Infinity.
           parser.getText match {
             case "NaN" => Double.NaN
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
index 655f40ad549e6..4089c2347e6c0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
@@ -2490,4 +2490,10 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
       assert(exception.getMessage.contains("encoding must not be included in the blacklist"))
     }
   }
+
+  test("SPARK-25040: empty strings should be treated as null for double and float") {
+    val df = spark.read.schema("a DOUBLE, b FLOAT")
+      .option("mode", "FAILFAST").json(Seq("""{"a":"","b": ""}""", """{"a": 1.1,"b": 1.1}""").toDS)
+    checkAnswer(df, Row(null, null) :: Row(1.1D, 1.1F) :: Nil)
+  }
 }
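
Note (illustrative sketch, not part of the patch): before this change, an empty
string in a DOUBLE or FLOAT column matched the VALUE_STRING branch and hit
"".toDouble / "".toFloat, which throws NumberFormatException and, under
FAILFAST mode, aborts the read. With the new `parser.getTextLength >= 1` guard,
an empty string no longer matches that branch and presumably falls through to
the parser's fallback handling for empty strings, which yields null, as the new
test asserts. The snippet below assumes a running SparkSession named `spark`
with its implicits in scope, matching the test above:

    import spark.implicits._

    // Two rows: one with empty-string values, one with real numbers.
    val df = spark.read
      .schema("a DOUBLE, b FLOAT")
      .option("mode", "FAILFAST")
      .json(Seq("""{"a": "", "b": ""}""", """{"a": 1.1, "b": 1.1}""").toDS)

    // With the patch applied, this no longer throws; the empty strings
    // come back as nulls in both columns.
    df.show()
    // +----+----+
    // |   a|   b|
    // +----+----+
    // |null|null|
    // | 1.1| 1.1|
    // +----+----+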