From 28d9ac3582f24df8dd9ebfb0199d0df0ebc0bd2f Mon Sep 17 00:00:00 2001
From: Dongjoon Hyun <dongjoon@apache.org>
Date: Fri, 16 Dec 2016 13:53:32 -0800
Subject: [PATCH 1/4] [SPARK-18877][SQL] `inferField` on DecimalType should
 find a common type with `typeSoFar`

---
 .../sql/execution/datasources/csv/CSVInferSchema.scala     | 4 +++-
 .../execution/datasources/csv/CSVInferSchemaSuite.scala    | 7 +++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala
index 88c608add140f..29437c760a9dc 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala
@@ -85,7 +85,9 @@ private[csv] object CSVInferSchema {
         case NullType => tryParseInteger(field, options)
         case IntegerType => tryParseInteger(field, options)
         case LongType => tryParseLong(field, options)
-        case _: DecimalType => tryParseDecimal(field, options)
+        case _: DecimalType =>
+          // DecimalTypes have different precisions and scales, so we try to find the common type.
+          findTightestCommonType(typeSoFar, tryParseDecimal(field, options)).getOrElse(NullType)
         case DoubleType => tryParseDouble(field, options)
         case TimestampType => tryParseTimestamp(field, options)
         case BooleanType => tryParseBoolean(field, options)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala
index 93f752d107ca3..fcdaaf566233e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala
@@ -114,4 +114,11 @@ class CSVInferSchemaSuite extends SparkFunSuite {
     val options = new CSVOptions(Map("TiMeStampFormat" -> "yyyy-mm"))
     assert(CSVInferSchema.inferField(TimestampType, "2015-08", options) == TimestampType)
   }
+
+  test("SPARK-18877: `inferField` on DecimalType should find a common type with `typeSoFar`") {
+    val options = new CSVOptions(Map.empty[String, String])
+    // 9.03E+12 is Decimal(3, -10) and 1.19E+11 is Decimal(3, -9).
+    assert(CSVInferSchema.inferField(DecimalType(3, -10), "1.19E+11", options) ==
+      DecimalType(4, -9))
+  }
 }

From c1e07a9dc3970c76390031f293e485ff28a12136 Mon Sep 17 00:00:00 2001
From: Dongjoon Hyun <dongjoon@apache.org>
Date: Wed, 28 Dec 2016 13:59:53 -0800
Subject: [PATCH 2/4] Add another testcase having precision 40 (over 38) and
 scale 20.

---
 .../sql/execution/datasources/csv/CSVInferSchemaSuite.scala  | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala
index fcdaaf566233e..9f84a31a592ab 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala
@@ -117,8 +117,13 @@ class CSVInferSchemaSuite extends SparkFunSuite {
 
   test("SPARK-18877: `inferField` on DecimalType should find a common type with `typeSoFar`") {
     val options = new CSVOptions(Map.empty[String, String])
+
     // 9.03E+12 is Decimal(3, -10) and 1.19E+11 is Decimal(3, -9).
     assert(CSVInferSchema.inferField(DecimalType(3, -10), "1.19E+11", options) ==
       DecimalType(4, -9))
+
+    // BigDecimal("12345678901234567890.01234567890123456789") is precision 40 and scale 20.
+    val value = "12345678901234567890.01234567890123456789"
+    assert(CSVInferSchema.inferField(DecimalType(3, -10), value, options) == DoubleType)
   }
 }

From 393d3a9ceaa6d92a301b5a2917e28d29518c1638 Mon Sep 17 00:00:00 2001
From: Dongjoon Hyun <dongjoon@apache.org>
Date: Mon, 2 Jan 2017 20:12:24 -0800
Subject: [PATCH 3/4] Use `StringType`.

---
 .../spark/sql/execution/datasources/csv/CSVInferSchema.scala    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala
index 29437c760a9dc..adc92fe5a31e6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala
@@ -87,7 +87,7 @@ private[csv] object CSVInferSchema {
         case LongType => tryParseLong(field, options)
         case _: DecimalType =>
           // DecimalTypes have different precisions and scales, so we try to find the common type.
-          findTightestCommonType(typeSoFar, tryParseDecimal(field, options)).getOrElse(NullType)
+          findTightestCommonType(typeSoFar, tryParseDecimal(field, options)).getOrElse(StringType)
         case DoubleType => tryParseDouble(field, options)
         case TimestampType => tryParseTimestamp(field, options)
         case BooleanType => tryParseBoolean(field, options)

From e59631bd54872a03eaa63cc74d0e245300bbc781 Mon Sep 17 00:00:00 2001
From: Dongjoon Hyun <dongjoon@apache.org>
Date: Mon, 2 Jan 2017 22:04:17 -0800
Subject: [PATCH 4/4] Add the corresponding testcase

---
 .../sql/execution/datasources/csv/CSVInferSchemaSuite.scala  | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala
index 9f84a31a592ab..8620bb9f65b97 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala
@@ -125,5 +125,10 @@ class CSVInferSchemaSuite extends SparkFunSuite {
     // BigDecimal("12345678901234567890.01234567890123456789") is precision 40 and scale 20.
     val value = "12345678901234567890.01234567890123456789"
     assert(CSVInferSchema.inferField(DecimalType(3, -10), value, options) == DoubleType)
+
+    // Seq(s"${Long.MaxValue}1", "2015-12-01 00:00:00") should be StringType
+    assert(CSVInferSchema.inferField(NullType, s"${Long.MaxValue}1", options) == DecimalType(20, 0))
+    assert(CSVInferSchema.inferField(DecimalType(20, 0), "2015-12-01 00:00:00", options)
+      == StringType)
   }
 }