From 93f43adaf7bf4d283e6aacef8f9aaf3419fd30a9 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 11 Apr 2019 19:35:53 +0800 Subject: [PATCH 1/6] Upcast should not allow casting from string to other types --- docs/sql-migration-guide-upgrade.md | 2 ++ .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 5 ++--- .../org/apache/spark/sql/catalyst/expressions/Cast.scala | 7 ++++--- .../main/scala/org/apache/spark/sql/types/DataType.scala | 2 +- .../sql/catalyst/encoders/EncoderResolutionSuite.scala | 6 +++--- .../apache/spark/sql/catalyst/expressions/CastSuite.scala | 4 ++-- .../spark/sql/types/DataTypeWriteCompatibilitySuite.scala | 2 +- .../scala/org/apache/spark/sql/hive/client/HiveShim.scala | 2 +- 8 files changed, 16 insertions(+), 14 deletions(-) diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md index b1935224150d9..dbca5d1a6d0e0 100644 --- a/docs/sql-migration-guide-upgrade.md +++ b/docs/sql-migration-guide-upgrade.md @@ -124,6 +124,8 @@ license: | - In Spark version 2.4, when a spark session is created via `cloneSession()`, the newly created spark session inherits its configuration from its parent `SparkContext` even though the same configuration may exist with a different value in its parent spark session. Since Spark 3.0, the configurations of a parent `SparkSession` have a higher precedence over the parent `SparkContext`. + - When converting a Dataset to another Dataset, Spark will up cast the fields in the original Dataset to the types of the corresponding fields in the target Dataset. In version 2.4 and earlier, this up cast is not very strict, e.g. `Seq("str").toDS.as[Int]` fails, but `Seq("str").toDS.as[Boolean]` works and throws an NPE during execution. In Spark 3.0, the up cast is stricter and turning a String into something else is not allowed, i.e. `Seq("str").toDS.as[Boolean]` will fail during analysis. + ## Upgrading From Spark SQL 2.3 to 2.4 - In Spark version 2.3 and earlier, the second parameter to array_contains function is implicitly promoted to the element type of first array type parameter. This type promotion can be lossy and may cause `array_contains` function to return wrong result. This problem has been addressed in 2.4 by employing a safer type promotion mechanism. This can cause some change in behavior and are illustrated in the table below.
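To make the migration note above concrete, here is a small sketch of the described behavior change, written as a hypothetical spark-shell session; it assumes a running `SparkSession` named `spark` with `spark.implicits._` imported, and the quoted error message is paraphrased rather than copied from the patch.

```scala
import spark.implicits._

// String -> Int was already rejected at analysis time in 2.4, and still is in 3.0:
Seq("str").toDS.as[Int]

// String -> Boolean passed analysis in 2.4 and only failed with an NPE when executed.
// With this change (Spark 3.0), it fails analysis up front with a message along the
// lines of: "Cannot up cast `value` from string to boolean."
Seq("str").toDS.as[Boolean].collect()
```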
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 01e40e64a3e8d..182216c961ee1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2562,7 +2562,7 @@ class Analyzer( case e => e.sql } throw new AnalysisException(s"Cannot up cast $fromStr from " + - s"${from.dataType.catalogString} to ${to.catalogString} as it may truncate\n" + + s"${from.dataType.catalogString} to ${to.catalogString}.\n" + "The type path of the target object is:\n" + walkedTypePath.mkString("", "\n", "\n") + "You can either add an explicit cast to the input data or choose a higher precision " + "type of the field in the target object") @@ -2575,8 +2575,7 @@ class Analyzer( case p => p transformExpressions { case u @ UpCast(child, _, _) if !child.resolved => u - case UpCast(child, dataType, walkedTypePath) - if Cast.mayTruncate(child.dataType, dataType) => + case UpCast(child, dataType, walkedTypePath) if !Cast.canUpCast(child.dataType, dataType) => fail(child, dataType, walkedTypePath) case UpCast(child, dataType, walkedTypePath) => Cast(child, dataType.asNullable) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index f7bc8b9ce40f4..504d8d309f71a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -139,16 +139,17 @@ object Cast { } /** - * Returns true iff we can safely cast the `from` type to `to` type without any truncating or + * Returns true iff we can safely up-cast the `from` type to `to` type without any truncating or * precision lose, e.g. int -> long, date -> timestamp. */ - def canSafeCast(from: AtomicType, to: AtomicType): Boolean = (from, to) match { + def canUpCast(from: DataType, to: DataType): Boolean = (from, to) match { case _ if from == to => true case (from: NumericType, to: DecimalType) if to.isWiderThan(from) => true case (from: DecimalType, to: NumericType) if from.isTighterThan(to) => true - case (from, to) if legalNumericPrecedence(from, to) => true + case (f, t) if legalNumericPrecedence(f, t) => true case (DateType, TimestampType) => true case (_, StringType) => true + // TODO: consider complex types. 
case _ => false } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala index c58f7a2397374..f08895f579576 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -443,7 +443,7 @@ object DataType { fieldCompatible case (w: AtomicType, r: AtomicType) => - if (!Cast.canSafeCast(w, r)) { + if (!Cast.canUpCast(w, r)) { addError(s"Cannot safely cast '$context': $w to $r") false } else { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala index dd20e6497fbb4..da1b695919dec 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala @@ -80,7 +80,7 @@ class EncoderResolutionSuite extends PlanTest { val attrs = Seq('arr.array(StringType)) assert(intercept[AnalysisException](encoder.resolveAndBind(attrs)).message == s""" - |Cannot up cast array element from string to bigint as it may truncate + |Cannot up cast array element from string to bigint. |The type path of the target object is: |- array element class: "scala.Long" |- field (class: "scala.Array", name: "arr") @@ -202,7 +202,7 @@ class EncoderResolutionSuite extends PlanTest { }.message assert(msg1 == s""" - |Cannot up cast `b` from bigint to int as it may truncate + |Cannot up cast `b` from bigint to int. |The type path of the target object is: |- field (class: "scala.Int", name: "b") |- root class: "org.apache.spark.sql.catalyst.encoders.StringIntClass" @@ -215,7 +215,7 @@ class EncoderResolutionSuite extends PlanTest { }.message assert(msg2 == s""" - |Cannot up cast `b`.`b` from decimal(38,18) to bigint as it may truncate + |Cannot up cast `b`.`b` from decimal(38,18) to bigint. 
|The type path of the target object is: |- field (class: "scala.Long", name: "b") |- field (class: "org.apache.spark.sql.catalyst.encoders.StringLongClass", name: "b") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 1b7f2581f8958..9b0e256e5f4aa 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -980,12 +980,12 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { val (safeTargetTypes, unsafeTargetTypes) = numericTypes.partition(to => isCastSafe(from, to)) safeTargetTypes.foreach { to => - assert(Cast.canSafeCast(from, to), s"It should be possible to safely cast $from to $to") + assert(Cast.canUpCast(from, to), s"It should be possible to safely cast $from to $to") assert(!Cast.mayTruncate(from, to), s"No truncation is expected when casting $from to $to") } unsafeTargetTypes.foreach { to => - assert(!Cast.canSafeCast(from, to), s"It shouldn't be possible to safely cast $from to $to") + assert(!Cast.canUpCast(from, to), s"It shouldn't be possible to safely cast $from to $to") assert(Cast.mayTruncate(from, to), s"Truncation is expected when casting $from to $to") } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala index 87d1cd4d60beb..9cc9894f2044d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala @@ -67,7 +67,7 @@ class DataTypeWriteCompatibilitySuite extends SparkFunSuite { test("Check atomic types: write allowed only when casting is safe") { atomicTypes.foreach { w => atomicTypes.foreach { r => - if (Cast.canSafeCast(w, r)) { + if (Cast.canUpCast(w, r)) { assertAllowed(w, r, "t", s"Should allow writing $w to $r because cast is safe") } else { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index af5ea59429b5e..18f8c53609812 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -679,7 +679,7 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { expr match { case attr: Attribute => Some(attr) case Cast(child @ AtomicType(), dt: AtomicType, _) - if Cast.canSafeCast(child.dataType.asInstanceOf[AtomicType], dt) => unapply(child) + if Cast.canUpCast(child.dataType.asInstanceOf[AtomicType], dt) => unapply(child) case _ => None } } From 13a7846ceeaaa47f0d629e4f8aec01e975719414 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 15 Apr 2019 21:31:31 +0800 Subject: [PATCH 2/6] fix a case --- .../org/apache/spark/sql/catalyst/expressions/Cast.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 504d8d309f71a..7584ee76e285f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ 
-149,6 +149,10 @@ object Cast { case (f, t) if legalNumericPrecedence(f, t) => true case (DateType, TimestampType) => true case (_, StringType) => true + // Spark supports casting between long and timestamp, please see `longToTimestamp` and + // `timestampToLong` for details. + case (TimestampType, LongType) => true + case (LongType, TimestampType) => true // TODO: consider complex types. case _ => false } From a19990606ebf7cec7f46c76f745d74f0dacff296 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 15 May 2019 14:11:06 +0800 Subject: [PATCH 3/6] improve comment --- .../scala/org/apache/spark/sql/catalyst/expressions/Cast.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 7584ee76e285f..9df97b49e79df 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -140,7 +140,8 @@ object Cast { /** * Returns true iff we can safely up-cast the `from` type to `to` type without any truncating or - * precision lose, e.g. int -> long, date -> timestamp. + * precision lose or possible runtime failures. For example, long -> int, string -> int are not + * up-cast. */ def canUpCast(from: DataType, to: DataType): Boolean = (from, to) match { case _ if from == to => true From 7c6d9a852baeb5d54ad1e0247e1d423c12f06b4b Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 15 May 2019 14:17:25 +0800 Subject: [PATCH 4/6] update view as well --- .../spark/sql/catalyst/analysis/view.scala | 4 ++-- .../spark/sql/catalyst/expressions/Cast.scala | 13 ----------- .../sql/catalyst/expressions/CastSuite.scala | 22 ++++--------------- 3 files changed, 6 insertions(+), 33 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala index 6134d54531a19..24276e11d8443 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala @@ -73,8 +73,8 @@ case class AliasViewChild(conf: SQLConf) extends Rule[LogicalPlan] with CastSupp case (attr, originAttr) if !attr.semanticEquals(originAttr) => // The dataType of the output attributes may be not the same with that of the view // output, so we should cast the attribute to the dataType of the view output attribute. - // Will throw an AnalysisException if the cast can't perform or might truncate. - if (Cast.mayTruncate(originAttr.dataType, attr.dataType)) { + // Will throw an AnalysisException if the cast is not a up-cast. 
+ if (!Cast.canUpCast(originAttr.dataType, attr.dataType)) { throw new AnalysisException(s"Cannot up cast ${originAttr.sql} from " + s"${originAttr.dataType.catalogString} to ${attr.dataType.catalogString} as it " + s"may truncate\n") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 9df97b49e79df..42a93fc7fd886 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -119,19 +119,6 @@ object Cast { case _ => false } - /** - * Return true iff we may truncate during casting `from` type to `to` type. e.g. long -> int, - * timestamp -> date. - */ - def mayTruncate(from: DataType, to: DataType): Boolean = (from, to) match { - case (from: NumericType, to: DecimalType) if !to.isWiderThan(from) => true - case (from: DecimalType, to: NumericType) if !from.isTighterThan(to) => true - case (from, to) if illegalNumericPrecedence(from, to) => true - case (TimestampType, DateType) => true - case (StringType, to: NumericType) => true - case _ => false - } - private def illegalNumericPrecedence(from: DataType, to: DataType): Boolean = { val fromPrecedence = TypeCoercion.numericPrecedence.indexOf(from) val toPrecedence = TypeCoercion.numericPrecedence.indexOf(to) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 9b0e256e5f4aa..c23ac383a4a95 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -956,37 +956,23 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(ret6, "[1, [1 -> a, 2 -> b, 3 -> c]]") } - test("SPARK-26706: Fix Cast.mayTruncate for bytes") { - assert(!Cast.mayTruncate(ByteType, ByteType)) - assert(!Cast.mayTruncate(DecimalType.ByteDecimal, ByteType)) - assert(Cast.mayTruncate(ShortType, ByteType)) - assert(Cast.mayTruncate(IntegerType, ByteType)) - assert(Cast.mayTruncate(LongType, ByteType)) - assert(Cast.mayTruncate(FloatType, ByteType)) - assert(Cast.mayTruncate(DoubleType, ByteType)) - assert(Cast.mayTruncate(DecimalType.IntDecimal, ByteType)) - } - - test("canSafeCast and mayTruncate must be consistent for numeric types") { - import DataTypeTestUtils._ - + test("up-cast") { def isCastSafe(from: NumericType, to: NumericType): Boolean = (from, to) match { case (_, dt: DecimalType) => dt.isWiderThan(from) case (dt: DecimalType, _) => dt.isTighterThan(to) case _ => numericPrecedence.indexOf(from) <= numericPrecedence.indexOf(to) } + import DataTypeTestUtils.numericTypes numericTypes.foreach { from => val (safeTargetTypes, unsafeTargetTypes) = numericTypes.partition(to => isCastSafe(from, to)) safeTargetTypes.foreach { to => - assert(Cast.canUpCast(from, to), s"It should be possible to safely cast $from to $to") - assert(!Cast.mayTruncate(from, to), s"No truncation is expected when casting $from to $to") + assert(Cast.canUpCast(from, to), s"It should be possible to up-cast $from to $to") } unsafeTargetTypes.foreach { to => - assert(!Cast.canUpCast(from, to), s"It shouldn't be possible to safely cast $from to $to") - assert(Cast.mayTruncate(from, to), s"Truncation is expected when casting $from to $to") + 
assert(!Cast.canUpCast(from, to), s"It shouldn't be possible to up-cast $from to $to") } } } From cf8d05f87bb6aa63040bd26091ad88c2483fc5d6 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 15 May 2019 14:59:04 +0800 Subject: [PATCH 5/6] support complex types --- .../spark/sql/catalyst/expressions/Cast.scala | 22 ++++++++++----- .../sql/catalyst/expressions/CastSuite.scala | 27 +++++++++++++++++++ 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 42a93fc7fd886..a311283157893 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -119,12 +119,6 @@ object Cast { case _ => false } - private def illegalNumericPrecedence(from: DataType, to: DataType): Boolean = { - val fromPrecedence = TypeCoercion.numericPrecedence.indexOf(from) - val toPrecedence = TypeCoercion.numericPrecedence.indexOf(to) - toPrecedence >= 0 && fromPrecedence > toPrecedence - } - /** * Returns true iff we can safely up-cast the `from` type to `to` type without any truncating or * precision lose or possible runtime failures. For example, long -> int, string -> int are not @@ -137,11 +131,25 @@ object Cast { case (f, t) if legalNumericPrecedence(f, t) => true case (DateType, TimestampType) => true case (_, StringType) => true + // Spark supports casting between long and timestamp, please see `longToTimestamp` and // `timestampToLong` for details. case (TimestampType, LongType) => true case (LongType, TimestampType) => true - // TODO: consider complex types. + + case (ArrayType(fromType, fn), ArrayType(toType, tn)) => + resolvableNullability(fn, tn) && canUpCast(fromType, toType) + + case (MapType(fromKey, fromValue, fn), MapType(toKey, toValue, tn)) => + resolvableNullability(fn, tn) && canUpCast(fromKey, toKey) && canUpCast(fromValue, toValue) + + case (StructType(fromFields), StructType(toFields)) => + fromFields.length == toFields.length && + fromFields.zip(toFields).forall { + case (f1, f2) => + resolvableNullability(f1.nullable, f2.nullable) && canUpCast(f1.dataType, f2.dataType) + } + case _ => false } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index c23ac383a4a95..6a82c30cb5dc4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -963,16 +963,43 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { case _ => numericPrecedence.indexOf(from) <= numericPrecedence.indexOf(to) } + def makeComplexTypes(dt: NumericType, nullable: Boolean): Seq[DataType] = { + Seq( + new StructType().add("a", dt, nullable).add("b", dt, nullable), + ArrayType(dt, nullable), + MapType(dt, dt, nullable), + ArrayType(new StructType().add("a", dt, nullable), nullable), + new StructType().add("a", ArrayType(dt, nullable), nullable) + ) + } + import DataTypeTestUtils.numericTypes numericTypes.foreach { from => val (safeTargetTypes, unsafeTargetTypes) = numericTypes.partition(to => isCastSafe(from, to)) safeTargetTypes.foreach { to => assert(Cast.canUpCast(from, to), s"It should be possible to up-cast $from to $to") + + // If the nullability is 
compatible, we can up-cast complex types too. + Seq(true -> true, false -> false, false -> true).foreach { case (fn, tn) => + makeComplexTypes(from, fn).zip(makeComplexTypes(to, tn)).foreach { + case (complexFromType, complexToType) => + assert(Cast.canUpCast(complexFromType, complexToType)) + } + } + + makeComplexTypes(from, true).zip(makeComplexTypes(to, false)).foreach { + case (complexFromType, complexToType) => + assert(!Cast.canUpCast(complexFromType, complexToType)) + } } unsafeTargetTypes.foreach { to => assert(!Cast.canUpCast(from, to), s"It shouldn't be possible to up-cast $from to $to") + makeComplexTypes(from, true).zip(makeComplexTypes(to, true)).foreach { + case (complexFromType, complexToType) => + assert(!Cast.canUpCast(complexFromType, complexToType)) + } } } } From e558ad458baef148fb9749f00175fcd67bce45c8 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 17 May 2019 20:03:10 +0800 Subject: [PATCH 6/6] add a legacy config --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 7 ++++++- .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 182216c961ee1..30b59b7caa229 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2575,10 +2575,15 @@ class Analyzer( case p => p transformExpressions { case u @ UpCast(child, _, _) if !child.resolved => u + case UpCast(child, dt: AtomicType, _) + if SQLConf.get.getConf(SQLConf.LEGACY_LOOSE_UPCAST) && + child.dataType == StringType => + Cast(child, dt.asNullable) + case UpCast(child, dataType, walkedTypePath) if !Cast.canUpCast(child.dataType, dataType) => fail(child, dataType, walkedTypePath) - case UpCast(child, dataType, walkedTypePath) => Cast(child, dataType.asNullable) + case UpCast(child, dataType, _) => Cast(child, dataType.asNullable) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index f33cc86a18a1b..a24f038e9d7c4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1735,6 +1735,11 @@ object SQLConf { "and from_utc_timestamp() functions.") .booleanConf .createWithDefault(false) + + val LEGACY_LOOSE_UPCAST = buildConf("spark.sql.legacy.looseUpcast") + .doc("When true, the upcast will be loose and allows string to numeric/boolean.") + .booleanConf + .createWithDefault(false) } /**
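Taken together, the series folds the old `Cast.mayTruncate`/`canSafeCast` checks into a single allow-list style `Cast.canUpCast` and adds a legacy flag to restore the loose behavior. The sketch below is illustrative only (it assumes access to the catalyst-internal `Cast` object and a running `SparkSession` named `spark`, and is not taken verbatim from the patch's tests):

```scala
import org.apache.spark.sql.catalyst.expressions.Cast
import org.apache.spark.sql.types._

// Widening conversions are up-casts; narrowing or string-to-numeric ones are not.
Cast.canUpCast(IntegerType, LongType)     // true
Cast.canUpCast(LongType, IntegerType)     // false: may truncate
Cast.canUpCast(StringType, IntegerType)   // false: may fail at runtime
Cast.canUpCast(DateType, TimestampType)   // true
Cast.canUpCast(LongType, TimestampType)   // true, see longToTimestamp/timestampToLong

// With the complex-type patch, element/key/value/field types are checked recursively
// and nullability must be resolvable (non-nullable -> nullable is fine, not the reverse).
Cast.canUpCast(ArrayType(IntegerType, containsNull = false), ArrayType(LongType))   // true
Cast.canUpCast(ArrayType(IntegerType), ArrayType(LongType, containsNull = false))   // false

// The legacy flag from the last patch re-enables the pre-3.0 loose behavior for
// string -> atomic type in dataset encoders (off by default).
spark.conf.set("spark.sql.legacy.looseUpcast", "true")
```

Replacing the deny-list `mayTruncate` with the allow-list `canUpCast` is the design choice that makes string-to-boolean (and any other unlisted conversion) fail during analysis instead of slipping through to runtime.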