From 38f973bb124c63c1caabe14ee6e5cca7b764b15a Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Fri, 2 Oct 2015 16:20:56 -0700 Subject: [PATCH 1/6] [SPARK-8654] Analysis exception when using NULL IN (...) : invalid cast In the analysis phase , while processing the rules for IN predicate, we compare the in-list types to the lhs expression type and generate cast operation if necessary. In the case of NULL [NOT] IN expr1 , we end up generating cast between in list types to NULL like cast (1 as NULL) which is not a valid cast. The fix is to not generate such a cast if the lhs type is a NullType instead we translate the expression to Literal(Null). --- .../spark/sql/catalyst/analysis/HiveTypeCoercion.scala | 5 +++++ .../apache/spark/sql/catalyst/analysis/AnalysisSuite.scala | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index 87a3845b2d9e5..e24d6a9fdfbf4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -305,12 +305,17 @@ object HiveTypeCoercion { /** * Convert all expressions in in() list to the left operator type + * except when the left operator type is NullType. In case when left hand + * operator type is NullType create a Literal(Null). */ object InConversion extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { // Skip nodes who's children have not been resolved yet. case e if !e.childrenResolved => e + case i @ In(a, b) if (a.dataType == NullType) => + Literal.create(null, BooleanType) + case i @ In(a, b) if b.exists(_.dataType != a.dataType) => i.makeCopy(Array(a, b.map(Cast(_, a.dataType)))) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 820b336aac759..2af580a603445 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -135,4 +135,11 @@ class AnalysisSuite extends AnalysisTest { plan = testRelation.select(CreateStructUnsafe(Seq(a, (a + 1).as("a+1"))).as("col")) checkAnalysis(plan, plan) } + + test("SPARK-8654: invalid CAST in NULL IN(...) expression") { + val plan = Project(Alias(In(Literal(null), Seq(Literal(1), Literal(2))), "a")() :: Nil, + LocalRelation() + ) + assertAnalysisSuccess(plan, false) + } } From acbd6a62621cf0f0f0991f535c4e86bc47e50d34 Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Mon, 5 Oct 2015 12:23:07 -0700 Subject: [PATCH 2/6] Fixing testcase based on review comments --- .../org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 2af580a603445..7b8386b78b096 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -140,6 +140,6 @@ class AnalysisSuite extends AnalysisTest { val plan = Project(Alias(In(Literal(null), Seq(Literal(1), Literal(2))), "a")() :: Nil, LocalRelation() ) - assertAnalysisSuccess(plan, false) + assertAnalysisSuccess(plan) } } From d33e786d08919eceb81a29ca26f0e4c289f6713b Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Wed, 7 Oct 2015 19:32:09 -0700 Subject: [PATCH 3/6] Raise an error if the inlist contains incompatible types --- .../sql/catalyst/analysis/HiveTypeCoercion.scala | 7 ++++++- .../sql/catalyst/analysis/AnalysisSuite.scala | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index e24d6a9fdfbf4..c9be84ae3c00f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -314,7 +314,12 @@ object HiveTypeCoercion { case e if !e.childrenResolved => e case i @ In(a, b) if (a.dataType == NullType) => - Literal.create(null, BooleanType) + var inTypes:Seq[DataType] = Seq.empty + b.foreach(e => inTypes = inTypes ++ Seq(e.dataType)) + findWiderCommonType(inTypes) match { + case Some(finalDataType) => Literal.create(null, BooleanType) + case None => i + } case i @ In(a, b) if b.exists(_.dataType != a.dataType) => i.makeCopy(Array(a, b.map(Cast(_, a.dataType)))) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 7b8386b78b096..e3460153756e8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -142,4 +142,18 @@ class AnalysisSuite extends AnalysisTest { ) assertAnalysisSuccess(plan) } + + test("SPARK-8654: different types in inlist but can be converted to a commmon type") { + val plan = Project(Alias(In(Literal(null), Seq(Literal(1), Literal(1.2345))), "a")() :: Nil, + LocalRelation() + ) + assertAnalysisSuccess(plan) + } + + test("SPARK-8654: check type compatibility error") { + val plan = Project(Alias(In(Literal(null), Seq(Literal(true), Literal(1))), "a")() :: Nil, + LocalRelation() + ) + assertAnalysisError(plan,Seq("cannot resolve 'null IN (true,1)' due to data type mismatch: Arguments must be same type")) + } } From ac9265dbb86b58a04a110ff84582ad1335cd1ce9 Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Wed, 7 Oct 2015 19:56:57 -0700 Subject: [PATCH 4/6] Fix scala style check failures --- .../apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala | 2 +- .../org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index c9be84ae3c00f..b0d7c46d66502 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -314,7 +314,7 @@ object HiveTypeCoercion { case e if !e.childrenResolved => e case i @ In(a, b) if (a.dataType == NullType) => - var inTypes:Seq[DataType] = Seq.empty + var inTypes : Seq[DataType] = Seq.empty b.foreach(e => inTypes = inTypes ++ Seq(e.dataType)) findWiderCommonType(inTypes) match { case Some(finalDataType) => Literal.create(null, BooleanType) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index e3460153756e8..cf4008e6d51f0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -154,6 +154,7 @@ class AnalysisSuite extends AnalysisTest { val plan = Project(Alias(In(Literal(null), Seq(Literal(true), Literal(1))), "a")() :: Nil, LocalRelation() ) - assertAnalysisError(plan,Seq("cannot resolve 'null IN (true,1)' due to data type mismatch: Arguments must be same type")) + assertAnalysisError(plan, Seq("cannot resolve 'null IN (true,1)' due to data " + + "type mismatch: Arguments must be same type")) } } From cf976a679c96de9a0db18b14f216c34ee1753662 Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Thu, 8 Oct 2015 02:12:25 -0700 Subject: [PATCH 5/6] Apply code review comments --- .../sql/catalyst/analysis/HiveTypeCoercion.scala | 14 ++++---------- .../sql/catalyst/analysis/AnalysisSuite.scala | 3 +-- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index b0d7c46d66502..c037e6ee2b7ca 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -305,24 +305,18 @@ object HiveTypeCoercion { /** * Convert all expressions in in() list to the left operator type - * except when the left operator type is NullType. In case when left hand - * operator type is NullType create a Literal(Null). + * except when the left operator type is NullType. */ object InConversion extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { // Skip nodes who's children have not been resolved yet. case e if !e.childrenResolved => e - case i @ In(a, b) if (a.dataType == NullType) => - var inTypes : Seq[DataType] = Seq.empty - b.foreach(e => inTypes = inTypes ++ Seq(e.dataType)) - findWiderCommonType(inTypes) match { - case Some(finalDataType) => Literal.create(null, BooleanType) + case i @ In(a, b) if b.exists(_.dataType != a.dataType) => + findWiderCommonType(i.children.map(_.dataType)) match { + case Some(finalDataType) => i.withNewChildren(i.children.map(Cast(_, finalDataType))) case None => i } - - case i @ In(a, b) if b.exists(_.dataType != a.dataType) => - i.makeCopy(Array(a, b.map(Cast(_, a.dataType)))) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index cf4008e6d51f0..77a4765e7751c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -154,7 +154,6 @@ class AnalysisSuite extends AnalysisTest { val plan = Project(Alias(In(Literal(null), Seq(Literal(true), Literal(1))), "a")() :: Nil, LocalRelation() ) - assertAnalysisError(plan, Seq("cannot resolve 'null IN (true,1)' due to data " + - "type mismatch: Arguments must be same type")) + assertAnalysisError(plan, Seq("data type mismatch: Arguments must be same type")) } } From 6bfdf2fb0a73f1eddc37b68dfafb04ca15a2a0d9 Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Thu, 8 Oct 2015 02:37:48 -0700 Subject: [PATCH 6/6] Fix the comment --- .../spark/sql/catalyst/analysis/HiveTypeCoercion.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index c037e6ee2b7ca..7192c931d2e51 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -304,8 +304,10 @@ object HiveTypeCoercion { } /** - * Convert all expressions in in() list to the left operator type - * except when the left operator type is NullType. + * Convert the value and in list expressions to the common operator type + * by looking at all the argument types and finding the closest one that + * all the arguments can be cast to. When no common operator type is found + * an Analysis Exception is raised. */ object InConversion extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions {