From cf863bb1cbccea16c6838950a1daa3070fa3b6e3 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Tue, 11 Sep 2018 00:02:40 -0700 Subject: [PATCH 1/3] fix. --- .../sql/catalyst/optimizer/expressions.scala | 10 +++-- .../BooleanSimplificationSuite.scala | 39 +++++++++++++++++-- .../org/apache/spark/sql/DataFrameSuite.scala | 10 +++++ 3 files changed, 51 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 5629b72894225..7b6e0468a438e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -263,10 +263,12 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper { case TrueLiteral Or _ => TrueLiteral case _ Or TrueLiteral => TrueLiteral - case a And b if Not(a).semanticEquals(b) => FalseLiteral - case a Or b if Not(a).semanticEquals(b) => TrueLiteral - case a And b if a.semanticEquals(Not(b)) => FalseLiteral - case a Or b if a.semanticEquals(Not(b)) => TrueLiteral + case a And b if !a.nullable && !b.nullable && + (Not(a).semanticEquals(b) || a.semanticEquals(Not(b))) => + FalseLiteral + case a Or b if !a.nullable && !b.nullable && + (Not(a).semanticEquals(b) || a.semanticEquals(Not(b))) => + TrueLiteral case a And b if a.semanticEquals(b) => a case a Or b if a.semanticEquals(b) => a diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala index 653c07f1835ca..fe2fd96f1a599 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala @@ -48,6 +48,14 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper { testRelation.output, Seq(Row(1, 2, 3, "abc")) ) + val testNotnullableRelation = LocalRelation('a.int.notNull, 'b.int.notNull, 'c.int.notNull, + 'd.string.notNull, 'e.boolean.notNull, 'f.boolean.notNull, 'g.boolean.notNull, + 'h.boolean.notNull) + + val testNotnullableRelationWithData = LocalRelation.fromExternalRows( + testNotnullableRelation.output, Seq(Row(1, 2, 3, "abc")) + ) + private def checkCondition(input: Expression, expected: LogicalPlan): Unit = { val plan = testRelationWithData.where(input).analyze val actual = Optimize.execute(plan) @@ -61,6 +69,13 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper { comparePlans(actual, correctAnswer) } + private def checkConditionInNotNullableRelation( + input: Expression, expected: LogicalPlan): Unit = { + val plan = testNotnullableRelationWithData.where(input).analyze + val actual = Optimize.execute(plan) + comparePlans(actual, expected) + } + test("a && a => a") { checkCondition(Literal(1) < 'a && Literal(1) < 'a, Literal(1) < 'a) checkCondition(Literal(1) < 'a && Literal(1) < 'a && Literal(1) < 'a, Literal(1) < 'a) @@ -174,10 +189,26 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper { } test("Complementation Laws") { - checkCondition('a && !'a, testRelation) - checkCondition(!'a && 'a, testRelation) + checkConditionInNotNullableRelation('e && !'e, testNotnullableRelation) + checkConditionInNotNullableRelation(!'e && 'e, testNotnullableRelation) + + checkConditionInNotNullableRelation('e || !'e, testNotnullableRelationWithData) + checkConditionInNotNullableRelation(!'e || 'e, testNotnullableRelationWithData) + } + + test("Complementation Laws - null handling") { + checkCondition('e && !'e, testRelationWithData.where('e && !'e).analyze) + checkCondition(!'e && 'e, testRelationWithData.where(!'e && 'e).analyze) + + checkCondition('e || !'e, testRelationWithData.where('e || !'e).analyze) + checkCondition(!'e || 'e, testRelationWithData.where(!'e || 'e).analyze) + } + + test("Complementation Laws - negative case") { + checkCondition('e && !'f, testRelationWithData.where('e && !'f).analyze) + checkCondition(!'f && 'e, testRelationWithData.where(!'f && 'e).analyze) - checkCondition('a || !'a, testRelationWithData) - checkCondition(!'a || 'a, testRelationWithData) + checkCondition('e || !'f, testRelationWithData.where('e || !'f).analyze) + checkCondition(!'f || 'e, testRelationWithData.where(!'f || 'e).analyze) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 435b887cb3c78..279b7b8d49f52 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -2569,4 +2569,14 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { check(lit(2).cast("int"), $"c" === 2, Seq(Row(1, 1, 2, 0), Row(1, 1, 2, 1))) check(lit(2).cast("int"), $"c" =!= 2, Seq()) } + + test("SPARK-25402 Null handling in BooleanSimplification") { + val schema = StructType.fromDDL("a boolean, b int") + val rows = Seq(Row(null, 1)) + + val rdd = sparkContext.parallelize(rows) + val df = spark.createDataFrame(rdd, schema) + + checkAnswer(df.where("(NOT a) OR a"), Seq.empty) + } } From b05052e29fd1e6f8c11586c336864e8808bef737 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Tue, 11 Sep 2018 09:02:46 -0700 Subject: [PATCH 2/3] fix. --- .../sql/catalyst/optimizer/expressions.scala | 9 ++++++ .../BooleanSimplificationSuite.scala | 31 +++++++++++-------- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 7b6e0468a438e..084e8243b3220 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -266,9 +266,18 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper { case a And b if !a.nullable && !b.nullable && (Not(a).semanticEquals(b) || a.semanticEquals(Not(b))) => FalseLiteral + case a And b if Not(a).semanticEquals(b) => + If(IsNull(a), Literal.create(null, a.dataType), FalseLiteral) + case a And b if a.semanticEquals(Not(b)) => + If(IsNull(b), Literal.create(null, b.dataType), FalseLiteral) + case a Or b if !a.nullable && !b.nullable && (Not(a).semanticEquals(b) || a.semanticEquals(Not(b))) => TrueLiteral + case a Or b if Not(a).semanticEquals(b) => + If(IsNull(a), Literal.create(null, a.dataType), TrueLiteral) + case a Or b if a.semanticEquals(Not(b)) => + If(IsNull(b), Literal.create(null, b.dataType), TrueLiteral) case a And b if a.semanticEquals(b) => a case a Or b if a.semanticEquals(b) => a diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala index fe2fd96f1a599..2df661b741b3f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.BooleanType class BooleanSimplificationSuite extends PlanTest with PredicateHelper { @@ -48,12 +49,12 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper { testRelation.output, Seq(Row(1, 2, 3, "abc")) ) - val testNotnullableRelation = LocalRelation('a.int.notNull, 'b.int.notNull, 'c.int.notNull, + val testNotNullableRelation = LocalRelation('a.int.notNull, 'b.int.notNull, 'c.int.notNull, 'd.string.notNull, 'e.boolean.notNull, 'f.boolean.notNull, 'g.boolean.notNull, 'h.boolean.notNull) - val testNotnullableRelationWithData = LocalRelation.fromExternalRows( - testNotnullableRelation.output, Seq(Row(1, 2, 3, "abc")) + val testNotNullableRelationWithData = LocalRelation.fromExternalRows( + testNotNullableRelation.output, Seq(Row(1, 2, 3, "abc")) ) private def checkCondition(input: Expression, expected: LogicalPlan): Unit = { @@ -71,7 +72,7 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper { private def checkConditionInNotNullableRelation( input: Expression, expected: LogicalPlan): Unit = { - val plan = testNotnullableRelationWithData.where(input).analyze + val plan = testNotNullableRelationWithData.where(input).analyze val actual = Optimize.execute(plan) comparePlans(actual, expected) } @@ -189,19 +190,23 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper { } test("Complementation Laws") { - checkConditionInNotNullableRelation('e && !'e, testNotnullableRelation) - checkConditionInNotNullableRelation(!'e && 'e, testNotnullableRelation) + checkConditionInNotNullableRelation('e && !'e, testNotNullableRelation) + checkConditionInNotNullableRelation(!'e && 'e, testNotNullableRelation) - checkConditionInNotNullableRelation('e || !'e, testNotnullableRelationWithData) - checkConditionInNotNullableRelation(!'e || 'e, testNotnullableRelationWithData) + checkConditionInNotNullableRelation('e || !'e, testNotNullableRelationWithData) + checkConditionInNotNullableRelation(!'e || 'e, testNotNullableRelationWithData) } test("Complementation Laws - null handling") { - checkCondition('e && !'e, testRelationWithData.where('e && !'e).analyze) - checkCondition(!'e && 'e, testRelationWithData.where(!'e && 'e).analyze) - - checkCondition('e || !'e, testRelationWithData.where('e || !'e).analyze) - checkCondition(!'e || 'e, testRelationWithData.where(!'e || 'e).analyze) + checkCondition('e && !'e, + testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), false)).analyze) + checkCondition(!'e && 'e, + testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), false)).analyze) + + checkCondition('e || !'e, + testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), true)).analyze) + checkCondition(!'e || 'e, + testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), true)).analyze) } test("Complementation Laws - negative case") { From 61b2d551b19755c741b527a6f3578c3a46c544c3 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Tue, 11 Sep 2018 22:48:27 -0700 Subject: [PATCH 3/3] fix. --- .../apache/spark/sql/catalyst/optimizer/expressions.scala | 6 ------ .../sql/catalyst/optimizer/BooleanSimplificationSuite.scala | 1 + 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 084e8243b3220..f8037588fa71e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -263,17 +263,11 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper { case TrueLiteral Or _ => TrueLiteral case _ Or TrueLiteral => TrueLiteral - case a And b if !a.nullable && !b.nullable && - (Not(a).semanticEquals(b) || a.semanticEquals(Not(b))) => - FalseLiteral case a And b if Not(a).semanticEquals(b) => If(IsNull(a), Literal.create(null, a.dataType), FalseLiteral) case a And b if a.semanticEquals(Not(b)) => If(IsNull(b), Literal.create(null, b.dataType), FalseLiteral) - case a Or b if !a.nullable && !b.nullable && - (Not(a).semanticEquals(b) || a.semanticEquals(Not(b))) => - TrueLiteral case a Or b if Not(a).semanticEquals(b) => If(IsNull(a), Literal.create(null, a.dataType), TrueLiteral) case a Or b if a.semanticEquals(Not(b)) => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala index 2df661b741b3f..6cd1108eef333 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala @@ -38,6 +38,7 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper { Batch("Constant Folding", FixedPoint(50), NullPropagation, ConstantFolding, + SimplifyConditionals, BooleanSimplification, PruneFilters) :: Nil }