From 8c8313c7689c04ce781011c420f193ac2a14d9d9 Mon Sep 17 00:00:00 2001 From: Tanel Kiis Date: Sat, 22 Aug 2020 19:49:47 +0300 Subject: [PATCH 1/8] Add special values to LiteralGenerator for float and double --- .../catalyst/expressions/LiteralGenerator.scala | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala index d92eb01b69bf0..ba1eeabfe096b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala @@ -70,14 +70,22 @@ object LiteralGenerator { lazy val floatLiteralGen: Gen[Literal] = for { - f <- Gen.chooseNum(Float.MinValue / 2, Float.MaxValue / 2, - Float.NaN, Float.PositiveInfinity, Float.NegativeInfinity) + f <- Gen.oneOf( + Gen.oneOf( + Float.NaN, Float.PositiveInfinity, Float.NegativeInfinity, Float.MinPositiveValue, + 0.0f, -0.0f, 1.0f, -1.0f), + Arbitrary.arbFloat.arbitrary + ) } yield Literal.create(f, FloatType) lazy val doubleLiteralGen: Gen[Literal] = for { - f <- Gen.chooseNum(Double.MinValue / 2, Double.MaxValue / 2, - Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity) + f <- Gen.oneOf( + Gen.oneOf( + Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity, Double.MinPositiveValue, + 0.0, -0.0, 1.0, -1.0), + Arbitrary.arbDouble.arbitrary + ) } yield Literal.create(f, DoubleType) // TODO cache the generated data From 77d63ac4b9356f2d8db407e05b763f3b4107c80d Mon Sep 17 00:00:00 2001 From: Tanel Kiis Date: Sun, 23 Aug 2020 04:56:13 +0300 Subject: [PATCH 2/8] Add 'MaxValue' to LiteralGenerator for float and double --- .../spark/sql/catalyst/expressions/LiteralGenerator.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala index ba1eeabfe096b..6ad43d379d93b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala @@ -73,7 +73,7 @@ object LiteralGenerator { f <- Gen.oneOf( Gen.oneOf( Float.NaN, Float.PositiveInfinity, Float.NegativeInfinity, Float.MinPositiveValue, - 0.0f, -0.0f, 1.0f, -1.0f), + Float.MaxValue, 0.0f, -0.0f, 1.0f, -1.0f), Arbitrary.arbFloat.arbitrary ) } yield Literal.create(f, FloatType) @@ -83,7 +83,7 @@ object LiteralGenerator { f <- Gen.oneOf( Gen.oneOf( Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity, Double.MinPositiveValue, - 0.0, -0.0, 1.0, -1.0), + Double.MaxValue, 0.0, -0.0, 1.0, -1.0), Arbitrary.arbDouble.arbitrary ) } yield Literal.create(f, DoubleType) From 818abe2382ef57bb23177570d725662c0d6f1bf6 Mon Sep 17 00:00:00 2001 From: Tanel Kiis Date: Sun, 23 Aug 2020 13:00:18 +0300 Subject: [PATCH 3/8] Ensure -0.0 and 0.0 comparison consistency --- .../apache/spark/sql/catalyst/expressions/predicates.scala | 6 ++++-- .../spark/sql/catalyst/expressions/LiteralGenerator.scala | 7 +++++-- .../spark/sql/catalyst/expressions/PredicateSuite.scala | 7 +++++++ 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index aa5cf4758564b..86f250d55fa67 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -793,7 +793,9 @@ case class EqualTo(left: Expression, right: Expression) // | FALSE | FALSE | TRUE | UNKNOWN | // | UNKNOWN | UNKNOWN | UNKNOWN | UNKNOWN | // +---------+---------+---------+---------+ - protected override def nullSafeEval(left: Any, right: Any): Any = ordering.equiv(left, right) + protected override def nullSafeEval(left: Any, right: Any): Any = { + left == right || ordering.equiv(left, right) + } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { defineCodeGen(ctx, ev, (c1, c2) => ctx.genEqual(left.dataType, c1, c2)) @@ -845,7 +847,7 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp } else if (input1 == null || input2 == null) { false } else { - ordering.equiv(input1, input2) + input1 == input2 || ordering.equiv(input1, input2) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala index 6ad43d379d93b..c8e3b0e157319 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala @@ -68,12 +68,15 @@ object LiteralGenerator { lazy val longLiteralGen: Gen[Literal] = for { l <- Arbitrary.arbLong.arbitrary } yield Literal.create(l, LongType) + // The floatLiteralGen and doubleLiteralGen will 50% of the time yield arbitrary values + // and 50% of the time will yield some special values that are more likely to reveal + // corner cases. This behavior is similar to the integral value generators. lazy val floatLiteralGen: Gen[Literal] = for { f <- Gen.oneOf( Gen.oneOf( Float.NaN, Float.PositiveInfinity, Float.NegativeInfinity, Float.MinPositiveValue, - Float.MaxValue, 0.0f, -0.0f, 1.0f, -1.0f), + Float.MaxValue, -Float.MaxValue, 0.0f, -0.0f, 1.0f, -1.0f), Arbitrary.arbFloat.arbitrary ) } yield Literal.create(f, FloatType) @@ -83,7 +86,7 @@ object LiteralGenerator { f <- Gen.oneOf( Gen.oneOf( Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity, Double.MinPositiveValue, - Double.MaxValue, 0.0, -0.0, 1.0, -1.0), + Double.MaxValue, -Double.MaxValue, 0.0, -0.0, 1.0, -1.0), Arbitrary.arbDouble.arbitrary ) } yield Literal.create(f, DoubleType) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala index 1ad0a8ed758f4..ddb0996591561 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala @@ -496,6 +496,13 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(EqualTo(infinity, infinity), true) } + test("SPARK-32688: 0.0 and -0.0 should be considered equal") { + checkEvaluation(EqualTo(Literal(0.0), Literal(-0.0)), true) + checkEvaluation(EqualNullSafe(Literal(0.0), Literal(-0.0)), true) + checkEvaluation(EqualTo(Literal(0.0f), Literal(-0.0f)), true) + checkEvaluation(EqualNullSafe(Literal(0.0f), Literal(-0.0f)), true) + } + test("SPARK-22693: InSet should not use global variables") { val ctx = new CodegenContext InSet(Literal(1), Set(1, 2, 3, 4)).genCode(ctx) From 1116d3d2e4e27c777176c11425ea5465e3a6ff7e Mon Sep 17 00:00:00 2001 From: Tanel Kiis Date: Sun, 23 Aug 2020 15:39:23 +0300 Subject: [PATCH 4/8] Revert equality check --- .../apache/spark/sql/catalyst/expressions/predicates.scala | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 86f250d55fa67..aa5cf4758564b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -793,9 +793,7 @@ case class EqualTo(left: Expression, right: Expression) // | FALSE | FALSE | TRUE | UNKNOWN | // | UNKNOWN | UNKNOWN | UNKNOWN | UNKNOWN | // +---------+---------+---------+---------+ - protected override def nullSafeEval(left: Any, right: Any): Any = { - left == right || ordering.equiv(left, right) - } + protected override def nullSafeEval(left: Any, right: Any): Any = ordering.equiv(left, right) override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { defineCodeGen(ctx, ev, (c1, c2) => ctx.genEqual(left.dataType, c1, c2)) @@ -847,7 +845,7 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp } else if (input1 == null || input2 == null) { false } else { - input1 == input2 || ordering.equiv(input1, input2) + ordering.equiv(input1, input2) } } From 2dce36e9afac3b1b0e8abdc63836e8c48143a46c Mon Sep 17 00:00:00 2001 From: Tanel Kiis Date: Sun, 23 Aug 2020 23:44:42 +0300 Subject: [PATCH 5/8] test -0.0 equality for complex types --- .../expressions/LiteralGenerator.scala | 2 ++ .../catalyst/expressions/PredicateSuite.scala | 35 +++++++++++++++---- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala index c8e3b0e157319..f89b3859adb36 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala @@ -178,6 +178,8 @@ object LiteralGenerator { case BinaryType => binaryLiteralGen case CalendarIntervalType => calendarIntervalLiterGen case DecimalType.Fixed(precision, scale) => decimalLiteralGen(precision, scale) + case ArrayType(et, _) => randomGen(et).map( + lit => Literal.create(Array(lit.value), ArrayType(et))) case dt => throw new IllegalArgumentException(s"not supported type $dt") } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala index ddb0996591561..d6d52eba4d095 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala @@ -22,7 +22,7 @@ import java.sql.{Date, Timestamp} import scala.collection.immutable.HashSet import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.RandomDataGenerator +import org.apache.spark.sql.{RandomDataGenerator, Row} import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.encoders.ExamplePointUDT @@ -91,6 +91,10 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { DataTypeTestUtils.propertyCheckSupported.foreach { dt => checkConsistencyBetweenInterpretedAndCodegen(EqualTo, dt, dt) checkConsistencyBetweenInterpretedAndCodegen(EqualNullSafe, dt, dt) + + val arrayType = ArrayType(dt) + checkConsistencyBetweenInterpretedAndCodegen(EqualTo, arrayType, arrayType) + checkConsistencyBetweenInterpretedAndCodegen(EqualNullSafe, arrayType, arrayType) } } @@ -496,11 +500,30 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(EqualTo(infinity, infinity), true) } - test("SPARK-32688: 0.0 and -0.0 should be considered equal") { - checkEvaluation(EqualTo(Literal(0.0), Literal(-0.0)), true) - checkEvaluation(EqualNullSafe(Literal(0.0), Literal(-0.0)), true) - checkEvaluation(EqualTo(Literal(0.0f), Literal(-0.0f)), true) - checkEvaluation(EqualNullSafe(Literal(0.0f), Literal(-0.0f)), true) + private def testEquality(literals: Seq[Literal]): Unit = { + literals.foreach(left => { + literals.foreach(right => { + checkEvaluation(EqualTo(left, right), true) + checkEvaluation(EqualNullSafe(left, right), true) + + val leftArray = Literal.create(Array(left.value), ArrayType(left.dataType)) + val rightArray = Literal.create(Array(right.value), ArrayType(right.dataType)) + checkEvaluation(EqualTo(leftArray, rightArray), true) + checkEvaluation(EqualNullSafe(leftArray, rightArray), true) + + val leftStruct = Literal.create( + Row(left.value), new StructType().add("a", left.dataType)) + val rightStruct = Literal.create( + Row(right.value), new StructType().add("a", right.dataType)) + checkEvaluation(EqualTo(leftStruct, rightStruct), true) + checkEvaluation(EqualNullSafe(leftStruct, rightStruct), true) + }) + }) + } + + test("SPARK-32688: 0.0 and -0.0 should be equal") { + testEquality(Seq(Literal(0.0), Literal(-0.0))) + testEquality(Seq(Literal(0.0f), Literal(-0.0f))) } test("SPARK-22693: InSet should not use global variables") { From 9a3b98316cea83c750a53bb5ce335c77ddccd2d5 Mon Sep 17 00:00:00 2001 From: Tanel Kiis Date: Mon, 24 Aug 2020 12:54:04 +0300 Subject: [PATCH 6/8] Add -0.0 as special value to RandomDataGenerator --- .../test/scala/org/apache/spark/sql/RandomDataGenerator.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala index 6bd7a27ac11f1..9fa27c7df3832 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala @@ -260,10 +260,10 @@ object RandomDataGenerator { new MathContext(precision)).bigDecimal) case DoubleType => randomNumeric[Double]( rand, r => longBitsToDouble(r.nextLong()), Seq(Double.MinValue, Double.MinPositiveValue, - Double.MaxValue, Double.PositiveInfinity, Double.NegativeInfinity, Double.NaN, 0.0)) + Double.MaxValue, Double.PositiveInfinity, Double.NegativeInfinity, Double.NaN, 0.0, -0.0)) case FloatType => randomNumeric[Float]( rand, r => intBitsToFloat(r.nextInt()), Seq(Float.MinValue, Float.MinPositiveValue, - Float.MaxValue, Float.PositiveInfinity, Float.NegativeInfinity, Float.NaN, 0.0f)) + Float.MaxValue, Float.PositiveInfinity, Float.NegativeInfinity, Float.NaN, 0.0f, -0.0f)) case ByteType => randomNumeric[Byte]( rand, _.nextInt().toByte, Seq(Byte.MinValue, Byte.MaxValue, 0.toByte)) case IntegerType => randomNumeric[Int]( From d20a2ed46c07f50391b369bf8d4c268105f81246 Mon Sep 17 00:00:00 2001 From: Tanel Kiis Date: Fri, 4 Sep 2020 13:35:32 +0300 Subject: [PATCH 7/8] revert test -0.0 equality for complex types --- .../expressions/LiteralGenerator.scala | 2 -- .../catalyst/expressions/PredicateSuite.scala | 30 ------------------- 2 files changed, 32 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala index f89b3859adb36..c8e3b0e157319 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala @@ -178,8 +178,6 @@ object LiteralGenerator { case BinaryType => binaryLiteralGen case CalendarIntervalType => calendarIntervalLiterGen case DecimalType.Fixed(precision, scale) => decimalLiteralGen(precision, scale) - case ArrayType(et, _) => randomGen(et).map( - lit => Literal.create(Array(lit.value), ArrayType(et))) case dt => throw new IllegalArgumentException(s"not supported type $dt") } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala index d6d52eba4d095..b85e6b602fc26 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala @@ -91,10 +91,6 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { DataTypeTestUtils.propertyCheckSupported.foreach { dt => checkConsistencyBetweenInterpretedAndCodegen(EqualTo, dt, dt) checkConsistencyBetweenInterpretedAndCodegen(EqualNullSafe, dt, dt) - - val arrayType = ArrayType(dt) - checkConsistencyBetweenInterpretedAndCodegen(EqualTo, arrayType, arrayType) - checkConsistencyBetweenInterpretedAndCodegen(EqualNullSafe, arrayType, arrayType) } } @@ -500,32 +496,6 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(EqualTo(infinity, infinity), true) } - private def testEquality(literals: Seq[Literal]): Unit = { - literals.foreach(left => { - literals.foreach(right => { - checkEvaluation(EqualTo(left, right), true) - checkEvaluation(EqualNullSafe(left, right), true) - - val leftArray = Literal.create(Array(left.value), ArrayType(left.dataType)) - val rightArray = Literal.create(Array(right.value), ArrayType(right.dataType)) - checkEvaluation(EqualTo(leftArray, rightArray), true) - checkEvaluation(EqualNullSafe(leftArray, rightArray), true) - - val leftStruct = Literal.create( - Row(left.value), new StructType().add("a", left.dataType)) - val rightStruct = Literal.create( - Row(right.value), new StructType().add("a", right.dataType)) - checkEvaluation(EqualTo(leftStruct, rightStruct), true) - checkEvaluation(EqualNullSafe(leftStruct, rightStruct), true) - }) - }) - } - - test("SPARK-32688: 0.0 and -0.0 should be equal") { - testEquality(Seq(Literal(0.0), Literal(-0.0))) - testEquality(Seq(Literal(0.0f), Literal(-0.0f))) - } - test("SPARK-22693: InSet should not use global variables") { val ctx = new CodegenContext InSet(Literal(1), Set(1, 2, 3, 4)).genCode(ctx) From 529aba8e7207a22039aebbe379d9d27035a143c1 Mon Sep 17 00:00:00 2001 From: Tanel Kiis Date: Fri, 4 Sep 2020 13:36:54 +0300 Subject: [PATCH 8/8] revert test -0.0 equality for complex types --- .../apache/spark/sql/catalyst/expressions/PredicateSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala index b85e6b602fc26..1ad0a8ed758f4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala @@ -22,7 +22,7 @@ import java.sql.{Date, Timestamp} import scala.collection.immutable.HashSet import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.{RandomDataGenerator, Row} +import org.apache.spark.sql.RandomDataGenerator import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.encoders.ExamplePointUDT