From c5f9a00cc256f272c8d9e77aafaa1ad35b757029 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Fri, 14 Jan 2022 15:12:27 +0800 Subject: [PATCH 1/8] [SPARK-37907][SQL] StaticInvoke support ConstantFolding --- .../expressions/objects/objects.scala | 1 + .../optimizer/ConstantFoldingSuite.scala | 33 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 50e214011b616..5b497e0f37ea5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -257,6 +257,7 @@ case class StaticInvoke( Utils.classForName(objectName) } + override def foldable: Boolean = children.forall(_.foldable) override def nullable: Boolean = needNullCheck || returnNullable override def children: Seq[Expression] = arguments diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala index ae644c1110740..7276aae7c09fc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala @@ -21,10 +21,12 @@ import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, Unresol import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.ByteArray class ConstantFoldingSuite extends PlanTest { @@ -299,4 +301,35 @@ class ConstantFoldingSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + + test("SPARK-37907: StaticInvoke support ConstantFolding") { + val originalQuery = + testRelation + .select( + StaticInvoke( + classOf[ByteArray], + BinaryType, + "lpad", + Seq(Literal("Spark".getBytes), Literal(7), Literal("W".getBytes)), + Seq(BinaryType, IntegerType, BinaryType), + returnNullable = false).as("c1"), + StaticInvoke( + classOf[ByteArray], + BinaryType, + "rpad", + Seq(Literal("Spark".getBytes), Literal(7), Literal("W".getBytes)), + Seq(BinaryType, IntegerType, BinaryType), + returnNullable = false).as("c2")) + + val optimized = Optimize.execute(originalQuery.analyze) + + val correctAnswer = + testRelation + .select( + Literal("WWSpark".getBytes()).as("c1"), + Literal("SparkWW".getBytes()).as("c2")) + .analyze + + comparePlans(optimized, correctAnswer) + } } From 5815116e809064be0d0f8f20d037a8609d322d49 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Fri, 14 Jan 2022 15:22:46 +0800 Subject: [PATCH 2/8] Update objects.scala --- .../apache/spark/sql/catalyst/expressions/objects/objects.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 5b497e0f37ea5..8d3b5ae54b5dc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -50,6 +50,7 @@ trait InvokeLike extends Expression with NonSQLExpression with ImplicitCastInput def propagateNull: Boolean + override def foldable: Boolean = children.forall(_.foldable) protected lazy val needNullCheck: Boolean = needNullCheckForIndex.contains(true) protected lazy val needNullCheckForIndex: Array[Boolean] = arguments.map(a => a.nullable && (propagateNull || @@ -257,7 +258,6 @@ case class StaticInvoke( Utils.classForName(objectName) } - override def foldable: Boolean = children.forall(_.foldable) override def nullable: Boolean = needNullCheck || returnNullable override def children: Seq[Expression] = arguments From 36ccc12962d189cbe5114818e180b530fc39e398 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Fri, 14 Jan 2022 20:45:26 +0800 Subject: [PATCH 3/8] Update CharVarcharTestSuite.scala --- .../scala/org/apache/spark/sql/CharVarcharTestSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index 10eacdb08c424..6ade7a7c99e37 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -332,8 +332,8 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { sql(s"CREATE TABLE t(c STRUCT) USING $format") sql("INSERT INTO t SELECT struct(null)") checkAnswer(spark.table("t"), Row(Row(null))) - val e = intercept[SparkException](sql("INSERT INTO t SELECT struct('123456')")) - assert(e.getCause.getMessage.contains(s"Exceeds char/varchar type length limitation: 5")) + val e = intercept[RuntimeException](sql("INSERT INTO t SELECT struct('123456')")) + assert(e.getMessage.contains(s"Exceeds char/varchar type length limitation: 5")) } } From aebdef8c4cd9727d940634a94cd29d3ab2f5cbf6 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Fri, 14 Jan 2022 21:04:20 +0800 Subject: [PATCH 4/8] Update objects.scala --- .../spark/sql/catalyst/expressions/objects/objects.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 8d3b5ae54b5dc..904baebfd6dfe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -50,7 +50,8 @@ trait InvokeLike extends Expression with NonSQLExpression with ImplicitCastInput def propagateNull: Boolean - override def foldable: Boolean = children.forall(_.foldable) + override def foldable: Boolean = + children.forall(_.foldable) && !dataType.isInstanceOf[ObjectType] protected lazy val needNullCheck: Boolean = needNullCheckForIndex.contains(true) protected lazy val needNullCheckForIndex: Array[Boolean] = arguments.map(a => a.nullable && (propagateNull || From 0fa68851c560f4cb0fa1203d928c3c46ad6a1fc0 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Sat, 15 Jan 2022 09:10:42 +0800 Subject: [PATCH 5/8] Update ConstantFoldingSuite.scala --- .../optimizer/ConstantFoldingSuite.scala | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala index 7276aae7c09fc..6f4f70423357b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala @@ -21,10 +21,11 @@ import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, Unresol import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke +import org.apache.spark.sql.catalyst.expressions.objects.{Invoke, NewInstance, StaticInvoke} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor +import org.apache.spark.sql.catalyst.util.GenericArrayData import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.ByteArray @@ -302,7 +303,7 @@ class ConstantFoldingSuite extends PlanTest { comparePlans(optimized, correctAnswer) } - test("SPARK-37907: StaticInvoke support ConstantFolding") { + test("SPARK-37907: InvokeLike support ConstantFolding") { val originalQuery = testRelation .select( @@ -313,13 +314,18 @@ class ConstantFoldingSuite extends PlanTest { Seq(Literal("Spark".getBytes), Literal(7), Literal("W".getBytes)), Seq(BinaryType, IntegerType, BinaryType), returnNullable = false).as("c1"), - StaticInvoke( - classOf[ByteArray], - BinaryType, - "rpad", - Seq(Literal("Spark".getBytes), Literal(7), Literal("W".getBytes)), - Seq(BinaryType, IntegerType, BinaryType), - returnNullable = false).as("c2")) + Invoke( + Literal.create("a", StringType), + "substring", + StringType, + Seq(Literal(0), Literal(1))).as("c2"), + NewInstance( + cls = classOf[GenericArrayData], + arguments = Literal.fromObject(List(1, 2, 3)) :: Nil, + inputTypes = Nil, + propagateNull = false, + dataType = ArrayType(IntegerType), + outerPointer = None).as("c3")) val optimized = Optimize.execute(originalQuery.analyze) @@ -327,7 +333,8 @@ class ConstantFoldingSuite extends PlanTest { testRelation .select( Literal("WWSpark".getBytes()).as("c1"), - Literal("SparkWW".getBytes()).as("c2")) + Literal.create("a", StringType).as("c2"), + Literal.create(new GenericArrayData(List(1, 2, 3)), ArrayType(IntegerType)).as("c3")) .analyze comparePlans(optimized, correctAnswer) From c9ff7f86cdb1af3c6303d06efd92f50b0a853609 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Thu, 20 Jan 2022 10:23:36 +0800 Subject: [PATCH 6/8] re--trigger From efa007012953184a499320d7e9e75a221cee9ad6 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Thu, 20 Jan 2022 13:20:16 +0800 Subject: [PATCH 7/8] Update objects.scala --- .../apache/spark/sql/catalyst/expressions/objects/objects.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 6255189b5ff7f..2e924dde30e82 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -51,7 +51,7 @@ trait InvokeLike extends Expression with NonSQLExpression with ImplicitCastInput def propagateNull: Boolean override def foldable: Boolean = - children.forall(_.foldable) && !dataType.isInstanceOf[ObjectType] + children.forall(_.foldable) && deterministic && !dataType.isInstanceOf[ObjectType] protected lazy val needNullCheck: Boolean = needNullCheckForIndex.contains(true) protected lazy val needNullCheckForIndex: Array[Boolean] = arguments.map(a => a.nullable && (propagateNull || From 5cbd015f1fb4a57a00259038b63d2e01bf7bf68e Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Fri, 21 Jan 2022 15:19:33 +0800 Subject: [PATCH 8/8] Update objects.scala --- .../spark/sql/catalyst/expressions/objects/objects.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 2e924dde30e82..68a55f7f11696 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -50,8 +50,7 @@ trait InvokeLike extends Expression with NonSQLExpression with ImplicitCastInput def propagateNull: Boolean - override def foldable: Boolean = - children.forall(_.foldable) && deterministic && !dataType.isInstanceOf[ObjectType] + override def foldable: Boolean = children.forall(_.foldable) && deterministic protected lazy val needNullCheck: Boolean = needNullCheckForIndex.contains(true) protected lazy val needNullCheckForIndex: Array[Boolean] = arguments.map(a => a.nullable && (propagateNull ||