From 120d919d1f0be1780321f6f2a7f130526ed06232 Mon Sep 17 00:00:00 2001 From: Qian Sun Date: Mon, 7 Apr 2025 17:43:29 +0800 Subject: [PATCH 1/3] [GLUTEN-9240][VL] Write NULL value into relation in gluten unit tests --- .../execution/JsonFunctionsValidateSuite.scala | 18 +++++++++++++++--- .../ScalarFunctionsValidateSuite.scala | 16 ++++++++++++++-- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/JsonFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/JsonFunctionsValidateSuite.scala index fc118a1e095f..07739b1cc117 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/JsonFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/JsonFunctionsValidateSuite.scala @@ -54,9 +54,21 @@ class JsonFunctionsValidateSuite extends FunctionsValidateSuite { runQueryAndCompare( s"select l_orderkey, json_array_length('[1,2,3,4]') " + s"from lineitem limit 5")(checkGlutenOperatorMatch[ProjectExecTransformer]) - runQueryAndCompare( - s"select l_orderkey, json_array_length(null) " + - s"from lineitem limit 5")(checkGlutenOperatorMatch[ProjectExecTransformer]) + withTempPath { + path => + Seq[(String)]( + (null.asInstanceOf[String]) + ) + .toDF("txt") + .write + .parquet(path.getCanonicalPath) + + spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("tbl") + + runQueryAndCompare("select json_array_length(txt) from tbl") { + checkGlutenOperatorMatch[ProjectExecTransformer] + } + } } testWithSpecifiedSparkVersion("from_json function bool", Some("3.4")) { diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala index 4821f4f9f567..521574ee6003 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala @@ -187,8 +187,20 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite { } testWithSpecifiedSparkVersion("null input for array_size", Some("3.3")) { - runQueryAndCompare("SELECT array_size(null)") { - checkGlutenOperatorMatch[ProjectExecTransformer] + withTempPath { + path => + Seq[(String)]( + (null.asInstanceOf[String]) + ) + .toDF("txt") + .write + .parquet(path.getCanonicalPath) + + spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("tbl") + + runQueryAndCompare("select array_size(txt) from tbl") { + checkGlutenOperatorMatch[ProjectExecTransformer] + } } } From b5c85eeb3111d141477e4329b3ca5c3a5fe26d76 Mon Sep 17 00:00:00 2001 From: Qian Sun Date: Mon, 7 Apr 2025 18:05:30 +0800 Subject: [PATCH 2/3] write primitive type value into relation --- .../JsonFunctionsValidateSuite.scala | 22 +++++--- .../ScalarFunctionsValidateSuite.scala | 50 ++++++++++++++++--- 2 files changed, 57 insertions(+), 15 deletions(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/JsonFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/JsonFunctionsValidateSuite.scala index 07739b1cc117..24ddb48d813c 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/JsonFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/JsonFunctionsValidateSuite.scala @@ -30,10 +30,20 @@ class JsonFunctionsValidateSuite extends FunctionsValidateSuite { checkGlutenOperatorMatch[ProjectExecTransformer] } - runQueryAndCompare( - "SELECT l_orderkey, get_json_object('{\"a\":\"b\"}', '$.a') " + - "from lineitem limit 1;") { - checkGlutenOperatorMatch[ProjectExecTransformer] + withTempPath { + path => + Seq[(String)]( + ("""{"a":"b"}""") + ) + .toDF("txt") + .write + .parquet(path.getCanonicalPath) + + spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("tbl") + + runQueryAndCompare("select get_json_object(txt, '$.a') from tbl") { + checkGlutenOperatorMatch[ProjectExecTransformer] + } } // Invalid UTF-8 encoding. @@ -51,12 +61,10 @@ class JsonFunctionsValidateSuite extends FunctionsValidateSuite { runQueryAndCompare( s"select *, json_array_length(string_field1) " + s"from datatab limit 5")(checkGlutenOperatorMatch[ProjectExecTransformer]) - runQueryAndCompare( - s"select l_orderkey, json_array_length('[1,2,3,4]') " + - s"from lineitem limit 5")(checkGlutenOperatorMatch[ProjectExecTransformer]) withTempPath { path => Seq[(String)]( + ("[1,2,3,4]"), (null.asInstanceOf[String]) ) .toDF("txt") diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala index 521574ee6003..bbdfdfdc1186 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala @@ -762,9 +762,20 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite { test("Test sequence function optimized by Spark constant folding") { withSQLConf(("spark.sql.optimizer.excludedRules", NullPropagation.ruleName)) { - runQueryAndCompare("""SELECT sequence(1, 5), l_orderkey - | from lineitem limit 100""".stripMargin) { - checkGlutenOperatorMatch[ProjectExecTransformer] + withTempPath { + path => + Seq[(Integer, Integer)]( + (1, 5) + ) + .toDF("val1", "val2") + .write + .parquet(path.getCanonicalPath) + + spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("tbl") + + runQueryAndCompare("SELECT sequence(val1, val2) from tbl") { + checkGlutenOperatorMatch[ProjectExecTransformer] + } } } } @@ -813,8 +824,20 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite { } test("Test sum/count function") { - runQueryAndCompare("""SELECT sum(2),count(2) from lineitem""".stripMargin) { - checkGlutenOperatorMatch[BatchScanExecTransformer] + withTempPath { + path => + Seq[(Integer, Integer)]( + (2, 2) + ) + .toDF("val1", "val2") + .write + .parquet(path.getCanonicalPath) + + spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("tbl") + + runQueryAndCompare("SELECT sum(val1),count(val2) from tbl") { + checkGlutenOperatorMatch[BatchScanExecTransformer] + } } } @@ -830,9 +853,20 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite { } testWithSpecifiedSparkVersion("Test width_bucket function", Some("3.4")) { - runQueryAndCompare("""SELECT width_bucket(2, 0, 4, 3), l_orderkey - | from lineitem limit 100""".stripMargin) { - checkGlutenOperatorMatch[ProjectExecTransformer] + withTempPath { + path => + Seq[(Integer, Integer, Integer, Integer)]( + (2, 0, 4, 3) + ) + .toDF("val1", "val2", "val3", "val4") + .write + .parquet(path.getCanonicalPath) + + spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("tbl") + + runQueryAndCompare("SELECT width_bucket(val1, val2, val3, val4) from tbl") { + checkGlutenOperatorMatch[BatchScanExecTransformer] + } } } From 877a130edbf0072907888ca9f1557ef85169fa34 Mon Sep 17 00:00:00 2001 From: Qian Sun Date: Tue, 8 Apr 2025 09:07:24 +0800 Subject: [PATCH 3/3] fix data type --- .../ScalarFunctionsValidateSuite.scala | 21 +++++-------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala index bbdfdfdc1186..c5cbfe7b8367 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala @@ -189,8 +189,8 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite { testWithSpecifiedSparkVersion("null input for array_size", Some("3.3")) { withTempPath { path => - Seq[(String)]( - (null.asInstanceOf[String]) + Seq[(Array[Int])]( + (null.asInstanceOf[Array[Int]]) ) .toDF("txt") .write @@ -762,20 +762,9 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite { test("Test sequence function optimized by Spark constant folding") { withSQLConf(("spark.sql.optimizer.excludedRules", NullPropagation.ruleName)) { - withTempPath { - path => - Seq[(Integer, Integer)]( - (1, 5) - ) - .toDF("val1", "val2") - .write - .parquet(path.getCanonicalPath) - - spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("tbl") - - runQueryAndCompare("SELECT sequence(val1, val2) from tbl") { - checkGlutenOperatorMatch[ProjectExecTransformer] - } + runQueryAndCompare("""SELECT sequence(1, 5), l_orderkey + | from lineitem limit 100""".stripMargin) { + checkGlutenOperatorMatch[ProjectExecTransformer] } } }