From a84c12768500a3b0526a1a0364d5c5bd1810b0c0 Mon Sep 17 00:00:00 2001 From: weixiuli Date: Tue, 21 Jan 2025 19:15:06 +0800 Subject: [PATCH 1/4] [GLUTEN-8581][VL] Support Spark legacy date formatter whether the spark.sql.legacy.timeParserPolicy is set to LEGACY or legacy. --- cpp/velox/compute/WholeStageResultIterator.cc | 8 ++++-- .../spark/sql/GlutenDateFunctionsSuite.scala | 28 +++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc index c1ae6f218458..9aeb93bd25c6 100644 --- a/cpp/velox/compute/WholeStageResultIterator.cc +++ b/cpp/velox/compute/WholeStageResultIterator.cc @@ -551,8 +551,12 @@ std::unordered_map WholeStageResultIterator::getQueryC configs[velox::core::QueryConfig::kSparkPartitionId] = std::to_string(taskInfo_.partitionId); - // Enable Spark legacy date formatter if spark.sql.legacy.timeParserPolicy is set to 'LEGACY'. - if (veloxCfg_->get(kSparkLegacyTimeParserPolicy, "") == "LEGACY") { + // Enable Spark legacy date formatter if spark.sql.legacy.timeParserPolicy is set to 'LEGACY' + // or 'legacy'. + auto sparkPolicy = veloxCfg_->get(kSparkLegacyTimeParserPolicy, ""); + std::transform( + sparkPolicy.begin(), sparkPolicy.end(), sparkPolicy.begin(), [](unsigned char c) { return std::toupper(c); }); + if (sparkPolicy == "LEGACY") { configs[velox::core::QueryConfig::kSparkLegacyDateFormatter] = "true"; } else { configs[velox::core::QueryConfig::kSparkLegacyDateFormatter] = "false"; diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala index ae86c9d06e81..e225f66bf07a 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala @@ -29,6 +29,34 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra private def secs(millis: Long): Long = TimeUnit.MILLISECONDS.toSeconds(millis) + test("unix_timestamp/to_unix_timestamp") { + Seq("corrected", "legacy").foreach { + time_parser_policy => + withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> time_parser_policy) { + val df = Seq("2022-01-04 14:52:40").toDF("d") + val fmt = "yyyy-MM-dd" + val r1 = df.selectExpr(s"unix_timestamp(d, '$fmt')") + val r2 = df.selectExpr(s"to_unix_timestamp(d, '$fmt')") + if (time_parser_policy == "corrected") { + checkAnswer(r1, Seq(Row(null))) + checkAnswer(r2, Seq(Row(null))) + } else { + checkAnswer(r1, Seq(Row(1641283200))) + checkAnswer(r2, Seq(Row(1641283200))) + } + val r3 = df.selectExpr(s"unix_timestamp('2022-01-04 14:52:40.017')") + val r4 = df.selectExpr(s"to_unix_timestamp('2022-01-04 14:52:40.017')") + if (time_parser_policy == "corrected") { + checkAnswer(r1, Seq(Row(null))) + checkAnswer(r2, Seq(Row(null))) + } else { + checkAnswer(r3, Seq(Row(1641336760))) + checkAnswer(r4, Seq(Row(1641336760))) + } + } + } + } + testGluten("unix_timestamp") { Seq("corrected", "legacy").foreach { legacyParserPolicy => From daff06f45a63eda6d84c7c71f9f0f2d597ae2d67 Mon Sep 17 00:00:00 2001 From: weixiuli Date: Wed, 22 Jan 2025 09:29:29 +0800 Subject: [PATCH 2/4] Address comments --- cpp/velox/compute/WholeStageResultIterator.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc index 9aeb93bd25c6..336edf3c20d0 100644 --- a/cpp/velox/compute/WholeStageResultIterator.cc +++ b/cpp/velox/compute/WholeStageResultIterator.cc @@ -554,8 +554,9 @@ std::unordered_map WholeStageResultIterator::getQueryC // Enable Spark legacy date formatter if spark.sql.legacy.timeParserPolicy is set to 'LEGACY' // or 'legacy'. auto sparkPolicy = veloxCfg_->get(kSparkLegacyTimeParserPolicy, ""); - std::transform( - sparkPolicy.begin(), sparkPolicy.end(), sparkPolicy.begin(), [](unsigned char c) { return std::toupper(c); }); + for (auto& c : sparkPolicy) { + c = std::toupper(static_cast(c)); + } if (sparkPolicy == "LEGACY") { configs[velox::core::QueryConfig::kSparkLegacyDateFormatter] = "true"; } else { From 7e1d7cb30a35aa3ce4e16d999022f2584ae1ed19 Mon Sep 17 00:00:00 2001 From: weixiuli Date: Fri, 24 Jan 2025 01:41:10 +0000 Subject: [PATCH 3/4] Address comments --- cpp/velox/compute/WholeStageResultIterator.cc | 8 +-- .../spark/sql/GlutenDateFunctionsSuite.scala | 27 +++------- .../spark/sql/GlutenDateFunctionsSuite.scala | 26 +++------ .../spark/sql/GlutenDateFunctionsSuite.scala | 25 +++------ .../spark/sql/GlutenDateFunctionsSuite.scala | 54 +++---------------- .../apache/gluten/config/GlutenConfig.scala | 7 +++ 6 files changed, 33 insertions(+), 114 deletions(-) diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc index 336edf3c20d0..b6023e1b8be6 100644 --- a/cpp/velox/compute/WholeStageResultIterator.cc +++ b/cpp/velox/compute/WholeStageResultIterator.cc @@ -552,12 +552,8 @@ std::unordered_map WholeStageResultIterator::getQueryC configs[velox::core::QueryConfig::kSparkPartitionId] = std::to_string(taskInfo_.partitionId); // Enable Spark legacy date formatter if spark.sql.legacy.timeParserPolicy is set to 'LEGACY' - // or 'legacy'. - auto sparkPolicy = veloxCfg_->get(kSparkLegacyTimeParserPolicy, ""); - for (auto& c : sparkPolicy) { - c = std::toupper(static_cast(c)); - } - if (sparkPolicy == "LEGACY") { + // or 'legacy' + if (veloxCfg_->get(kSparkLegacyTimeParserPolicy, "") == "LEGACY") { configs[velox::core::QueryConfig::kSparkLegacyDateFormatter] = "true"; } else { configs[velox::core::QueryConfig::kSparkLegacyDateFormatter] = "false"; diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala index 5ddfe6fc1ff3..ab36914595d3 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala @@ -114,19 +114,9 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd mm:HH:ss')"), Seq(Row(secs(ts4.getTime)), Row(null), Row(secs(ts3.getTime)), Row(null))) - // legacyParserPolicy is not respected by Gluten. // invalid format - // val invalid = df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd aa:HH:ss')") - // if (legacyParserPolicy == "legacy") { - // checkAnswer(invalid, - // Seq(Row(null), Row(null), Row(null), Row(null))) - // } else { - // val e = intercept[SparkUpgradeException](invalid.collect()) - // assert(e.getCause.isInstanceOf[IllegalArgumentException]) - // assert(e.getMessage.contains( - // "You may get a different result due to the upgrading to Spark")) - // } - + val invalid = df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd aa:HH:ss')") + checkAnswer(invalid, Seq(Row(null), Row(null), Row(null), Row(null))) // February val y1 = "2016-02-29" val y2 = "2017-02-29" @@ -198,11 +188,8 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")), Seq(Row(secs(ts5.getTime)), Row(null))) - // Not consistent behavior with gluten + velox. - // invalid format - // val invalid = df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd bb:HH:ss')") - // val e = intercept[IllegalArgumentException](invalid.collect()) - // assert(e.getMessage.contains('b')) + val invalid = df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd bb:HH:ss')") + checkAnswer(invalid, Seq(Row(null), Row(null), Row(null), Row(null))) } } } @@ -234,10 +221,8 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra if (legacyParserPolicy == "legacy") { // In Spark 2.4 and earlier, to_timestamp() parses in seconds precision and cuts off // the fractional part of seconds. The behavior was changed by SPARK-27438. - // Ignore this test case. Velox returns null for such case. - // val legacyFmt = "yyyy/MM/dd HH:mm:ss" - // checkAnswer(df.select(to_timestamp(col("s"), legacyFmt)), Seq( - // Row(ts1), Row(ts2))) + val legacyFmt = "yyyy/MM/dd HH:mm:ss" + checkAnswer(df.select(to_timestamp(col("s"), legacyFmt)), Seq(Row(ts1), Row(ts2))) } else { checkAnswer(df.select(to_timestamp(col("s"), fmt)), Seq(Row(ts1m), Row(ts2m))) } diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala index ae86c9d06e81..9c9a69d6a1ec 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala @@ -112,18 +112,9 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd mm:HH:ss')"), Seq(Row(secs(ts4.getTime)), Row(null), Row(secs(ts3.getTime)), Row(null))) - // legacyParserPolicy is not respected by Gluten. // invalid format - // val invalid = df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd aa:HH:ss')") - // if (legacyParserPolicy == "legacy") { - // checkAnswer(invalid, - // Seq(Row(null), Row(null), Row(null), Row(null))) - // } else { - // val e = intercept[SparkUpgradeException](invalid.collect()) - // assert(e.getCause.isInstanceOf[IllegalArgumentException]) - // assert( e.getMessage.contains( - // "You may get a different result due to the upgrading to Spark")) - // } + val invalid = df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd aa:HH:ss')") + checkAnswer(invalid, Seq(Row(null), Row(null), Row(null), Row(null))) // February val y1 = "2016-02-29" @@ -196,11 +187,8 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")), Seq(Row(secs(ts5.getTime)), Row(null))) - // Not consistent behavior with gluten + velox. - // invalid format - // val invalid = df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd bb:HH:ss')") - // val e = intercept[IllegalArgumentException](invalid.collect()) - // assert(e.getMessage.contains('b')) + val invalid = df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd bb:HH:ss')") + checkAnswer(invalid, Seq(Row(null), Row(null), Row(null), Row(null))) } } } @@ -232,10 +220,8 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra if (legacyParserPolicy == "legacy") { // In Spark 2.4 and earlier, to_timestamp() parses in seconds precision and cuts off // the fractional part of seconds. The behavior was changed by SPARK-27438. - // Ignore this test case. Velox returns null for such case. - // val legacyFmt = "yyyy/MM/dd HH:mm:ss" - // checkAnswer(df.select(to_timestamp(col("s"), legacyFmt)), Seq( - // Row(ts1), Row(ts2))) + val legacyFmt = "yyyy/MM/dd HH:mm:ss" + checkAnswer(df.select(to_timestamp(col("s"), legacyFmt)), Seq(Row(ts1), Row(ts2))) } else { checkAnswer(df.select(to_timestamp(col("s"), fmt)), Seq(Row(ts1m), Row(ts2m))) } diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala index ae86c9d06e81..1d74e4729525 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala @@ -112,18 +112,9 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd mm:HH:ss')"), Seq(Row(secs(ts4.getTime)), Row(null), Row(secs(ts3.getTime)), Row(null))) - // legacyParserPolicy is not respected by Gluten. // invalid format - // val invalid = df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd aa:HH:ss')") - // if (legacyParserPolicy == "legacy") { - // checkAnswer(invalid, - // Seq(Row(null), Row(null), Row(null), Row(null))) - // } else { - // val e = intercept[SparkUpgradeException](invalid.collect()) - // assert(e.getCause.isInstanceOf[IllegalArgumentException]) - // assert( e.getMessage.contains( - // "You may get a different result due to the upgrading to Spark")) - // } + val invalid = df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd aa:HH:ss')") + checkAnswer(invalid, Seq(Row(null), Row(null), Row(null), Row(null))) // February val y1 = "2016-02-29" @@ -196,11 +187,9 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")), Seq(Row(secs(ts5.getTime)), Row(null))) - // Not consistent behavior with gluten + velox. // invalid format - // val invalid = df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd bb:HH:ss')") - // val e = intercept[IllegalArgumentException](invalid.collect()) - // assert(e.getMessage.contains('b')) + val invalid = df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd bb:HH:ss')") + checkAnswer(invalid, Seq(Row(null), Row(null), Row(null), Row(null))) } } } @@ -232,10 +221,8 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra if (legacyParserPolicy == "legacy") { // In Spark 2.4 and earlier, to_timestamp() parses in seconds precision and cuts off // the fractional part of seconds. The behavior was changed by SPARK-27438. - // Ignore this test case. Velox returns null for such case. - // val legacyFmt = "yyyy/MM/dd HH:mm:ss" - // checkAnswer(df.select(to_timestamp(col("s"), legacyFmt)), Seq( - // Row(ts1), Row(ts2))) + val legacyFmt = "yyyy/MM/dd HH:mm:ss" + checkAnswer(df.select(to_timestamp(col("s"), legacyFmt)), Seq(Row(ts1), Row(ts2))) } else { checkAnswer(df.select(to_timestamp(col("s"), fmt)), Seq(Row(ts1m), Row(ts2m))) } diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala index e225f66bf07a..9c9a69d6a1ec 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala @@ -29,34 +29,6 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra private def secs(millis: Long): Long = TimeUnit.MILLISECONDS.toSeconds(millis) - test("unix_timestamp/to_unix_timestamp") { - Seq("corrected", "legacy").foreach { - time_parser_policy => - withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> time_parser_policy) { - val df = Seq("2022-01-04 14:52:40").toDF("d") - val fmt = "yyyy-MM-dd" - val r1 = df.selectExpr(s"unix_timestamp(d, '$fmt')") - val r2 = df.selectExpr(s"to_unix_timestamp(d, '$fmt')") - if (time_parser_policy == "corrected") { - checkAnswer(r1, Seq(Row(null))) - checkAnswer(r2, Seq(Row(null))) - } else { - checkAnswer(r1, Seq(Row(1641283200))) - checkAnswer(r2, Seq(Row(1641283200))) - } - val r3 = df.selectExpr(s"unix_timestamp('2022-01-04 14:52:40.017')") - val r4 = df.selectExpr(s"to_unix_timestamp('2022-01-04 14:52:40.017')") - if (time_parser_policy == "corrected") { - checkAnswer(r1, Seq(Row(null))) - checkAnswer(r2, Seq(Row(null))) - } else { - checkAnswer(r3, Seq(Row(1641336760))) - checkAnswer(r4, Seq(Row(1641336760))) - } - } - } - } - testGluten("unix_timestamp") { Seq("corrected", "legacy").foreach { legacyParserPolicy => @@ -140,18 +112,9 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd mm:HH:ss')"), Seq(Row(secs(ts4.getTime)), Row(null), Row(secs(ts3.getTime)), Row(null))) - // legacyParserPolicy is not respected by Gluten. // invalid format - // val invalid = df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd aa:HH:ss')") - // if (legacyParserPolicy == "legacy") { - // checkAnswer(invalid, - // Seq(Row(null), Row(null), Row(null), Row(null))) - // } else { - // val e = intercept[SparkUpgradeException](invalid.collect()) - // assert(e.getCause.isInstanceOf[IllegalArgumentException]) - // assert( e.getMessage.contains( - // "You may get a different result due to the upgrading to Spark")) - // } + val invalid = df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd aa:HH:ss')") + checkAnswer(invalid, Seq(Row(null), Row(null), Row(null), Row(null))) // February val y1 = "2016-02-29" @@ -224,11 +187,8 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")), Seq(Row(secs(ts5.getTime)), Row(null))) - // Not consistent behavior with gluten + velox. - // invalid format - // val invalid = df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd bb:HH:ss')") - // val e = intercept[IllegalArgumentException](invalid.collect()) - // assert(e.getMessage.contains('b')) + val invalid = df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd bb:HH:ss')") + checkAnswer(invalid, Seq(Row(null), Row(null), Row(null), Row(null))) } } } @@ -260,10 +220,8 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra if (legacyParserPolicy == "legacy") { // In Spark 2.4 and earlier, to_timestamp() parses in seconds precision and cuts off // the fractional part of seconds. The behavior was changed by SPARK-27438. - // Ignore this test case. Velox returns null for such case. - // val legacyFmt = "yyyy/MM/dd HH:mm:ss" - // checkAnswer(df.select(to_timestamp(col("s"), legacyFmt)), Seq( - // Row(ts1), Row(ts2))) + val legacyFmt = "yyyy/MM/dd HH:mm:ss" + checkAnswer(df.select(to_timestamp(col("s"), legacyFmt)), Seq(Row(ts1), Row(ts2))) } else { checkAnswer(df.select(to_timestamp(col("s"), fmt)), Seq(Row(ts1m), Row(ts2m))) } diff --git a/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala b/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala index 94a95ae515a3..4eb5b388ac46 100644 --- a/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala +++ b/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala @@ -656,6 +656,13 @@ object GlutenConfig { SPARK_SHUFFLE_FILE_BUFFER, (JavaUtils.byteStringAs(v, ByteUnit.KiB) * 1024).toString)) + conf + .get(LEGACY_TIME_PARSER_POLICY.key) + .foreach( + v => + nativeConfMap + .put(LEGACY_TIME_PARSER_POLICY.key, v.toUpperCase(Locale.ROOT))) + // Backend's dynamic session conf only. val confPrefix = prefixOf(backendName) conf From 50fc3c45b37f66080493048312e0b9ba4f927401 Mon Sep 17 00:00:00 2001 From: weixiuli Date: Fri, 24 Jan 2025 06:35:34 +0000 Subject: [PATCH 4/4] Address comments --- .../utils/velox/VeloxTestSettings.scala | 2 - .../spark/sql/GlutenDateFunctionsSuite.scala | 40 ------------------- .../utils/velox/VeloxTestSettings.scala | 2 - .../spark/sql/GlutenDateFunctionsSuite.scala | 40 ------------------- .../utils/velox/VeloxTestSettings.scala | 2 - .../spark/sql/GlutenDateFunctionsSuite.scala | 40 ------------------- .../utils/velox/VeloxTestSettings.scala | 2 - .../spark/sql/GlutenDateFunctionsSuite.scala | 40 ------------------- 8 files changed, 168 deletions(-) diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 62ab86836301..0a4d7f9deda0 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -261,8 +261,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("to_unix_timestamp") // Unsupported datetime format: specifier X is not supported by velox. .exclude("to_timestamp with microseconds precision") - // Replaced by another test. - .exclude("to_timestamp") // Legacy mode is not supported, assuming this mode is not commonly used. .exclude("SPARK-30668: use legacy timestamp parser in to_timestamp") // Legacy mode is not supported and velox getTimestamp function does not throw diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala index ab36914595d3..aa94dc50c4a5 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala @@ -194,46 +194,6 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra } } - // Ported from spark with a test case for legacy mode removed. - testGluten("to_timestamp") { - Seq("legacy", "corrected").foreach { - legacyParserPolicy => - withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> legacyParserPolicy) { - val date1 = Date.valueOf("2015-07-24") - val date2 = Date.valueOf("2015-07-25") - val ts_date1 = Timestamp.valueOf("2015-07-24 00:00:00") - val ts_date2 = Timestamp.valueOf("2015-07-25 00:00:00") - val ts1 = Timestamp.valueOf("2015-07-24 10:00:00") - val ts2 = Timestamp.valueOf("2015-07-25 02:02:02") - val s1 = "2015/07/24 10:00:00.5" - val s2 = "2015/07/25 02:02:02.6" - val ts1m = Timestamp.valueOf("2015-07-24 10:00:00.5") - val ts2m = Timestamp.valueOf("2015-07-25 02:02:02.6") - val ss1 = "2015-07-24 10:00:00" - val ss2 = "2015-07-25 02:02:02" - val fmt = "yyyy/MM/dd HH:mm:ss.S" - val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss") - - checkAnswer( - df.select(to_timestamp(col("ss"))), - df.select(timestamp_seconds(unix_timestamp(col("ss"))))) - checkAnswer(df.select(to_timestamp(col("ss"))), Seq(Row(ts1), Row(ts2))) - if (legacyParserPolicy == "legacy") { - // In Spark 2.4 and earlier, to_timestamp() parses in seconds precision and cuts off - // the fractional part of seconds. The behavior was changed by SPARK-27438. - val legacyFmt = "yyyy/MM/dd HH:mm:ss" - checkAnswer(df.select(to_timestamp(col("s"), legacyFmt)), Seq(Row(ts1), Row(ts2))) - } else { - checkAnswer(df.select(to_timestamp(col("s"), fmt)), Seq(Row(ts1m), Row(ts2m))) - } - checkAnswer(df.select(to_timestamp(col("ts"), fmt)), Seq(Row(ts1), Row(ts2))) - checkAnswer( - df.select(to_timestamp(col("d"), "yyyy-MM-dd")), - Seq(Row(ts_date1), Row(ts_date2))) - } - } - } - testGluten("function to_date") { val d1 = Date.valueOf("2015-07-22") val d2 = Date.valueOf("2015-07-01") diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 72b77ae1f95b..adba37f162cf 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -1070,8 +1070,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("to_unix_timestamp") // Unsupported datetime format: specifier X is not supported by velox. .exclude("to_timestamp with microseconds precision") - // Replaced by another test. - .exclude("to_timestamp") // Legacy mode is not supported, assuming this mode is not commonly used. .exclude("SPARK-30668: use legacy timestamp parser in to_timestamp") // Legacy mode is not supported and velox getTimestamp function does not throw diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala index 9c9a69d6a1ec..f9c5995cafdd 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala @@ -193,46 +193,6 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra } } - // Ported from spark with a test case for legacy mode removed. - testGluten("to_timestamp") { - Seq("legacy", "corrected").foreach { - legacyParserPolicy => - withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> legacyParserPolicy) { - val date1 = Date.valueOf("2015-07-24") - val date2 = Date.valueOf("2015-07-25") - val ts_date1 = Timestamp.valueOf("2015-07-24 00:00:00") - val ts_date2 = Timestamp.valueOf("2015-07-25 00:00:00") - val ts1 = Timestamp.valueOf("2015-07-24 10:00:00") - val ts2 = Timestamp.valueOf("2015-07-25 02:02:02") - val s1 = "2015/07/24 10:00:00.5" - val s2 = "2015/07/25 02:02:02.6" - val ts1m = Timestamp.valueOf("2015-07-24 10:00:00.5") - val ts2m = Timestamp.valueOf("2015-07-25 02:02:02.6") - val ss1 = "2015-07-24 10:00:00" - val ss2 = "2015-07-25 02:02:02" - val fmt = "yyyy/MM/dd HH:mm:ss.S" - val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss") - - checkAnswer( - df.select(to_timestamp(col("ss"))), - df.select(timestamp_seconds(unix_timestamp(col("ss"))))) - checkAnswer(df.select(to_timestamp(col("ss"))), Seq(Row(ts1), Row(ts2))) - if (legacyParserPolicy == "legacy") { - // In Spark 2.4 and earlier, to_timestamp() parses in seconds precision and cuts off - // the fractional part of seconds. The behavior was changed by SPARK-27438. - val legacyFmt = "yyyy/MM/dd HH:mm:ss" - checkAnswer(df.select(to_timestamp(col("s"), legacyFmt)), Seq(Row(ts1), Row(ts2))) - } else { - checkAnswer(df.select(to_timestamp(col("s"), fmt)), Seq(Row(ts1m), Row(ts2m))) - } - checkAnswer(df.select(to_timestamp(col("ts"), fmt)), Seq(Row(ts1), Row(ts2))) - checkAnswer( - df.select(to_timestamp(col("d"), "yyyy-MM-dd")), - Seq(Row(ts_date1), Row(ts_date2))) - } - } - } - testGluten("function to_date") { val d1 = Date.valueOf("2015-07-22") val d2 = Date.valueOf("2015-07-01") diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 94d3a1f6e890..d58319667147 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -1100,8 +1100,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("to_unix_timestamp") // Unsupported datetime format: specifier X is not supported by velox. .exclude("to_timestamp with microseconds precision") - // Replaced by another test. - .exclude("to_timestamp") // Legacy mode is not supported, assuming this mode is not commonly used. .exclude("SPARK-30668: use legacy timestamp parser in to_timestamp") // Legacy mode is not supported and velox getTimestamp function does not throw diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala index 1d74e4729525..d66f26d7c09e 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala @@ -194,46 +194,6 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra } } - // Ported from spark with a test case for legacy mode removed. - testGluten("to_timestamp") { - Seq("legacy", "corrected").foreach { - legacyParserPolicy => - withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> legacyParserPolicy) { - val date1 = Date.valueOf("2015-07-24") - val date2 = Date.valueOf("2015-07-25") - val ts_date1 = Timestamp.valueOf("2015-07-24 00:00:00") - val ts_date2 = Timestamp.valueOf("2015-07-25 00:00:00") - val ts1 = Timestamp.valueOf("2015-07-24 10:00:00") - val ts2 = Timestamp.valueOf("2015-07-25 02:02:02") - val s1 = "2015/07/24 10:00:00.5" - val s2 = "2015/07/25 02:02:02.6" - val ts1m = Timestamp.valueOf("2015-07-24 10:00:00.5") - val ts2m = Timestamp.valueOf("2015-07-25 02:02:02.6") - val ss1 = "2015-07-24 10:00:00" - val ss2 = "2015-07-25 02:02:02" - val fmt = "yyyy/MM/dd HH:mm:ss.S" - val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss") - - checkAnswer( - df.select(to_timestamp(col("ss"))), - df.select(timestamp_seconds(unix_timestamp(col("ss"))))) - checkAnswer(df.select(to_timestamp(col("ss"))), Seq(Row(ts1), Row(ts2))) - if (legacyParserPolicy == "legacy") { - // In Spark 2.4 and earlier, to_timestamp() parses in seconds precision and cuts off - // the fractional part of seconds. The behavior was changed by SPARK-27438. - val legacyFmt = "yyyy/MM/dd HH:mm:ss" - checkAnswer(df.select(to_timestamp(col("s"), legacyFmt)), Seq(Row(ts1), Row(ts2))) - } else { - checkAnswer(df.select(to_timestamp(col("s"), fmt)), Seq(Row(ts1m), Row(ts2m))) - } - checkAnswer(df.select(to_timestamp(col("ts"), fmt)), Seq(Row(ts1), Row(ts2))) - checkAnswer( - df.select(to_timestamp(col("d"), "yyyy-MM-dd")), - Seq(Row(ts_date1), Row(ts_date2))) - } - } - } - testGluten("function to_date") { val d1 = Date.valueOf("2015-07-22") val d2 = Date.valueOf("2015-07-01") diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 73c4d43ced53..2bb567dd68c4 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -1122,8 +1122,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("to_unix_timestamp") // Unsupported datetime format: specifier X is not supported by velox. .exclude("to_timestamp with microseconds precision") - // Replaced by another test. - .exclude("to_timestamp") // Legacy mode is not supported, assuming this mode is not commonly used. .exclude("SPARK-30668: use legacy timestamp parser in to_timestamp") // Legacy mode is not supported and velox getTimestamp function does not throw diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala index 9c9a69d6a1ec..f9c5995cafdd 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala @@ -193,46 +193,6 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra } } - // Ported from spark with a test case for legacy mode removed. - testGluten("to_timestamp") { - Seq("legacy", "corrected").foreach { - legacyParserPolicy => - withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> legacyParserPolicy) { - val date1 = Date.valueOf("2015-07-24") - val date2 = Date.valueOf("2015-07-25") - val ts_date1 = Timestamp.valueOf("2015-07-24 00:00:00") - val ts_date2 = Timestamp.valueOf("2015-07-25 00:00:00") - val ts1 = Timestamp.valueOf("2015-07-24 10:00:00") - val ts2 = Timestamp.valueOf("2015-07-25 02:02:02") - val s1 = "2015/07/24 10:00:00.5" - val s2 = "2015/07/25 02:02:02.6" - val ts1m = Timestamp.valueOf("2015-07-24 10:00:00.5") - val ts2m = Timestamp.valueOf("2015-07-25 02:02:02.6") - val ss1 = "2015-07-24 10:00:00" - val ss2 = "2015-07-25 02:02:02" - val fmt = "yyyy/MM/dd HH:mm:ss.S" - val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss") - - checkAnswer( - df.select(to_timestamp(col("ss"))), - df.select(timestamp_seconds(unix_timestamp(col("ss"))))) - checkAnswer(df.select(to_timestamp(col("ss"))), Seq(Row(ts1), Row(ts2))) - if (legacyParserPolicy == "legacy") { - // In Spark 2.4 and earlier, to_timestamp() parses in seconds precision and cuts off - // the fractional part of seconds. The behavior was changed by SPARK-27438. - val legacyFmt = "yyyy/MM/dd HH:mm:ss" - checkAnswer(df.select(to_timestamp(col("s"), legacyFmt)), Seq(Row(ts1), Row(ts2))) - } else { - checkAnswer(df.select(to_timestamp(col("s"), fmt)), Seq(Row(ts1m), Row(ts2m))) - } - checkAnswer(df.select(to_timestamp(col("ts"), fmt)), Seq(Row(ts1), Row(ts2))) - checkAnswer( - df.select(to_timestamp(col("d"), "yyyy-MM-dd")), - Seq(Row(ts_date1), Row(ts_date2))) - } - } - } - testGluten("function to_date") { val d1 = Date.valueOf("2015-07-22") val d2 = Date.valueOf("2015-07-01")