From e82a3b2c462ea412ef4452b322562c46edd07206 Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Sun, 3 Nov 2024 19:54:16 +0800 Subject: [PATCH 01/21] [CORE] Support Spark-344 Signed-off-by: Yuan Zhou --- pom.xml | 2 +- .../org/apache/gluten/sql/shims/spark34/SparkShimProvider.scala | 2 +- .../org/apache/spark/shuffle/SparkSortShuffleWriterUtil.scala | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 018cd597ac24..1474f93a75b1 100644 --- a/pom.xml +++ b/pom.xml @@ -322,7 +322,7 @@ 3.4 spark-sql-columnar-shims-spark34 - 3.4.3 + 3.4.4 1.5.0 delta-core 2.4.0 diff --git a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/SparkShimProvider.scala b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/SparkShimProvider.scala index 2e642366e011..c79626eb21bf 100644 --- a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/SparkShimProvider.scala +++ b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/SparkShimProvider.scala @@ -20,7 +20,7 @@ import org.apache.gluten.sql.shims.{SparkShimDescriptor, SparkShims} import org.apache.gluten.sql.shims.spark34.SparkShimProvider.DESCRIPTOR object SparkShimProvider { - val DESCRIPTOR = SparkShimDescriptor(3, 4, 3) + val DESCRIPTOR = SparkShimDescriptor(3, 4, 4) } class SparkShimProvider extends org.apache.gluten.sql.shims.SparkShimProvider { diff --git a/shims/spark34/src/main/scala/org/apache/spark/shuffle/SparkSortShuffleWriterUtil.scala b/shims/spark34/src/main/scala/org/apache/spark/shuffle/SparkSortShuffleWriterUtil.scala index 9e684c2afdd4..95b15f04e7cb 100644 --- a/shims/spark34/src/main/scala/org/apache/spark/shuffle/SparkSortShuffleWriterUtil.scala +++ b/shims/spark34/src/main/scala/org/apache/spark/shuffle/SparkSortShuffleWriterUtil.scala @@ -27,6 +27,6 @@ object SparkSortShuffleWriterUtil { context: TaskContext, writeMetrics: ShuffleWriteMetricsReporter, shuffleExecutorComponents: ShuffleExecutorComponents): ShuffleWriter[K, V] = { - new SortShuffleWriter(handle, mapId, context, shuffleExecutorComponents) + new SortShuffleWriter(handle, mapId, context, writeMetrics, shuffleExecutorComponents) } } From 161f15110cf6f8566dba3594d5a06950af41bc89 Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Mon, 4 Nov 2024 07:54:38 +0800 Subject: [PATCH 02/21] fix spark 344 unit tests Signed-off-by: Yuan Zhou --- .github/workflows/util/install_spark_resources.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/util/install_spark_resources.sh b/.github/workflows/util/install_spark_resources.sh index 1b00fe3ff293..ad454f601a1c 100755 --- a/.github/workflows/util/install_spark_resources.sh +++ b/.github/workflows/util/install_spark_resources.sh @@ -50,13 +50,13 @@ case "$1" in 3.4) # Spark-3.4 cd ${INSTALL_DIR} && \ - wget -nv https://archive.apache.org/dist/spark/spark-3.4.3/spark-3.4.3-bin-hadoop3.tgz && \ - tar --strip-components=1 -xf spark-3.4.3-bin-hadoop3.tgz spark-3.4.3-bin-hadoop3/jars/ && \ - rm -rf spark-3.4.3-bin-hadoop3.tgz && \ + wget -nv https://archive.apache.org/dist/spark/spark-3.4.4/spark-3.4.4-bin-hadoop3.tgz && \ + tar --strip-components=1 -xf spark-3.4.4-bin-hadoop3.tgz spark-3.4.4-bin-hadoop3/jars/ && \ + rm -rf spark-3.4.4-bin-hadoop3.tgz && \ mkdir -p ${INSTALL_DIR}/shims/spark34/spark_home/assembly/target/scala-2.12 && \ mv jars ${INSTALL_DIR}/shims/spark34/spark_home/assembly/target/scala-2.12 && \ - wget -nv https://github.com/apache/spark/archive/refs/tags/v3.4.3.tar.gz && \ - tar --strip-components=1 -xf v3.4.3.tar.gz spark-3.4.3/sql/core/src/test/resources/ && \ + wget -nv https://github.com/apache/spark/archive/refs/tags/v3.4.4.tar.gz && \ + tar --strip-components=1 -xf v3.4.4.tar.gz spark-3.4.4/sql/core/src/test/resources/ && \ mkdir -p shims/spark34/spark_home/ && \ mv sql shims/spark34/spark_home/ ;; From acb7e195d78a91d6e947b0c8198d7d249276d519 Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Mon, 4 Nov 2024 11:29:55 +0800 Subject: [PATCH 03/21] fix spark version Signed-off-by: Yuan Zhou --- .github/workflows/velox_backend.yml | 27 +++++++++++++++++++++++---- docs/get-started/Velox.md | 4 ++-- docs/get-started/build-guide.md | 2 +- pom.xml | 2 +- tools/gluten-it/pom.xml | 4 ++-- 5 files changed, 29 insertions(+), 10 deletions(-) diff --git a/.github/workflows/velox_backend.yml b/.github/workflows/velox_backend.yml index ec79bc8b1bea..d7882c3647af 100644 --- a/.github/workflows/velox_backend.yml +++ b/.github/workflows/velox_backend.yml @@ -747,14 +747,22 @@ jobs: with: name: arrow-jars-centos-7-${{github.sha}} path: /root/.m2/repository/org/apache/arrow/ - - name: Prepare + - name: Update mirror list + run: | + sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* || true + sed -i -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" /etc/yum.repos.d/CentOS-* || true + - name: Setup build dependency + run: | + yum install sudo patch java-1.8.0-openjdk-devel wget -y + $SETUP install_maven + - name: Prepare spark.test.home for Spark 3.4.4 (other tests) run: | dnf module -y install python39 && \ alternatives --set python3 /usr/bin/python3.9 && \ pip3 install setuptools && \ - pip3 install pyspark==3.4.3 cython && \ + pip3 install pyspark==3.4.4 cython && \ pip3 install pandas pyarrow - - name: Build and Run unit test for Spark 3.4.3 (other tests) + - name: Build and Run unit test for Spark 3.4.4 (other tests) run: | cd $GITHUB_WORKSPACE/ export SPARK_SCALA_VERSION=2.12 @@ -791,7 +799,18 @@ jobs: with: name: arrow-jars-centos-7-${{github.sha}} path: /root/.m2/repository/org/apache/arrow/ - - name: Build and Run unit test for Spark 3.4.3 (slow tests) + - name: Update mirror list + run: | + sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* || true + sed -i -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" /etc/yum.repos.d/CentOS-* || true + - name: Setup build dependency + run: | + yum install sudo patch java-1.8.0-openjdk-devel wget -y + $SETUP install_maven + - name: Prepare spark.test.home for Spark 3.4.4 (slow tests) + run: | + bash .github/workflows/util/install_spark_resources.sh 3.4 + - name: Build and Run unit test for Spark 3.4.4 (slow tests) run: | cd $GITHUB_WORKSPACE/ $MVN_CMD clean test -Pspark-3.4 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut -Phudi \ diff --git a/docs/get-started/Velox.md b/docs/get-started/Velox.md index 81bb88c75aec..dfe7dbf53c8d 100644 --- a/docs/get-started/Velox.md +++ b/docs/get-started/Velox.md @@ -9,7 +9,7 @@ parent: Getting-Started | Type | Version | |-------|------------------------------| -| Spark | 3.2.2, 3.3.1, 3.4.3, 3.5.1 | +| Spark | 3.2.2, 3.3.1, 3.4.4, 3.5.1 | | OS | Ubuntu20.04/22.04, Centos7/8 | | jdk | openjdk8/jdk17 | | scala | 2.12 | @@ -18,7 +18,7 @@ parent: Getting-Started Currently, with static build Gluten+Velox backend supports all the Linux OSes, but is only tested on **Ubuntu20.04/Ubuntu22.04/Centos7/Centos8**. With dynamic build, Gluten+Velox backend support **Ubuntu20.04/Ubuntu22.04/Centos7/Centos8** and their variants. -Currently, the officially supported Spark versions are 3.2.2, 3.3.1, 3.4.3 and 3.5.1. +Currently, the officially supported Spark versions are 3.2.2, 3.3.1, 3.4.4 and 3.5.1. We need to set up the `JAVA_HOME` env. Currently, Gluten supports **java 8** and **java 17**. diff --git a/docs/get-started/build-guide.md b/docs/get-started/build-guide.md index d9c3beaab9dc..32b9ce732b7a 100644 --- a/docs/get-started/build-guide.md +++ b/docs/get-started/build-guide.md @@ -73,5 +73,5 @@ It's name pattern is `gluten--bundle-spark_< |---------------|----------------------|----------------------| | 3.2.2 | 3.2 | 2.12 | | 3.3.1 | 3.3 | 2.12 | -| 3.4.3 | 3.4 | 2.12 | +| 3.4.4 | 3.4 | 2.12 | | 3.5.1 | 3.5 | 2.12 | diff --git a/pom.xml b/pom.xml index 1474f93a75b1..e987759ebdb6 100644 --- a/pom.xml +++ b/pom.xml @@ -59,7 +59,7 @@ 2.12.15 3 3.4 - 3.4.3 + 3.4.4 spark-sql-columnar-shims-spark34 1.5.0 delta-core diff --git a/tools/gluten-it/pom.xml b/tools/gluten-it/pom.xml index 9b1cf10df891..570e45af66d4 100644 --- a/tools/gluten-it/pom.xml +++ b/tools/gluten-it/pom.xml @@ -18,7 +18,7 @@ ${java.version} ${java.version} 2.12.17 - 3.4.3 + 3.4.4 2.12 3 0.3.2-incubating @@ -163,7 +163,7 @@ spark-3.4 - 3.4.3 + 3.4.4 2.12.17 From 9f619ce25d16ae8f7a93e4b9a6d67c60b0f30c07 Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Tue, 5 Nov 2024 12:55:07 +0800 Subject: [PATCH 04/21] bump iceberg version to 1.6.1 Signed-off-by: Yuan Zhou --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index e987759ebdb6..352e868704ab 100644 --- a/pom.xml +++ b/pom.xml @@ -323,7 +323,7 @@ 3.4 spark-sql-columnar-shims-spark34 3.4.4 - 1.5.0 + 1.6.1 delta-core 2.4.0 24 From fb9a37fedac92135c270d7b52f76c62eec2d74cb Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Mon, 9 Dec 2024 14:42:44 +0800 Subject: [PATCH 05/21] bump iceberg version Signed-off-by: Yuan Zhou --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 352e868704ab..393c152265f9 100644 --- a/pom.xml +++ b/pom.xml @@ -323,7 +323,7 @@ 3.4 spark-sql-columnar-shims-spark34 3.4.4 - 1.6.1 + 1.7.1 delta-core 2.4.0 24 From 73fc2113b033c570f9f9e0cf6bf23dee3d609de2 Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Wed, 8 Jan 2025 22:31:30 +0800 Subject: [PATCH 06/21] jdk11 Signed-off-by: Yuan Zhou --- .github/workflows/velox_backend.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/velox_backend.yml b/.github/workflows/velox_backend.yml index d7882c3647af..840a8b07e663 100644 --- a/.github/workflows/velox_backend.yml +++ b/.github/workflows/velox_backend.yml @@ -762,11 +762,13 @@ jobs: pip3 install setuptools && \ pip3 install pyspark==3.4.4 cython && \ pip3 install pandas pyarrow - - name: Build and Run unit test for Spark 3.4.4 (other tests) + yum remove -y java-1.8.0-openjdk-devel && yum -y install java-11-openjdk-devel + - name: Build and Run unit test for Spark 3.4.3 (other tests) run: | cd $GITHUB_WORKSPACE/ export SPARK_SCALA_VERSION=2.12 - $MVN_CMD clean test -Pspark-3.4 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \ + export JAVA_HOME=/usr/lib/jvm/java-11-openjdk + $MVN_CMD clean test -Pspark-3.4 -Pjava-11 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \ -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" \ -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags - name: Upload test report From 7a82b46deccfb0a17435612d137b86048816111b Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Wed, 8 Jan 2025 23:06:14 +0800 Subject: [PATCH 07/21] jdk11 Signed-off-by: Yuan Zhou --- .github/workflows/velox_backend.yml | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/.github/workflows/velox_backend.yml b/.github/workflows/velox_backend.yml index 840a8b07e663..f2f46a3d8e05 100644 --- a/.github/workflows/velox_backend.yml +++ b/.github/workflows/velox_backend.yml @@ -763,7 +763,7 @@ jobs: pip3 install pyspark==3.4.4 cython && \ pip3 install pandas pyarrow yum remove -y java-1.8.0-openjdk-devel && yum -y install java-11-openjdk-devel - - name: Build and Run unit test for Spark 3.4.3 (other tests) + - name: Build and Run unit test for Spark 3.4.4 (other tests) run: | cd $GITHUB_WORKSPACE/ export SPARK_SCALA_VERSION=2.12 @@ -801,21 +801,14 @@ jobs: with: name: arrow-jars-centos-7-${{github.sha}} path: /root/.m2/repository/org/apache/arrow/ - - name: Update mirror list - run: | - sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* || true - sed -i -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" /etc/yum.repos.d/CentOS-* || true - - name: Setup build dependency - run: | - yum install sudo patch java-1.8.0-openjdk-devel wget -y - $SETUP install_maven - - name: Prepare spark.test.home for Spark 3.4.4 (slow tests) + - name: Prepare run: | - bash .github/workflows/util/install_spark_resources.sh 3.4 + yum remove -y java-1.8.0-openjdk-devel && yum -y install java-11-openjdk-devel - name: Build and Run unit test for Spark 3.4.4 (slow tests) run: | cd $GITHUB_WORKSPACE/ - $MVN_CMD clean test -Pspark-3.4 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut -Phudi \ + export JAVA_HOME=/usr/lib/jvm/java-11-openjdk + $MVN_CMD clean test -Pspark-3.4 -Pjava-11 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut -Phudi \ -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" \ -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest - name: Upload test report From 05d3acb47b884502c747e5e049325c83979cae4a Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Thu, 9 Jan 2025 07:36:14 +0800 Subject: [PATCH 08/21] netty Signed-off-by: Yuan Zhou --- .github/workflows/velox_backend.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/velox_backend.yml b/.github/workflows/velox_backend.yml index f2f46a3d8e05..34551b70276e 100644 --- a/.github/workflows/velox_backend.yml +++ b/.github/workflows/velox_backend.yml @@ -770,7 +770,8 @@ jobs: export JAVA_HOME=/usr/lib/jvm/java-11-openjdk $MVN_CMD clean test -Pspark-3.4 -Pjava-11 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \ -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" \ - -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags + -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags \ + -DargLine="-Dio.netty.tryReflectionSetAccessible=true" - name: Upload test report if: always() uses: actions/upload-artifact@v4 @@ -810,7 +811,8 @@ jobs: export JAVA_HOME=/usr/lib/jvm/java-11-openjdk $MVN_CMD clean test -Pspark-3.4 -Pjava-11 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut -Phudi \ -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" \ - -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest + -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest \ + -DargLine="-Dio.netty.tryReflectionSetAccessible=true" - name: Upload test report if: always() uses: actions/upload-artifact@v4 From f2f14abf173508d43c9e566b26588d3070e30a04 Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Thu, 9 Jan 2025 13:18:32 +0800 Subject: [PATCH 09/21] disable arrow suite Signed-off-by: Yuan Zhou --- .../python/ArrowEvalPythonExecSuite.scala | 102 ------------------ 1 file changed, 102 deletions(-) delete mode 100644 backends-velox/src/test/scala/org/apache/gluten/execution/python/ArrowEvalPythonExecSuite.scala diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/python/ArrowEvalPythonExecSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/python/ArrowEvalPythonExecSuite.scala deleted file mode 100644 index c2a191a20d0b..000000000000 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/python/ArrowEvalPythonExecSuite.scala +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.gluten.execution.python - -import org.apache.gluten.execution.WholeStageTransformerSuite - -import org.apache.spark.SparkConf -import org.apache.spark.api.python.ColumnarArrowEvalPythonExec -import org.apache.spark.sql.IntegratedUDFTestUtils - -class ArrowEvalPythonExecSuite extends WholeStageTransformerSuite { - - import IntegratedUDFTestUtils._ - import testImplicits.localSeqToDatasetHolder - import testImplicits.newProductEncoder - - override protected val resourcePath: String = "/tpch-data-parquet" - override protected val fileFormat: String = "parquet" - val pyarrowTestUDF = TestScalarPandasUDF(name = "pyarrowUDF") - - override def sparkConf: SparkConf = { - super.sparkConf - .set("spark.sql.shuffle.partitions", "1") - .set("spark.default.parallelism", "1") - .set("spark.executor.cores", "1") - } - - test("arrow_udf test: without projection") { - lazy val base = - Seq(("1", 1), ("1", 2), ("2", 1), ("2", 2), ("3", 1), ("3", 2), ("0", 1), ("3", 0)) - .toDF("a", "b") - lazy val expected = Seq( - ("1", "1"), - ("1", "1"), - ("2", "2"), - ("2", "2"), - ("3", "3"), - ("3", "3"), - ("0", "0"), - ("3", "3") - ).toDF("a", "p_a") - - val df2 = base.select("a").withColumn("p_a", pyarrowTestUDF(base("a"))) - checkSparkOperatorMatch[ColumnarArrowEvalPythonExec](df2) - checkAnswer(df2, expected) - } - - test("arrow_udf test: with unrelated projection") { - lazy val base = - Seq(("1", 1), ("1", 2), ("2", 1), ("2", 2), ("3", 1), ("3", 2), ("0", 1), ("3", 0)) - .toDF("a", "b") - lazy val expected = Seq( - ("1", 1, "1", 2), - ("1", 2, "1", 4), - ("2", 1, "2", 2), - ("2", 2, "2", 4), - ("3", 1, "3", 2), - ("3", 2, "3", 4), - ("0", 1, "0", 2), - ("3", 0, "3", 0) - ).toDF("a", "b", "p_a", "d_b") - - val df = base.withColumn("p_a", pyarrowTestUDF(base("a"))).withColumn("d_b", base("b") * 2) - checkSparkOperatorMatch[ColumnarArrowEvalPythonExec](df) - checkAnswer(df, expected) - } - - test("arrow_udf test: with preprojection") { - lazy val base = - Seq(("1", 1), ("1", 2), ("2", 1), ("2", 2), ("3", 1), ("3", 2), ("0", 1), ("3", 0)) - .toDF("a", "b") - lazy val expected = Seq( - ("1", 1, 2, "1", 2), - ("1", 2, 4, "1", 4), - ("2", 1, 2, "2", 2), - ("2", 2, 4, "2", 4), - ("3", 1, 2, "3", 2), - ("3", 2, 4, "3", 4), - ("0", 1, 2, "0", 2), - ("3", 0, 0, "3", 0) - ).toDF("a", "b", "d_b", "p_a", "p_b") - val df = base - .withColumn("d_b", base("b") * 2) - .withColumn("p_a", pyarrowTestUDF(base("a"))) - .withColumn("p_b", pyarrowTestUDF(base("b") * 2)) - checkAnswer(df, expected) - } -} From 535cd027c0171b68c0ab55ea38934f522f83fb3e Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Thu, 9 Jan 2025 14:38:33 +0800 Subject: [PATCH 10/21] update spark Signed-off-by: Yuan Zhou --- .github/workflows/velox_backend.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/velox_backend.yml b/.github/workflows/velox_backend.yml index 34551b70276e..b1b44c861b9f 100644 --- a/.github/workflows/velox_backend.yml +++ b/.github/workflows/velox_backend.yml @@ -757,12 +757,14 @@ jobs: $SETUP install_maven - name: Prepare spark.test.home for Spark 3.4.4 (other tests) run: | + bash .github/workflows/util/install_spark_resources.sh 3.4 dnf module -y install python39 && \ alternatives --set python3 /usr/bin/python3.9 && \ pip3 install setuptools && \ pip3 install pyspark==3.4.4 cython && \ pip3 install pandas pyarrow yum remove -y java-1.8.0-openjdk-devel && yum -y install java-11-openjdk-devel + - name: Build and Run unit test for Spark 3.4.4 (other tests) run: | cd $GITHUB_WORKSPACE/ @@ -805,6 +807,7 @@ jobs: - name: Prepare run: | yum remove -y java-1.8.0-openjdk-devel && yum -y install java-11-openjdk-devel + bash .github/workflows/util/install_spark_resources.sh 3.4 - name: Build and Run unit test for Spark 3.4.4 (slow tests) run: | cd $GITHUB_WORKSPACE/ From 78fcd030a2e0b1cb8d573113f5125b65c60ecd64 Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Thu, 9 Jan 2025 14:56:42 +0800 Subject: [PATCH 11/21] fix Signed-off-by: Yuan Zhou --- .github/workflows/velox_backend.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/velox_backend.yml b/.github/workflows/velox_backend.yml index b1b44c861b9f..ff217d2b2ede 100644 --- a/.github/workflows/velox_backend.yml +++ b/.github/workflows/velox_backend.yml @@ -757,6 +757,7 @@ jobs: $SETUP install_maven - name: Prepare spark.test.home for Spark 3.4.4 (other tests) run: | + rm -rf /opt/shims/spark34 bash .github/workflows/util/install_spark_resources.sh 3.4 dnf module -y install python39 && \ alternatives --set python3 /usr/bin/python3.9 && \ @@ -807,6 +808,7 @@ jobs: - name: Prepare run: | yum remove -y java-1.8.0-openjdk-devel && yum -y install java-11-openjdk-devel + rm -rf /opt/shims/spark34 bash .github/workflows/util/install_spark_resources.sh 3.4 - name: Build and Run unit test for Spark 3.4.4 (slow tests) run: | From c2dfac68e3565215907270c10ad7577bd605dc1d Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Thu, 9 Jan 2025 16:51:01 +0800 Subject: [PATCH 12/21] remove dead code Signed-off-by: Yuan Zhou --- .github/workflows/velox_backend.yml | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/.github/workflows/velox_backend.yml b/.github/workflows/velox_backend.yml index ff217d2b2ede..3c198cf83f6a 100644 --- a/.github/workflows/velox_backend.yml +++ b/.github/workflows/velox_backend.yml @@ -747,14 +747,6 @@ jobs: with: name: arrow-jars-centos-7-${{github.sha}} path: /root/.m2/repository/org/apache/arrow/ - - name: Update mirror list - run: | - sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* || true - sed -i -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" /etc/yum.repos.d/CentOS-* || true - - name: Setup build dependency - run: | - yum install sudo patch java-1.8.0-openjdk-devel wget -y - $SETUP install_maven - name: Prepare spark.test.home for Spark 3.4.4 (other tests) run: | rm -rf /opt/shims/spark34 @@ -771,6 +763,7 @@ jobs: cd $GITHUB_WORKSPACE/ export SPARK_SCALA_VERSION=2.12 export JAVA_HOME=/usr/lib/jvm/java-11-openjdk + ls -l /opt/shims/spark34/spark_home/ $MVN_CMD clean test -Pspark-3.4 -Pjava-11 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \ -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" \ -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags \ @@ -814,6 +807,7 @@ jobs: run: | cd $GITHUB_WORKSPACE/ export JAVA_HOME=/usr/lib/jvm/java-11-openjdk + ls -l /opt/shims/spark34/spark_home/ $MVN_CMD clean test -Pspark-3.4 -Pjava-11 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut -Phudi \ -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" \ -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest \ From 596602f54369dca307fba85d212ee365a3d8dac9 Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Tue, 14 Jan 2025 11:34:07 +0800 Subject: [PATCH 13/21] update to use jdk11 docker Signed-off-by: Yuan Zhou --- .github/workflows/velox_backend.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/velox_backend.yml b/.github/workflows/velox_backend.yml index 3c198cf83f6a..f5f6f458bdc8 100644 --- a/.github/workflows/velox_backend.yml +++ b/.github/workflows/velox_backend.yml @@ -734,7 +734,7 @@ jobs: run-spark-test-spark34: needs: build-native-lib-centos-7 runs-on: ubuntu-20.04 - container: apache/gluten:centos-8 + container: apache/gluten:centos-8-jdk11 steps: - uses: actions/checkout@v2 - name: Download All Artifacts @@ -756,8 +756,6 @@ jobs: pip3 install setuptools && \ pip3 install pyspark==3.4.4 cython && \ pip3 install pandas pyarrow - yum remove -y java-1.8.0-openjdk-devel && yum -y install java-11-openjdk-devel - - name: Build and Run unit test for Spark 3.4.4 (other tests) run: | cd $GITHUB_WORKSPACE/ @@ -785,7 +783,7 @@ jobs: run-spark-test-spark34-slow: needs: build-native-lib-centos-7 runs-on: ubuntu-20.04 - container: apache/gluten:centos-8 + container: apache/gluten:centos-8-jdk11 steps: - uses: actions/checkout@v2 - name: Download All Artifacts @@ -800,7 +798,6 @@ jobs: path: /root/.m2/repository/org/apache/arrow/ - name: Prepare run: | - yum remove -y java-1.8.0-openjdk-devel && yum -y install java-11-openjdk-devel rm -rf /opt/shims/spark34 bash .github/workflows/util/install_spark_resources.sh 3.4 - name: Build and Run unit test for Spark 3.4.4 (slow tests) From 893fbcace4f5b1e21a563b5571c71039b056ce97 Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Tue, 14 Jan 2025 15:04:53 +0800 Subject: [PATCH 14/21] adding spark home Signed-off-by: Yuan Zhou --- .github/workflows/velox_backend.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/velox_backend.yml b/.github/workflows/velox_backend.yml index f5f6f458bdc8..752b96431258 100644 --- a/.github/workflows/velox_backend.yml +++ b/.github/workflows/velox_backend.yml @@ -761,6 +761,7 @@ jobs: cd $GITHUB_WORKSPACE/ export SPARK_SCALA_VERSION=2.12 export JAVA_HOME=/usr/lib/jvm/java-11-openjdk + export SPARK_HOME=/opt/shims/spark34/spark_home/ ls -l /opt/shims/spark34/spark_home/ $MVN_CMD clean test -Pspark-3.4 -Pjava-11 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \ -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" \ @@ -804,6 +805,7 @@ jobs: run: | cd $GITHUB_WORKSPACE/ export JAVA_HOME=/usr/lib/jvm/java-11-openjdk + export SPARK_HOME=/opt/shims/spark34/spark_home/ ls -l /opt/shims/spark34/spark_home/ $MVN_CMD clean test -Pspark-3.4 -Pjava-11 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut -Phudi \ -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" \ From 23293aa7d9ad868f128fa8ff7d2edbdb51b238f6 Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Tue, 14 Jan 2025 21:09:02 +0800 Subject: [PATCH 15/21] Revert "disable arrow suite" This reverts commit f2f14abf173508d43c9e566b26588d3070e30a04. --- .../python/ArrowEvalPythonExecSuite.scala | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 backends-velox/src/test/scala/org/apache/gluten/execution/python/ArrowEvalPythonExecSuite.scala diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/python/ArrowEvalPythonExecSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/python/ArrowEvalPythonExecSuite.scala new file mode 100644 index 000000000000..c2a191a20d0b --- /dev/null +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/python/ArrowEvalPythonExecSuite.scala @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.gluten.execution.python + +import org.apache.gluten.execution.WholeStageTransformerSuite + +import org.apache.spark.SparkConf +import org.apache.spark.api.python.ColumnarArrowEvalPythonExec +import org.apache.spark.sql.IntegratedUDFTestUtils + +class ArrowEvalPythonExecSuite extends WholeStageTransformerSuite { + + import IntegratedUDFTestUtils._ + import testImplicits.localSeqToDatasetHolder + import testImplicits.newProductEncoder + + override protected val resourcePath: String = "/tpch-data-parquet" + override protected val fileFormat: String = "parquet" + val pyarrowTestUDF = TestScalarPandasUDF(name = "pyarrowUDF") + + override def sparkConf: SparkConf = { + super.sparkConf + .set("spark.sql.shuffle.partitions", "1") + .set("spark.default.parallelism", "1") + .set("spark.executor.cores", "1") + } + + test("arrow_udf test: without projection") { + lazy val base = + Seq(("1", 1), ("1", 2), ("2", 1), ("2", 2), ("3", 1), ("3", 2), ("0", 1), ("3", 0)) + .toDF("a", "b") + lazy val expected = Seq( + ("1", "1"), + ("1", "1"), + ("2", "2"), + ("2", "2"), + ("3", "3"), + ("3", "3"), + ("0", "0"), + ("3", "3") + ).toDF("a", "p_a") + + val df2 = base.select("a").withColumn("p_a", pyarrowTestUDF(base("a"))) + checkSparkOperatorMatch[ColumnarArrowEvalPythonExec](df2) + checkAnswer(df2, expected) + } + + test("arrow_udf test: with unrelated projection") { + lazy val base = + Seq(("1", 1), ("1", 2), ("2", 1), ("2", 2), ("3", 1), ("3", 2), ("0", 1), ("3", 0)) + .toDF("a", "b") + lazy val expected = Seq( + ("1", 1, "1", 2), + ("1", 2, "1", 4), + ("2", 1, "2", 2), + ("2", 2, "2", 4), + ("3", 1, "3", 2), + ("3", 2, "3", 4), + ("0", 1, "0", 2), + ("3", 0, "3", 0) + ).toDF("a", "b", "p_a", "d_b") + + val df = base.withColumn("p_a", pyarrowTestUDF(base("a"))).withColumn("d_b", base("b") * 2) + checkSparkOperatorMatch[ColumnarArrowEvalPythonExec](df) + checkAnswer(df, expected) + } + + test("arrow_udf test: with preprojection") { + lazy val base = + Seq(("1", 1), ("1", 2), ("2", 1), ("2", 2), ("3", 1), ("3", 2), ("0", 1), ("3", 0)) + .toDF("a", "b") + lazy val expected = Seq( + ("1", 1, 2, "1", 2), + ("1", 2, 4, "1", 4), + ("2", 1, 2, "2", 2), + ("2", 2, 4, "2", 4), + ("3", 1, 2, "3", 2), + ("3", 2, 4, "3", 4), + ("0", 1, 2, "0", 2), + ("3", 0, 0, "3", 0) + ).toDF("a", "b", "d_b", "p_a", "p_b") + val df = base + .withColumn("d_b", base("b") * 2) + .withColumn("p_a", pyarrowTestUDF(base("a"))) + .withColumn("p_b", pyarrowTestUDF(base("b") * 2)) + checkAnswer(df, expected) + } +} From 19bef4281c8c8037e3a1ac9aabf19eec29f080f2 Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Wed, 15 Jan 2025 10:56:36 +0800 Subject: [PATCH 16/21] fix argLine in mvn test Signed-off-by: Yuan Zhou --- .github/workflows/velox_backend.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/velox_backend.yml b/.github/workflows/velox_backend.yml index 752b96431258..d01272e75877 100644 --- a/.github/workflows/velox_backend.yml +++ b/.github/workflows/velox_backend.yml @@ -764,8 +764,8 @@ jobs: export SPARK_HOME=/opt/shims/spark34/spark_home/ ls -l /opt/shims/spark34/spark_home/ $MVN_CMD clean test -Pspark-3.4 -Pjava-11 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \ - -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" \ -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags \ + -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" \ -DargLine="-Dio.netty.tryReflectionSetAccessible=true" - name: Upload test report if: always() @@ -808,8 +808,8 @@ jobs: export SPARK_HOME=/opt/shims/spark34/spark_home/ ls -l /opt/shims/spark34/spark_home/ $MVN_CMD clean test -Pspark-3.4 -Pjava-11 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut -Phudi \ - -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" \ -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest \ + -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" \ -DargLine="-Dio.netty.tryReflectionSetAccessible=true" - name: Upload test report if: always() From 1e504a35fcebcbc9318ee75421bdf7750f52d3c3 Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Wed, 15 Jan 2025 13:46:19 +0800 Subject: [PATCH 17/21] jdk17 Signed-off-by: Yuan Zhou --- .github/workflows/velox_backend.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/velox_backend.yml b/.github/workflows/velox_backend.yml index d01272e75877..27c1f4111d32 100644 --- a/.github/workflows/velox_backend.yml +++ b/.github/workflows/velox_backend.yml @@ -734,7 +734,7 @@ jobs: run-spark-test-spark34: needs: build-native-lib-centos-7 runs-on: ubuntu-20.04 - container: apache/gluten:centos-8-jdk11 + container: apache/gluten:centos-8-jdk17 steps: - uses: actions/checkout@v2 - name: Download All Artifacts @@ -760,10 +760,10 @@ jobs: run: | cd $GITHUB_WORKSPACE/ export SPARK_SCALA_VERSION=2.12 - export JAVA_HOME=/usr/lib/jvm/java-11-openjdk + export JAVA_HOME=/usr/lib/jvm/java-17-openjdk export SPARK_HOME=/opt/shims/spark34/spark_home/ ls -l /opt/shims/spark34/spark_home/ - $MVN_CMD clean test -Pspark-3.4 -Pjava-11 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \ + $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \ -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags \ -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" \ -DargLine="-Dio.netty.tryReflectionSetAccessible=true" @@ -784,7 +784,7 @@ jobs: run-spark-test-spark34-slow: needs: build-native-lib-centos-7 runs-on: ubuntu-20.04 - container: apache/gluten:centos-8-jdk11 + container: apache/gluten:centos-8-jdk17 steps: - uses: actions/checkout@v2 - name: Download All Artifacts @@ -804,10 +804,10 @@ jobs: - name: Build and Run unit test for Spark 3.4.4 (slow tests) run: | cd $GITHUB_WORKSPACE/ - export JAVA_HOME=/usr/lib/jvm/java-11-openjdk + export JAVA_HOME=/usr/lib/jvm/java-17-openjdk export SPARK_HOME=/opt/shims/spark34/spark_home/ ls -l /opt/shims/spark34/spark_home/ - $MVN_CMD clean test -Pspark-3.4 -Pjava-11 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut -Phudi \ + $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut -Phudi \ -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest \ -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" \ -DargLine="-Dio.netty.tryReflectionSetAccessible=true" From 396e63f7f4f3037cf25d85d795f719d65b38a3e8 Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Wed, 15 Jan 2025 14:08:48 +0800 Subject: [PATCH 18/21] fix jdk flags Signed-off-by: Yuan Zhou --- .github/workflows/velox_backend.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/velox_backend.yml b/.github/workflows/velox_backend.yml index 27c1f4111d32..68d25e74fe73 100644 --- a/.github/workflows/velox_backend.yml +++ b/.github/workflows/velox_backend.yml @@ -765,8 +765,7 @@ jobs: ls -l /opt/shims/spark34/spark_home/ $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \ -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags \ - -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" \ - -DargLine="-Dio.netty.tryReflectionSetAccessible=true" + -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/ -Dio.netty.tryReflectionSetAccessible=true -Djdk.reflect.useDirectMethodHandle=false" - name: Upload test report if: always() uses: actions/upload-artifact@v4 @@ -809,8 +808,7 @@ jobs: ls -l /opt/shims/spark34/spark_home/ $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut -Phudi \ -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest \ - -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" \ - -DargLine="-Dio.netty.tryReflectionSetAccessible=true" + -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/ -Dio.netty.tryReflectionSetAccessible=true -Djdk.reflect.useDirectMethodHandle=false" - name: Upload test report if: always() uses: actions/upload-artifact@v4 From 372ed093c86073a16f6e80df9e2e5df4bd841ae5 Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Wed, 15 Jan 2025 14:24:27 +0800 Subject: [PATCH 19/21] Revert "jdk17" This reverts commit 1e504a35fcebcbc9318ee75421bdf7750f52d3c3. --- .github/workflows/velox_backend.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/velox_backend.yml b/.github/workflows/velox_backend.yml index 68d25e74fe73..2138304433e9 100644 --- a/.github/workflows/velox_backend.yml +++ b/.github/workflows/velox_backend.yml @@ -734,7 +734,7 @@ jobs: run-spark-test-spark34: needs: build-native-lib-centos-7 runs-on: ubuntu-20.04 - container: apache/gluten:centos-8-jdk17 + container: apache/gluten:centos-8-jdk11 steps: - uses: actions/checkout@v2 - name: Download All Artifacts @@ -760,10 +760,10 @@ jobs: run: | cd $GITHUB_WORKSPACE/ export SPARK_SCALA_VERSION=2.12 - export JAVA_HOME=/usr/lib/jvm/java-17-openjdk + export JAVA_HOME=/usr/lib/jvm/java-11-openjdk export SPARK_HOME=/opt/shims/spark34/spark_home/ ls -l /opt/shims/spark34/spark_home/ - $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \ + $MVN_CMD clean test -Pspark-3.4 -Pjava-11 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \ -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags \ -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/ -Dio.netty.tryReflectionSetAccessible=true -Djdk.reflect.useDirectMethodHandle=false" - name: Upload test report @@ -783,7 +783,7 @@ jobs: run-spark-test-spark34-slow: needs: build-native-lib-centos-7 runs-on: ubuntu-20.04 - container: apache/gluten:centos-8-jdk17 + container: apache/gluten:centos-8-jdk11 steps: - uses: actions/checkout@v2 - name: Download All Artifacts @@ -803,10 +803,10 @@ jobs: - name: Build and Run unit test for Spark 3.4.4 (slow tests) run: | cd $GITHUB_WORKSPACE/ - export JAVA_HOME=/usr/lib/jvm/java-17-openjdk + export JAVA_HOME=/usr/lib/jvm/java-11-openjdk export SPARK_HOME=/opt/shims/spark34/spark_home/ ls -l /opt/shims/spark34/spark_home/ - $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut -Phudi \ + $MVN_CMD clean test -Pspark-3.4 -Pjava-11 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut -Phudi \ -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest \ -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/ -Dio.netty.tryReflectionSetAccessible=true -Djdk.reflect.useDirectMethodHandle=false" - name: Upload test report From 0927ca2968be03d71b43ff452cfb9060b7bcdc2d Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Wed, 15 Jan 2025 13:46:19 +0800 Subject: [PATCH 20/21] jdk17 with right flags Signed-off-by: Yuan Zhou --- .github/workflows/velox_backend.yml | 33 ++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/.github/workflows/velox_backend.yml b/.github/workflows/velox_backend.yml index 2138304433e9..bcb5edc69fba 100644 --- a/.github/workflows/velox_backend.yml +++ b/.github/workflows/velox_backend.yml @@ -46,6 +46,23 @@ env: WGET_CMD: 'wget -nv' SETUP: 'bash .github/workflows/util/setup_helper.sh' CCACHE_DIR: "${{ github.workspace }}/.ccache" + EXTRA_FLAGS: "-XX:+IgnoreUnrecognizedVMOptions + --add-opens=java.base/java.lang=ALL-UNNAMED + --add-opens=java.base/java.lang.invoke=ALL-UNNAMED + --add-opens=java.base/java.lang.reflect=ALL-UNNAMED + --add-opens=java.base/java.io=ALL-UNNAMED + --add-opens=java.base/java.net=ALL-UNNAMED + --add-opens=java.base/java.nio=ALL-UNNAMED + --add-opens=java.base/java.util=ALL-UNNAMED + --add-opens=java.base/java.util.concurrent=ALL-UNNAMED + --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED + --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED + --add-opens=java.base/sun.nio.ch=ALL-UNNAMED + --add-opens=java.base/sun.nio.cs=ALL-UNNAMED + --add-opens=java.base/sun.security.action=ALL-UNNAMED + --add-opens=java.base/sun.util.calendar=ALL-UNNAMED + -Djdk.reflect.useDirectMethodHandle=false + -Dio.netty.tryReflectionSetAccessible=true" concurrency: group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} @@ -734,7 +751,7 @@ jobs: run-spark-test-spark34: needs: build-native-lib-centos-7 runs-on: ubuntu-20.04 - container: apache/gluten:centos-8-jdk11 + container: apache/gluten:centos-8-jdk17 steps: - uses: actions/checkout@v2 - name: Download All Artifacts @@ -760,12 +777,12 @@ jobs: run: | cd $GITHUB_WORKSPACE/ export SPARK_SCALA_VERSION=2.12 - export JAVA_HOME=/usr/lib/jvm/java-11-openjdk + export JAVA_HOME=/usr/lib/jvm/java-17-openjdk export SPARK_HOME=/opt/shims/spark34/spark_home/ ls -l /opt/shims/spark34/spark_home/ - $MVN_CMD clean test -Pspark-3.4 -Pjava-11 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \ + $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \ -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags \ - -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/ -Dio.netty.tryReflectionSetAccessible=true -Djdk.reflect.useDirectMethodHandle=false" + -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/ ${EXTRA_FLAGS}" - name: Upload test report if: always() uses: actions/upload-artifact@v4 @@ -783,7 +800,7 @@ jobs: run-spark-test-spark34-slow: needs: build-native-lib-centos-7 runs-on: ubuntu-20.04 - container: apache/gluten:centos-8-jdk11 + container: apache/gluten:centos-8-jdk17 steps: - uses: actions/checkout@v2 - name: Download All Artifacts @@ -803,12 +820,12 @@ jobs: - name: Build and Run unit test for Spark 3.4.4 (slow tests) run: | cd $GITHUB_WORKSPACE/ - export JAVA_HOME=/usr/lib/jvm/java-11-openjdk + export JAVA_HOME=/usr/lib/jvm/java-17-openjdk export SPARK_HOME=/opt/shims/spark34/spark_home/ ls -l /opt/shims/spark34/spark_home/ - $MVN_CMD clean test -Pspark-3.4 -Pjava-11 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut -Phudi \ + $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut -Phudi \ -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest \ - -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/ -Dio.netty.tryReflectionSetAccessible=true -Djdk.reflect.useDirectMethodHandle=false" + -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/ ${EXTRA_FLAGS}" - name: Upload test report if: always() uses: actions/upload-artifact@v4 From 85e7ea7d0643da9c732747026c98f536f71d1df4 Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Thu, 16 Jan 2025 07:56:10 +0800 Subject: [PATCH 21/21] update mockito to 4.6.1 Signed-off-by: Yuan Zhou --- gluten-ut/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gluten-ut/pom.xml b/gluten-ut/pom.xml index 89d1ff9bc9ae..bdb1128b4857 100644 --- a/gluten-ut/pom.xml +++ b/gluten-ut/pom.xml @@ -120,7 +120,7 @@ org.mockito mockito-core - 2.23.4 + 4.6.1 test