diff --git a/.github/workflows/util/install_spark_resources.sh b/.github/workflows/util/install_spark_resources.sh
index 1b00fe3ff293..ad454f601a1c 100755
--- a/.github/workflows/util/install_spark_resources.sh
+++ b/.github/workflows/util/install_spark_resources.sh
@@ -50,13 +50,13 @@ case "$1" in
   3.4)
     # Spark-3.4
     cd ${INSTALL_DIR} && \
-    wget -nv https://archive.apache.org/dist/spark/spark-3.4.3/spark-3.4.3-bin-hadoop3.tgz && \
-    tar --strip-components=1 -xf spark-3.4.3-bin-hadoop3.tgz spark-3.4.3-bin-hadoop3/jars/ && \
-    rm -rf spark-3.4.3-bin-hadoop3.tgz && \
+    wget -nv https://archive.apache.org/dist/spark/spark-3.4.4/spark-3.4.4-bin-hadoop3.tgz && \
+    tar --strip-components=1 -xf spark-3.4.4-bin-hadoop3.tgz spark-3.4.4-bin-hadoop3/jars/ && \
+    rm -rf spark-3.4.4-bin-hadoop3.tgz && \
     mkdir -p ${INSTALL_DIR}/shims/spark34/spark_home/assembly/target/scala-2.12 && \
     mv jars ${INSTALL_DIR}/shims/spark34/spark_home/assembly/target/scala-2.12 && \
-    wget -nv https://github.com/apache/spark/archive/refs/tags/v3.4.3.tar.gz && \
-    tar --strip-components=1 -xf v3.4.3.tar.gz spark-3.4.3/sql/core/src/test/resources/ && \
+    wget -nv https://github.com/apache/spark/archive/refs/tags/v3.4.4.tar.gz && \
+    tar --strip-components=1 -xf v3.4.4.tar.gz spark-3.4.4/sql/core/src/test/resources/ && \
     mkdir -p shims/spark34/spark_home/ && \
     mv sql shims/spark34/spark_home/
     ;;
diff --git a/.github/workflows/velox_backend.yml b/.github/workflows/velox_backend.yml
index 47a24e978fb4..718021058263 100644
--- a/.github/workflows/velox_backend.yml
+++ b/.github/workflows/velox_backend.yml
@@ -46,6 +46,23 @@ env:
   WGET_CMD: 'wget -nv'
   SETUP: 'bash .github/workflows/util/setup_helper.sh'
   CCACHE_DIR: "${{ github.workspace }}/.ccache"
+  EXTRA_FLAGS: "-XX:+IgnoreUnrecognizedVMOptions
+    --add-opens=java.base/java.lang=ALL-UNNAMED
+    --add-opens=java.base/java.lang.invoke=ALL-UNNAMED
+    --add-opens=java.base/java.lang.reflect=ALL-UNNAMED
+    --add-opens=java.base/java.io=ALL-UNNAMED
+    --add-opens=java.base/java.net=ALL-UNNAMED
+    --add-opens=java.base/java.nio=ALL-UNNAMED
+    --add-opens=java.base/java.util=ALL-UNNAMED
+    --add-opens=java.base/java.util.concurrent=ALL-UNNAMED
+    --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED
+    --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED
+    --add-opens=java.base/sun.nio.ch=ALL-UNNAMED
+    --add-opens=java.base/sun.nio.cs=ALL-UNNAMED
+    --add-opens=java.base/sun.security.action=ALL-UNNAMED
+    --add-opens=java.base/sun.util.calendar=ALL-UNNAMED
+    -Djdk.reflect.useDirectMethodHandle=false
+    -Dio.netty.tryReflectionSetAccessible=true"
 
 concurrency:
   group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
@@ -734,7 +751,7 @@ jobs:
   run-spark-test-spark34:
     needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
-    container: apache/gluten:centos-8
+    container: apache/gluten:centos-8-jdk17
     steps:
       - uses: actions/checkout@v2
       - name: Download All Artifacts
@@ -747,20 +764,25 @@ jobs:
         with:
           name: arrow-jars-centos-7-${{github.sha}}
           path: /root/.m2/repository/org/apache/arrow/
-      - name: Prepare
+      - name: Prepare spark.test.home for Spark 3.4.4 (other tests)
        run: |
+          rm -rf /opt/shims/spark34
+          bash .github/workflows/util/install_spark_resources.sh 3.4
          dnf module -y install python39 && \
          alternatives --set python3 /usr/bin/python3.9 && \
          pip3 install setuptools && \
-          pip3 install pyspark==3.4.3 cython && \
+          pip3 install pyspark==3.4.4 cython && \
          pip3 install pandas pyarrow
-      - name: Build and Run unit test for Spark 3.4.3 (other tests)
+      - name: Build and Run unit test for Spark 3.4.4 (other tests)
        run: |
          cd $GITHUB_WORKSPACE/
          export SPARK_SCALA_VERSION=2.12
-          $MVN_CMD clean test -Pspark-3.4 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \
-            -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" \
-            -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
+          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+          export SPARK_HOME=/opt/shims/spark34/spark_home/
+          ls -l /opt/shims/spark34/spark_home/
+          $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \
+            -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags \
+            -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/ ${EXTRA_FLAGS}"
       - name: Upload test report
         if: always()
         uses: actions/upload-artifact@v4
@@ -778,7 +800,7 @@ jobs:
   run-spark-test-spark34-slow:
     needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
-    container: apache/gluten:centos-8
+    container: apache/gluten:centos-8-jdk17
     steps:
       - uses: actions/checkout@v2
       - name: Download All Artifacts
@@ -791,12 +813,19 @@ jobs:
         with:
           name: arrow-jars-centos-7-${{github.sha}}
           path: /root/.m2/repository/org/apache/arrow/
-      - name: Build and Run unit test for Spark 3.4.3 (slow tests)
+      - name: Prepare
+        run: |
+          rm -rf /opt/shims/spark34
+          bash .github/workflows/util/install_spark_resources.sh 3.4
+      - name: Build and Run unit test for Spark 3.4.4 (slow tests)
        run: |
          cd $GITHUB_WORKSPACE/
-          $MVN_CMD clean test -Pspark-3.4 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut -Phudi \
-            -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" \
-            -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
+          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+          export SPARK_HOME=/opt/shims/spark34/spark_home/
+          ls -l /opt/shims/spark34/spark_home/
+          $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut -Phudi \
+            -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest \
+            -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/ ${EXTRA_FLAGS}"
       - name: Upload test report
         if: always()
         uses: actions/upload-artifact@v4
diff --git a/docs/get-started/Velox.md b/docs/get-started/Velox.md
index 6ed65f283e47..0371ea6168a5 100644
--- a/docs/get-started/Velox.md
+++ b/docs/get-started/Velox.md
@@ -9,7 +9,7 @@ parent: Getting-Started
 
 | Type  | Version                      |
 |-------|------------------------------|
-| Spark | 3.2.2, 3.3.1, 3.4.3, 3.5.2   |
+| Spark | 3.2.2, 3.3.1, 3.4.4, 3.5.2   |
 | OS    | Ubuntu20.04/22.04, Centos7/8 |
 | jdk   | openjdk8/jdk17               |
 | scala | 2.12                         |
@@ -18,7 +18,7 @@ parent: Getting-Started
 Currently, with static build Gluten+Velox backend supports all the Linux OSes, but is only tested on **Ubuntu20.04/Ubuntu22.04/Centos7/Centos8**.
 With dynamic build, Gluten+Velox backend support **Ubuntu20.04/Ubuntu22.04/Centos7/Centos8** and their variants.
 
-Currently, the officially supported Spark versions are 3.2.2, 3.3.1, 3.4.3 and 3.5.2.
+Currently, the officially supported Spark versions are 3.2.2, 3.3.1, 3.4.4 and 3.5.2.
 
 We need to set up the `JAVA_HOME` env. Currently, Gluten supports **java 8** and **java 17**.
diff --git a/docs/get-started/build-guide.md b/docs/get-started/build-guide.md
index f135999cc608..45556733490d 100644
--- a/docs/get-started/build-guide.md
+++ b/docs/get-started/build-guide.md
@@ -73,5 +73,5 @@ It's name pattern is `gluten--bundle-spark_<
 |---------------|----------------------|----------------------|
 | 3.2.2         | 3.2                  | 2.12                 |
 | 3.3.1         | 3.3                  | 2.12                 |
-| 3.4.3         | 3.4                  | 2.12                 |
+| 3.4.4         | 3.4                  | 2.12                 |
 | 3.5.2         | 3.5                  | 2.12                 |
diff --git a/gluten-ut/pom.xml b/gluten-ut/pom.xml
index ca41d580aa46..8182b8d5bdde 100644
--- a/gluten-ut/pom.xml
+++ b/gluten-ut/pom.xml
@@ -120,7 +120,7 @@
       <groupId>org.mockito</groupId>
       <artifactId>mockito-core</artifactId>
-      <version>2.23.4</version>
+      <version>4.6.1</version>
       <scope>test</scope>
diff --git a/pom.xml b/pom.xml
index 8fa2f355030b..21c5ac366993 100644
--- a/pom.xml
+++ b/pom.xml
@@ -59,7 +59,7 @@
     2.12.15
     3
     3.4
-    3.4.3
+    3.4.4
    spark-sql-columnar-shims-spark34
    1.5.0
    delta-core
@@ -322,8 +322,8 @@
        3.4
        spark-sql-columnar-shims-spark34
-        3.4.3
-        1.5.0
+        3.4.4
+        1.7.1
        delta-core
        2.4.0
        24
diff --git a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/SparkShimProvider.scala b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/SparkShimProvider.scala
index 2e642366e011..c79626eb21bf 100644
--- a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/SparkShimProvider.scala
+++ b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/SparkShimProvider.scala
@@ -20,7 +20,7 @@ import org.apache.gluten.sql.shims.{SparkShimDescriptor, SparkShims}
 import org.apache.gluten.sql.shims.spark34.SparkShimProvider.DESCRIPTOR
 
 object SparkShimProvider {
-  val DESCRIPTOR = SparkShimDescriptor(3, 4, 3)
+  val DESCRIPTOR = SparkShimDescriptor(3, 4, 4)
 }
 
 class SparkShimProvider extends org.apache.gluten.sql.shims.SparkShimProvider {
diff --git a/shims/spark34/src/main/scala/org/apache/spark/shuffle/SparkSortShuffleWriterUtil.scala b/shims/spark34/src/main/scala/org/apache/spark/shuffle/SparkSortShuffleWriterUtil.scala
index 9e684c2afdd4..95b15f04e7cb 100644
--- a/shims/spark34/src/main/scala/org/apache/spark/shuffle/SparkSortShuffleWriterUtil.scala
+++ b/shims/spark34/src/main/scala/org/apache/spark/shuffle/SparkSortShuffleWriterUtil.scala
@@ -27,6 +27,6 @@ object SparkSortShuffleWriterUtil {
       context: TaskContext,
       writeMetrics: ShuffleWriteMetricsReporter,
       shuffleExecutorComponents: ShuffleExecutorComponents): ShuffleWriter[K, V] = {
-    new SortShuffleWriter(handle, mapId, context, shuffleExecutorComponents)
+    new SortShuffleWriter(handle, mapId, context, writeMetrics, shuffleExecutorComponents)
   }
 }
diff --git a/tools/gluten-it/pom.xml b/tools/gluten-it/pom.xml
index 89738668039a..22256bd281ec 100644
--- a/tools/gluten-it/pom.xml
+++ b/tools/gluten-it/pom.xml
@@ -18,7 +18,7 @@
    ${java.version}
    ${java.version}
    2.12.17
-    3.4.3
+    3.4.4
    2.12
    3
    0.3.2-incubating
@@ -163,7 +163,7 @@
      spark-3.4
-      3.4.3
+      3.4.4
      2.12.17