Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/workflows/util/install_spark_resources.sh
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,13 @@ case "$1" in
3.4)
# Spark-3.4
cd ${INSTALL_DIR} && \
wget -nv https://archive.apache.org/dist/spark/spark-3.4.3/spark-3.4.3-bin-hadoop3.tgz && \
tar --strip-components=1 -xf spark-3.4.3-bin-hadoop3.tgz spark-3.4.3-bin-hadoop3/jars/ && \
rm -rf spark-3.4.3-bin-hadoop3.tgz && \
wget -nv https://archive.apache.org/dist/spark/spark-3.4.4/spark-3.4.4-bin-hadoop3.tgz && \
tar --strip-components=1 -xf spark-3.4.4-bin-hadoop3.tgz spark-3.4.4-bin-hadoop3/jars/ && \
rm -rf spark-3.4.4-bin-hadoop3.tgz && \
mkdir -p ${INSTALL_DIR}/shims/spark34/spark_home/assembly/target/scala-2.12 && \
mv jars ${INSTALL_DIR}/shims/spark34/spark_home/assembly/target/scala-2.12 && \
wget -nv https://github.com/apache/spark/archive/refs/tags/v3.4.3.tar.gz && \
tar --strip-components=1 -xf v3.4.3.tar.gz spark-3.4.3/sql/core/src/test/resources/ && \
wget -nv https://github.com/apache/spark/archive/refs/tags/v3.4.4.tar.gz && \
tar --strip-components=1 -xf v3.4.4.tar.gz spark-3.4.4/sql/core/src/test/resources/ && \
mkdir -p shims/spark34/spark_home/ && \
mv sql shims/spark34/spark_home/
;;
Expand Down
53 changes: 41 additions & 12 deletions .github/workflows/velox_backend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,23 @@ env:
WGET_CMD: 'wget -nv'
SETUP: 'bash .github/workflows/util/setup_helper.sh'
CCACHE_DIR: "${{ github.workspace }}/.ccache"
EXTRA_FLAGS: "-XX:+IgnoreUnrecognizedVMOptions
--add-opens=java.base/java.lang=ALL-UNNAMED
--add-opens=java.base/java.lang.invoke=ALL-UNNAMED
--add-opens=java.base/java.lang.reflect=ALL-UNNAMED
--add-opens=java.base/java.io=ALL-UNNAMED
--add-opens=java.base/java.net=ALL-UNNAMED
--add-opens=java.base/java.nio=ALL-UNNAMED
--add-opens=java.base/java.util=ALL-UNNAMED
--add-opens=java.base/java.util.concurrent=ALL-UNNAMED
--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED
--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED
--add-opens=java.base/sun.nio.ch=ALL-UNNAMED
--add-opens=java.base/sun.nio.cs=ALL-UNNAMED
--add-opens=java.base/sun.security.action=ALL-UNNAMED
--add-opens=java.base/sun.util.calendar=ALL-UNNAMED
-Djdk.reflect.useDirectMethodHandle=false
-Dio.netty.tryReflectionSetAccessible=true"

concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
Expand Down Expand Up @@ -734,7 +751,7 @@ jobs:
run-spark-test-spark34:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8
container: apache/gluten:centos-8-jdk17
steps:
- uses: actions/checkout@v2
- name: Download All Artifacts
Expand All @@ -747,20 +764,25 @@ jobs:
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Prepare
- name: Prepare spark.test.home for Spark 3.4.4 (other tests)
run: |
rm -rf /opt/shims/spark34
bash .github/workflows/util/install_spark_resources.sh 3.4
dnf module -y install python39 && \
alternatives --set python3 /usr/bin/python3.9 && \
pip3 install setuptools && \
pip3 install pyspark==3.4.3 cython && \
pip3 install pyspark==3.4.4 cython && \
pip3 install pandas pyarrow
- name: Build and Run unit test for Spark 3.4.3 (other tests)
- name: Build and Run unit test for Spark 3.4.4 (other tests)
run: |
cd $GITHUB_WORKSPACE/
export SPARK_SCALA_VERSION=2.12
$MVN_CMD clean test -Pspark-3.4 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \
-DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
export SPARK_HOME=/opt/shims/spark34/spark_home/
ls -l /opt/shims/spark34/spark_home/
$MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seems Spark 344 UT will only run with jdk17, will we drop UT+jdk8 support in the future?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seems Spark 344 UT will only run with jdk17, will we drop UT+jdk8 support in the future?

Hi @Yohahaha,

In general, we are trying to bump the JDK support in the 1.4 release, because many dependent libraries have dropped JDK8 support.

  • The main reason for this bump is that Iceberg dropped JDK8 support starting from 1.7.0 (https://lists.apache.org/thread/xd4sjd0cx010qcv9qmlt89zdx4s1cyp4). JDK8 should still work if -Piceberg is not activated
  • Based on my local tests, JDK11 also works; the current latest LTS, JDK17, seems preferred based on what I have collected so far
  • If JDK8 requirements are strong in the community, we could add back the JDK8-based unit test CI (without -Piceberg)

Cc: @weiting-chen

Thanks, -yuan

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thank you for the explanation! @zhouyuan

based on my local tests, JDK11 is also working, the current latest LTS JDK17 seems more preferred based on what I collected so far

yes, spark 4.0 requires JDK17+

-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags \
-DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/ ${EXTRA_FLAGS}"
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
Expand All @@ -778,7 +800,7 @@ jobs:
run-spark-test-spark34-slow:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8
container: apache/gluten:centos-8-jdk17
steps:
- uses: actions/checkout@v2
- name: Download All Artifacts
Expand All @@ -791,12 +813,19 @@ jobs:
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Build and Run unit test for Spark 3.4.3 (slow tests)
- name: Prepare
run: |
rm -rf /opt/shims/spark34
bash .github/workflows/util/install_spark_resources.sh 3.4
- name: Build and Run unit test for Spark 3.4.4 (slow tests)
run: |
cd $GITHUB_WORKSPACE/
$MVN_CMD clean test -Pspark-3.4 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut -Phudi \
-DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" \
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
export SPARK_HOME=/opt/shims/spark34/spark_home/
ls -l /opt/shims/spark34/spark_home/
$MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut -Phudi \
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest \
-DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/ ${EXTRA_FLAGS}"
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
Expand Down
4 changes: 2 additions & 2 deletions docs/get-started/Velox.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ parent: Getting-Started

| Type | Version |
|-------|------------------------------|
| Spark | 3.2.2, 3.3.1, 3.4.3, 3.5.2 |
| Spark | 3.2.2, 3.3.1, 3.4.4, 3.5.2 |
| OS | Ubuntu20.04/22.04, Centos7/8 |
| jdk | openjdk8/jdk17 |
| scala | 2.12 |
Expand All @@ -18,7 +18,7 @@ parent: Getting-Started

Currently, with static build Gluten+Velox backend supports all the Linux OSes, but is only tested on **Ubuntu20.04/Ubuntu22.04/Centos7/Centos8**. With dynamic build, Gluten+Velox backend supports **Ubuntu20.04/Ubuntu22.04/Centos7/Centos8** and their variants.

Currently, the officially supported Spark versions are 3.2.2, 3.3.1, 3.4.3 and 3.5.2.
Currently, the officially supported Spark versions are 3.2.2, 3.3.1, 3.4.4 and 3.5.2.

We need to set up the `JAVA_HOME` env. Currently, Gluten supports **java 8** and **java 17**.

Expand Down
2 changes: 1 addition & 1 deletion docs/get-started/build-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,5 +73,5 @@ Its name pattern is `gluten-<backend_type>-bundle-spark<spark.bundle.version>_<
|---------------|----------------------|----------------------|
| 3.2.2 | 3.2 | 2.12 |
| 3.3.1 | 3.3 | 2.12 |
| 3.4.3 | 3.4 | 2.12 |
| 3.4.4 | 3.4 | 2.12 |
| 3.5.2 | 3.5 | 2.12 |
2 changes: 1 addition & 1 deletion gluten-ut/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<version>2.23.4</version>
<version>4.6.1</version>
<scope>test</scope>
</dependency>
<dependency>
Expand Down
6 changes: 3 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
<scala.version>2.12.15</scala.version>
<spark.major.version>3</spark.major.version>
<sparkbundle.version>3.4</sparkbundle.version>
<spark.version>3.4.3</spark.version>
<spark.version>3.4.4</spark.version>
<sparkshim.artifactId>spark-sql-columnar-shims-spark34</sparkshim.artifactId>
<iceberg.version>1.5.0</iceberg.version>
<delta.package.name>delta-core</delta.package.name>
Expand Down Expand Up @@ -322,8 +322,8 @@
<properties>
<sparkbundle.version>3.4</sparkbundle.version>
<sparkshim.artifactId>spark-sql-columnar-shims-spark34</sparkshim.artifactId>
<spark.version>3.4.3</spark.version>
<iceberg.version>1.5.0</iceberg.version>
<spark.version>3.4.4</spark.version>
<iceberg.version>1.7.1</iceberg.version>
<delta.package.name>delta-core</delta.package.name>
<delta.version>2.4.0</delta.version>
<delta.binary.version>24</delta.binary.version>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import org.apache.gluten.sql.shims.{SparkShimDescriptor, SparkShims}
import org.apache.gluten.sql.shims.spark34.SparkShimProvider.DESCRIPTOR

object SparkShimProvider {
val DESCRIPTOR = SparkShimDescriptor(3, 4, 3)
val DESCRIPTOR = SparkShimDescriptor(3, 4, 4)
}

class SparkShimProvider extends org.apache.gluten.sql.shims.SparkShimProvider {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,6 @@ object SparkSortShuffleWriterUtil {
context: TaskContext,
writeMetrics: ShuffleWriteMetricsReporter,
shuffleExecutorComponents: ShuffleExecutorComponents): ShuffleWriter[K, V] = {
new SortShuffleWriter(handle, mapId, context, shuffleExecutorComponents)
new SortShuffleWriter(handle, mapId, context, writeMetrics, shuffleExecutorComponents)
}
}
4 changes: 2 additions & 2 deletions tools/gluten-it/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
<maven.compiler.source>${java.version}</maven.compiler.source>
<maven.compiler.target>${java.version}</maven.compiler.target>
<scala.library.version>2.12.17</scala.library.version>
<spark.version>3.4.3</spark.version>
<spark.version>3.4.4</spark.version>
<scala.binary.version>2.12</scala.binary.version>
<spark.major.version>3</spark.major.version>
<celeborn.version>0.3.2-incubating</celeborn.version>
Expand Down Expand Up @@ -163,7 +163,7 @@
<profile>
<id>spark-3.4</id>
<properties>
<spark.version>3.4.3</spark.version>
<spark.version>3.4.4</spark.version>
<scala.library.version>2.12.17</scala.library.version>
</properties>
</profile>
Expand Down