Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit. Hold shift + click to select a range.
0118dda
build: add spark-4.1 profile and enable Spark 4.1.1 SQL tests
andygrove Apr 26, 2026
7a0dd7e
save current progress
andygrove Apr 26, 2026
203b88a
fix: support Spark 4.1 IndexShuffleBlockResolver constructor and docu…
andygrove Apr 26, 2026
b17726e
docs: reflow spark-sql-tests.md with prettier
andygrove Apr 26, 2026
626c966
build: add spark-4.1 test shim sources
andygrove Apr 26, 2026
8521e41
test: add spark-4.1 plan-stability golden files
andygrove Apr 26, 2026
7205d4d
Merge remote-tracking branch 'apache/main' into spark-4.1.1
andygrove Apr 26, 2026
330e400
fix: add isVariantStruct shim for spark-4.1 profile
andygrove Apr 26, 2026
50008a6
ci: purge partial pom-only entries from local Maven cache before sbt
andygrove Apr 26, 2026
5daf943
ci: enable spark-4.1 PR builds in Linux and macOS matrices
andygrove Apr 26, 2026
e93e67d
fix: pin spark-4.1 profile Scala to 2.13.16 for semanticdb compatibility
andygrove Apr 26, 2026
58bd76a
fix: keep spark-4.1 on Scala 2.13.17 and skip semanticdb lint
andygrove Apr 26, 2026
05cd6c4
fix: drop -Pscala-2.13 from macOS spark-4.1 matrix entry
andygrove Apr 26, 2026
5a60be2
fix: Spark 4.1 newTaskTempFile + REMAINDER_BY_ZERO error class
andygrove Apr 26, 2026
9a154b7
Merge remote-tracking branch 'apache/main' into spark-4.1.1
andygrove Apr 28, 2026
84379ec
test: drop spark-4.1 plan-stability golden files
andygrove Apr 28, 2026
dce8dfa
test: revert CometExpressionSuite spark-4.1 changes
andygrove Apr 28, 2026
cf81dea
ci: revert spark-4.1 entries from pr_build workflows
andygrove Apr 28, 2026
1190b5a
fix(spark-4.1): unblock Spark 4.1.1 SQL tests in CI
andygrove Apr 28, 2026
fc8e8e3
fix(spark-4.1): skip failing tests in 4.1.1 SQL test diff
andygrove Apr 28, 2026
98a178c
ci: raise SBT and forked JVM heap for Spark SQL tests
andygrove Apr 29, 2026
ebbc249
ci: drop _JAVA_OPTIONS that broke SBT startup
andygrove Apr 29, 2026
f5edafa
ci: run Spark SQL tests on runs-on.com 16-cpu runners
andygrove Apr 29, 2026
2c79a49
ci: pin Hive tests to ubuntu-24.04 and skip flaky/incompatible 4.1 SQ…
andygrove Apr 29, 2026
59494d1
test: skip Spark 4.1 plan-shape tests that introspect Spark-only types
andygrove Apr 29, 2026
f50b4a7
test: link IgnoreComet tags to tracking issue #4098
andygrove Apr 29, 2026
fff9158
ci: TEMP disable non-essential workflows on spark-4.1.1 branch
andygrove Apr 29, 2026
cb639a3
test: drop unused ShuffleExchangeExec import in StreamingQuerySuite
andygrove Apr 29, 2026
e91c669
Revert "ci: TEMP disable non-essential workflows on spark-4.1.1 branch"
andygrove Apr 29, 2026
c5a0b81
test: skip RocksDBStateStoreIntegrationSuite under Comet on Spark 4.1
andygrove Apr 29, 2026
9f4ec9c
Merge remote-tracking branch 'apache/main' into spark-4.1.1
andygrove Apr 30, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .github/actions/setup-spark-builder/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,23 @@ runs:
run: |
# Native library should already be in native/target/release/
./mvnw install -Prelease -DskipTests -Pspark-${{inputs.spark-short-version}}

- name: Purge partial Maven cache entries
shell: bash
run: |
# Comet's Maven phase resolves the dependency graph and downloads POMs
# for transitive artifacts whose JARs it never actually needs. When sbt
# then resolves Spark's deps, Coursier sees the POM in mavenLocal,
# declares the artifact "found locally", and fails on the missing JAR
# without falling back to Maven Central. Delete those partial entries
# so sbt re-fetches the full artifact remotely.
for repo in "$HOME/.m2/repository" /root/.m2/repository; do
[ -d "$repo" ] || continue
find "$repo" -name '*.pom' | while read -r pom; do
jar="${pom%.pom}.jar"
[ -f "$jar" ] && continue
grep -q '<packaging>jar</packaging>\|<packaging>bundle</packaging>' "$pom" 2>/dev/null || continue
rm -f "$pom" "${pom}.sha1" "${pom%.pom}.pom.lastUpdated" \
"$(dirname "$pom")/_remote.repositories"
done
done
10 changes: 7 additions & 3 deletions .github/workflows/spark_sql_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,6 @@ jobs:
needs: build-native
strategy:
matrix:
os: [ubuntu-24.04]
module:
- {name: "catalyst", args1: "catalyst/test", args2: ""}
- {name: "sql_core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest}
Expand All @@ -142,12 +141,17 @@ jobs:
- {spark-short: '3.5', spark-full: '3.5.8', java: 11, scan-impl: 'auto'}
- {spark-short: '4.0', spark-full: '4.0.2', java: 17, scan-impl: 'auto'}
- {spark-short: '4.0', spark-full: '4.0.2', java: 21, scan-impl: 'auto'}
- {spark-short: '4.1', spark-full: '4.1.1', java: 17, scan-impl: 'auto'}
fail-fast: false
name: spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}/spark-${{ matrix.config.spark-full }}
runs-on: ${{ matrix.os }}
# Hive tests stay on the standard GitHub-hosted runner: HiveSparkSubmitSuite
# relies on an Ivy 'local-m2-cache' resolver that the runs-on.com
# ubuntu24-full-x64 image does not provide, so spark-submit fails there.
runs-on: ${{ startsWith(matrix.module.name, 'sql_hive') && 'ubuntu-24.04' || (github.repository_owner == 'apache' && format('runs-on={0},family=m8a+m7a+c8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion-comet', github.run_id) || 'ubuntu-latest') }}
container:
image: amd64/rust
steps:
- uses: runs-on/action@742bf56072eb4845a0f94b3394673e4903c90ff0 # v2.1.0
- uses: actions/checkout@v6
- name: Setup Rust & Java toolchain
uses: ./.github/actions/setup-builder
Expand All @@ -170,7 +174,7 @@ jobs:
cd apache-spark
rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups
NOLINT_ON_COMPILE=true ENABLE_COMET=true ENABLE_COMET_ONHEAP=true COMET_PARQUET_SCAN_IMPL=${{ matrix.config.scan-impl }} ENABLE_COMET_LOG_FALLBACK_REASONS=${{ github.event.inputs.collect-fallback-logs || 'false' }} \
build/sbt -Dsbt.log.noformat=true ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
build/sbt -Dsbt.log.noformat=true -mem 6144 ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
if [ "${{ github.event.inputs.collect-fallback-logs }}" = "true" ]; then
find . -type f -name "unit-tests.log" -print0 | xargs -0 grep -h "Comet cannot accelerate" | sed 's/.*Comet cannot accelerate/Comet cannot accelerate/' | sort -u > fallback.log
fi
Expand Down
Loading
Loading