diff --git a/.github/workflows/java-ci.yml b/.github/workflows/java-ci.yml
index 4ef0a30b8225..e8ac497ab04a 100644
--- a/.github/workflows/java-ci.yml
+++ b/.github/workflows/java-ci.yml
@@ -126,3 +126,16 @@ jobs:
           java-version: ${{ matrix.jvm }}
       - uses: gradle/actions/setup-gradle@0723195856401067f7a2779048b490ace7a47d7c # v5 # zizmor: ignore[cache-poisoning] -- cache writes are restricted to the default branch by setup-gradle
       - run: ./gradlew -Pquick=true javadoc
+
+  check-runtime-deps:
+    runs-on: ubuntu-24.04
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
+        with:
+          persist-credentials: false
+      - uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5
+        with:
+          distribution: zulu
+          java-version: 17
+      - uses: gradle/actions/setup-gradle@0723195856401067f7a2779048b490ace7a47d7c # v5 # zizmor: ignore[cache-poisoning] -- cache writes are restricted to the default branch by setup-gradle
+      - run: ./gradlew checkAllRuntimeDeps -q
diff --git a/aws-bundle/build.gradle b/aws-bundle/build.gradle
index 5b9054812a50..c891ac5b439c 100644
--- a/aws-bundle/build.gradle
+++ b/aws-bundle/build.gradle
@@ -66,4 +66,6 @@ project(":iceberg-aws-bundle") {
   jar {
     enabled = false
   }
+
+  apply from: "${rootDir}/runtime-deps.gradle"
 }
diff --git a/azure-bundle/build.gradle b/azure-bundle/build.gradle
index 0bdc30fdaa7e..dad563b67ab7 100644
--- a/azure-bundle/build.gradle
+++ b/azure-bundle/build.gradle
@@ -52,4 +52,6 @@ project(":iceberg-azure-bundle") {
   jar {
     enabled = false
   }
+
+  apply from: "${rootDir}/runtime-deps.gradle"
 }
diff --git a/build.gradle b/build.gradle
index 35e1d6a002f3..8ff278b77ee4 100644
--- a/build.gradle
+++ b/build.gradle
@@ -120,6 +120,15 @@ allprojects {
   }
 }
 
+tasks.register('checkAllRuntimeDeps') {
+  description = 'Validates runtime dependency baselines for all subprojects that have them'
+  group = 'verification'
+
+  dependsOn subprojects.collect { subproject ->
+    subproject.tasks.matching { it.name == 'checkRuntimeDeps' }
+  }
+}
+
 subprojects {
   if (it.name == 'iceberg-bom') {
     // the BOM does not build anything, the code below expects "source code"
diff --git a/dev/.rat-excludes b/dev/.rat-excludes
index 52a800723598..f94ef5bf8988 100644
--- a/dev/.rat-excludes
+++ b/dev/.rat-excludes
@@ -29,3 +29,4 @@ sitemap.xml
 .python-version
 **/*_index.md
 **/.venv/**
+**/runtime-deps.txt
diff --git a/flink/v1.20/build.gradle b/flink/v1.20/build.gradle
index 3591bf37b1a7..772133c8e1d8 100644
--- a/flink/v1.20/build.gradle
+++ b/flink/v1.20/build.gradle
@@ -266,4 +266,6 @@ project(":iceberg-flink:iceberg-flink-runtime-${flinkMajorVersion}") {
   jar {
     enabled = false
   }
+
+  apply from: "${rootDir}/runtime-deps.gradle"
 }
diff --git a/flink/v2.0/build.gradle b/flink/v2.0/build.gradle
index 5907f41b3544..b276cb90dd24 100644
--- a/flink/v2.0/build.gradle
+++ b/flink/v2.0/build.gradle
@@ -266,4 +266,6 @@ project(":iceberg-flink:iceberg-flink-runtime-${flinkMajorVersion}") {
   jar {
     enabled = false
   }
+
+  apply from: "${rootDir}/runtime-deps.gradle"
 }
diff --git a/flink/v2.1/build.gradle b/flink/v2.1/build.gradle
index 91081bdc2e42..a08cb1d5ebdd 100644
--- a/flink/v2.1/build.gradle
+++ b/flink/v2.1/build.gradle
@@ -266,4 +266,6 @@ project(":iceberg-flink:iceberg-flink-runtime-${flinkMajorVersion}") {
   jar {
     enabled = false
   }
+
+  apply from: "${rootDir}/runtime-deps.gradle"
 }
diff --git a/gcp-bundle/build.gradle b/gcp-bundle/build.gradle
index 6ebe05ccdbce..1f6642c9b2ce 100644
--- a/gcp-bundle/build.gradle
+++ b/gcp-bundle/build.gradle
@@ -59,4 +59,6 @@ project(":iceberg-gcp-bundle") {
   jar {
     enabled = false
   }
+
+  apply from: "${rootDir}/runtime-deps.gradle"
 }
diff --git a/kafka-connect/build.gradle b/kafka-connect/build.gradle
index e93ad8641f79..009ae719bac5 100644
--- a/kafka-connect/build.gradle
+++ b/kafka-connect/build.gradle
@@ -254,6 +254,8 @@ project(':iceberg-kafka-connect:iceberg-kafka-connect-runtime') {
 
   check.dependsOn integrationTest
   assemble.dependsOn distZip, hiveDistZip
+
+  apply from: "${rootDir}/runtime-deps.gradle"
 }
 
 project(':iceberg-kafka-connect:iceberg-kafka-connect-transforms') {
diff --git a/runtime-deps.gradle b/runtime-deps.gradle
new file mode 100644
index 000000000000..e08fce3fe47a
--- /dev/null
+++ b/runtime-deps.gradle
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+// Guards the runtime dependency surface for shadow JAR modules.
+//
+// Prevents accidental transitive dependency growth in shipped shadow JARs.
+// Without this guard, adding a single catalog module as 'implementation'
+// instead of 'compileOnly' can silently leak dozens of transitive artifacts
+// into the runtime JAR, inflating its size and introducing unlicensed code.
+//
+// Apply this script in any project that ships a bundled artifact: Spark and
+// Flink runtime shadow JARs, cloud bundles (aws, azure, gcp), and the Kafka
+// Connect runtime distribution.
+//
+// It adds two tasks:
+//
+//   generateRuntimeDeps  - resolves runtimeClasspath and writes a sorted
+//                          baseline of group:artifact:version coordinates
+//                          to runtime-deps.txt in the project directory.
+//
+//   checkRuntimeDeps     - compares the resolved dependencies against the
+//                          checked-in baseline and fails with a diff if
+//                          they don't match. Patch-level version changes are
+//                          ignored so that routine Dependabot bumps don't
+//                          require a baseline update. Wired into the 'check'
+//                          lifecycle.
+//
+// Workflow:
+//   1. ./gradlew check                        -- fails if deps changed
+//   2. ./gradlew generateRuntimeDeps          -- auto-updates all baselines
+//   3. Update LICENSE and NOTICE if dependency licenses changed (manual step)
+//   4. Commit
+
+def depsFile = file("${projectDir}/runtime-deps.txt")
+
+// Resolves runtimeClasspath into a sorted, de-duplicated list of external
+// group:artifact:version coordinates; Iceberg's own modules are excluded.
+def resolveRuntimeDeps = {
+  configurations.runtimeClasspath.resolvedConfiguration
+    .resolvedArtifacts
+    .collect { "${it.moduleVersion.id.group}:${it.moduleVersion.id.name}:${it.moduleVersion.id.version}" }
+    .findAll { !it.startsWith('org.apache.iceberg:') }
+    .toSorted()
+    .toUnique()
+}
+
+tasks.register('generateRuntimeDeps') {
+  group = 'verification'
+  description = 'Regenerate the runtime dependency baseline after intentional dependency changes'
+  outputs.file(depsFile)
+  doLast {
+    def deps = resolveRuntimeDeps()
+    depsFile.text = deps.join('\n') + '\n'
+    logger.lifecycle("Wrote ${deps.size()} dependencies to ${depsFile}")
+    logger.lifecycle("Review the diff, then update LICENSE and NOTICE if licenses changed.")
+  }
+}
+
+tasks.register('checkRuntimeDeps') {
+  group = 'verification'
+  description = 'Verify runtime dependencies match the checked-in baseline'
+  // The baseline is only read by this task, so it is declared as an input;
+  // 'files' (rather than 'file') is used because the baseline may not exist yet.
+  inputs.files(configurations.runtimeClasspath)
+  inputs.files(depsFile)
+  // No files are produced; this opts the task into up-to-date checks based on its inputs.
+  outputs.upToDateWhen { true }
+  // If both tasks are requested, regenerate the baseline before verifying it.
+  mustRunAfter 'generateRuntimeDeps'
+  doLast {
+    if (!depsFile.exists()) {
+      logger.warn("WARNING: Missing ${depsFile.name} in ${projectDir}. " +
+        "Run: ./gradlew ${project.path}:generateRuntimeDeps")
+      return
+    }
+
+    def actual = resolveRuntimeDeps()
+    // Trim each line so stray whitespace in the baseline cannot cause a false mismatch.
+    def expected = depsFile.readLines().collect { it.trim() }.findAll { it }.toSorted()
+
+    def groupArtifact = { coord -> coord.substring(0, coord.lastIndexOf(':')) }
+    // Only major.minor is compared, so patch-level bumps do not trip the check.
+    def majorMinor = { coord ->
+      def ver = coord.substring(coord.lastIndexOf(':') + 1)
+      def parts = ver.split('\\.')
+      parts.length >= 2 ? "${parts[0]}.${parts[1]}" : ver
+    }
+
+    def actualByModule = actual.collectEntries { [(groupArtifact(it)): it] }
+    def expectedByModule = expected.collectEntries { [(groupArtifact(it)): it] }
+
+    def added = actualByModule.keySet() - expectedByModule.keySet()
+    def removed = expectedByModule.keySet() - actualByModule.keySet()
+    def shared = actualByModule.keySet().intersect(expectedByModule.keySet())
+    def versionChanged = shared.findAll {
+      majorMinor(actualByModule[it]) != majorMinor(expectedByModule[it])
+    }
+
+    if (added || removed || versionChanged) {
+      def msg = new StringBuilder()
+      msg.append("Runtime dependency baseline mismatch for ${project.name}!\n")
+      if (versionChanged) {
+        msg.append("\n Version changed (${versionChanged.size()}):\n")
+        versionChanged.toSorted().each { module ->
+          msg.append(" ~ ${expectedByModule[module]} -> ${actualByModule[module]}\n")
+        }
+      }
+      if (added) {
+        msg.append("\n Added (${added.size()}):\n")
+        added.toSorted().each { module -> msg.append(" + ${actualByModule[module]}\n") }
+      }
+      if (removed) {
+        msg.append("\n Removed (${removed.size()}):\n")
+        removed.toSorted().each { module -> msg.append(" - ${expectedByModule[module]}\n") }
+      }
+      msg.append("\nTo update the baseline run:\n")
+      msg.append(" ./gradlew ${project.path}:generateRuntimeDeps\n")
+      msg.append("\nThen update LICENSE and NOTICE to reflect the dependency changes.")
+      throw new GradleException(msg.toString())
+    }
+  }
+}
+
+check.dependsOn checkRuntimeDeps
diff --git a/spark/v3.4/build.gradle b/spark/v3.4/build.gradle
index bfe84b08dfa4..bb8270e3d303 100644
--- a/spark/v3.4/build.gradle
+++ b/spark/v3.4/build.gradle
@@ -333,5 +333,7 @@ project(":iceberg-spark:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersio
   jar {
     enabled = false
   }
+
+  apply from: "${rootDir}/runtime-deps.gradle"
 }
 
diff --git a/spark/v3.5/build.gradle b/spark/v3.5/build.gradle
index 2fe3deb0ce5e..18fca51be251 100644
--- a/spark/v3.5/build.gradle
+++ b/spark/v3.5/build.gradle
@@ -335,5 +335,7 @@ project(":iceberg-spark:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersio
   jar {
     enabled = false
   }
+
+  apply from: "${rootDir}/runtime-deps.gradle"
 }
 
diff --git a/spark/v4.0/build.gradle b/spark/v4.0/build.gradle
index acc4d7529f37..62111e104e26 100644
--- a/spark/v4.0/build.gradle
+++ b/spark/v4.0/build.gradle
@@ -335,5 +335,7 @@ project(":iceberg-spark:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersio
   jar {
     enabled = false
   }
+
+  apply from: "${rootDir}/runtime-deps.gradle"
 }
 
diff --git a/spark/v4.1/build.gradle b/spark/v4.1/build.gradle
index 6a46cb4b2063..355a85ab81a9 100644
--- a/spark/v4.1/build.gradle
+++ b/spark/v4.1/build.gradle
@@ -335,5 +335,7 @@ project(":iceberg-spark:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersio
   jar {
     enabled = false
   }
+
+  apply from: "${rootDir}/runtime-deps.gradle"
 }
 