From 4b124cb9f095fb27a068a98891cbf4b8bf1b0f0d Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Mon, 13 Jan 2025 16:05:33 -0500 Subject: [PATCH 01/14] Update slf4j version --- .../main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index 7b791ef9aa8e..c5a550f2cae9 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -635,7 +635,7 @@ class BeamModulePlugin implements Plugin { def quickcheck_version = "1.0" def sbe_tool_version = "1.25.1" def singlestore_jdbc_version = "1.1.4" - def slf4j_version = "1.7.30" + def slf4j_version = "2.0.16" def snakeyaml_engine_version = "2.6" def snakeyaml_version = "2.2" def solace_version = "10.21.0" From f3636ff241130110ba47a46e1f20659a177fa9b4 Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Mon, 13 Jan 2025 16:27:28 -0500 Subject: [PATCH 02/14] Remove slf4j from arrow dependency exclusion. --- sdks/java/extensions/arrow/build.gradle | 15 +++------------ sdks/java/io/google-cloud-platform/build.gradle | 15 +++------------ 2 files changed, 6 insertions(+), 24 deletions(-) diff --git a/sdks/java/extensions/arrow/build.gradle b/sdks/java/extensions/arrow/build.gradle index 33c62a6ace28..4fc663ddbc28 100644 --- a/sdks/java/extensions/arrow/build.gradle +++ b/sdks/java/extensions/arrow/build.gradle @@ -24,19 +24,10 @@ description = "Apache Beam :: SDKs :: Java :: Extensions :: Arrow" dependencies { implementation library.java.vendored_guava_32_1_2_jre implementation project(path: ":sdks:java:core", configuration: "shadow") - implementation(library.java.arrow_vector) { - // Arrow 15 has compile dependency of slf4j 2.x where Beam does not support - exclude group: 'org.slf4j', module: 'slf4j-api' - } - implementation(library.java.arrow_memory_core) { - // Arrow 15 has compile dependency of slf4j 2.x where Beam does not support - exclude group: 'org.slf4j', module: 'slf4j-api' - } + implementation(library.java.arrow_vector) + implementation(library.java.arrow_memory_core) implementation library.java.joda_time - testImplementation(library.java.arrow_memory_netty) { - // Arrow 15 has compile dependency of slf4j 2.x where Beam does not support - exclude group: 'org.slf4j', module: 'slf4j-api' - } + testImplementation(library.java.arrow_memory_netty) testImplementation library.java.junit testImplementation library.java.hamcrest testRuntimeOnly library.java.slf4j_simple diff --git a/sdks/java/io/google-cloud-platform/build.gradle b/sdks/java/io/google-cloud-platform/build.gradle index 0a5a89072963..4f07dfd41668 100644 --- a/sdks/java/io/google-cloud-platform/build.gradle +++ b/sdks/java/io/google-cloud-platform/build.gradle @@ -138,22 +138,13 @@ dependencies { implementation library.java.slf4j_api implementation library.java.vendored_grpc_1_60_1 implementation library.java.vendored_guava_32_1_2_jre - implementation(library.java.arrow_memory_core) { - // Arrow 15 has compile dependency of slf4j 2.x where Beam does not support - exclude group: 'org.slf4j', module: 'slf4j-api' - } - implementation(library.java.arrow_vector) { - // Arrow 15 has compile dependency of slf4j 2.x where Beam does not support - exclude group: 'org.slf4j', module: 'slf4j-api' - } + implementation library.java.arrow_memory_core + implementation library.java.arrow_vector implementation 'com.google.http-client:google-http-client-gson:1.41.2' implementation "org.threeten:threetenbp:1.4.4" - testImplementation(library.java.arrow_memory_netty) { - // Arrow 15 has compile dependency of slf4j 2.x where Beam does not support - exclude group: 'org.slf4j', module: 'slf4j-api' - } + testImplementation library.java.arrow_memory_netty testImplementation project(path: ":sdks:java:core", configuration: "shadowTest") testImplementation project(path: ":sdks:java:extensions:avro", configuration: "testRuntimeMigration") testImplementation project(path: ":sdks:java:extensions:google-cloud-platform-core", configuration: "testRuntimeMigration") From 440bd2aa44e84ae5626b93e298d75dedb357bce1 Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Tue, 14 Jan 2025 13:52:08 -0500 Subject: [PATCH 03/14] Fix two test failures due to upgrade slf4j-jdk14 to 2.x The class path of slf4j has been changed from org/slf4j/impl to org/slf4j/jul. Two failed tests: - :runners:google-cloud-dataflow-java:worker:validateShadedJarContainsSlf4jJdk14 - :runners:google-cloud-dataflow-java:worker:validateShadedJarDoesntLeakNonProjectClasses --- runners/google-cloud-dataflow-java/worker/build.gradle | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runners/google-cloud-dataflow-java/worker/build.gradle b/runners/google-cloud-dataflow-java/worker/build.gradle index b7e6e981effe..cbc731aff2e9 100644 --- a/runners/google-cloud-dataflow-java/worker/build.gradle +++ b/runners/google-cloud-dataflow-java/worker/build.gradle @@ -87,7 +87,7 @@ applyJavaNature( // TODO(https://github.com/apache/beam/issues/19114): Move DataflowRunnerHarness class under org.apache.beam.runners.dataflow.worker namespace "com/google/cloud/dataflow/worker/DataflowRunnerHarness.class", // Allow slf4j implementation worker for logging during pipeline execution - "org/slf4j/impl/**" + "org/slf4j/jul/**" ], generatedClassPatterns: [ /^org\.apache\.beam\.runners\.dataflow\.worker\.windmill.*/, @@ -240,7 +240,7 @@ project.task('validateShadedJarContainsSlf4jJdk14', dependsOn: 'shadowJar') { doLast { project.configurations.shadow.artifacts.files.each { FileTree slf4jImpl = project.zipTree(it).matching { - include "org/slf4j/impl/JDK14LoggerAdapter.class" + include "org/slf4j/jul/JDK14LoggerAdapter.class" } outFile.text = slf4jImpl.files if (slf4jImpl.files.isEmpty()) { From 0f4e5f1e8ec7881934767037f0bd25663b0ac29e Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Tue, 14 Jan 2025 15:02:42 -0500 Subject: [PATCH 04/14] Fixed another four failed tests. The failed tests are under org.apache.beam.runners.dataflow.worker.HotKeyLoggerTest --- runners/google-cloud-dataflow-java/worker/build.gradle | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/runners/google-cloud-dataflow-java/worker/build.gradle b/runners/google-cloud-dataflow-java/worker/build.gradle index cbc731aff2e9..0d850dd8499c 100644 --- a/runners/google-cloud-dataflow-java/worker/build.gradle +++ b/runners/google-cloud-dataflow-java/worker/build.gradle @@ -250,6 +250,14 @@ project.task('validateShadedJarContainsSlf4jJdk14', dependsOn: 'shadowJar') { } } +project.tasks.withType(Test).configureEach { + jvmArgs( + "--add-opens=java.base/java.lang=ALL-UNNAMED", + "--add-opens=java.base/java.util=ALL-UNNAMED", + "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED", + ) +} + tasks.check.dependsOn project.tasks.validateShadedJarContainsSlf4jJdk14 //TODO(https://github.com/apache/beam/issues/19115): checktyle task should be enabled in the future. From fd16148bb76623246f1662af2ecb6e78c1505d42 Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Tue, 4 Feb 2025 14:16:37 -0500 Subject: [PATCH 05/14] Bump the default spark version from 3.2.2 to 3.5.0. The previous version has a compile dependency on slf4j 1.x binding, which would no longer work with slf4j 2.x. --- .../main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index b2cb3dd2e859..a0022cba83b1 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -641,7 +641,7 @@ class BeamModulePlugin implements Plugin { def snakeyaml_version = "2.2" def solace_version = "10.21.0" def spark2_version = "2.4.8" - def spark3_version = "3.2.2" + def spark3_version = "3.5.0" def spotbugs_version = "4.0.6" def testcontainers_version = "1.19.7" // [bomupgrader] determined by: org.apache.arrow:arrow-memory-core, consistent with: google_cloud_platform_libraries_bom From bcc3465083daa4e6c7c62c6091326049e95257a6 Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Tue, 4 Feb 2025 16:15:15 -0500 Subject: [PATCH 06/14] Add used but not declared deps for spark 3.5.0 --- runners/spark/spark_runner.gradle | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/runners/spark/spark_runner.gradle b/runners/spark/spark_runner.gradle index 80b35894bab8..bbb3976ef4cb 100644 --- a/runners/spark/spark_runner.gradle +++ b/runners/spark/spark_runner.gradle @@ -202,6 +202,10 @@ dependencies { testImplementation library.java.mockito_core testImplementation "org.assertj:assertj-core:3.11.1" testImplementation "org.apache.zookeeper:zookeeper:3.4.11" + if ("$spark_version" >= "3.5.0") { + testImplementation "org.apache.spark:spark-common-utils_$spark_scala_version:$spark_version" + testImplementation "org.apache.spark:spark-sql-api_$spark_scala_version:$spark_version" + } validatesRunner project(path: ":sdks:java:core", configuration: "shadowTest") validatesRunner project(path: ":runners:core-java", configuration: "testRuntimeMigration") validatesRunner project(":sdks:java:io:hadoop-format") From 43ddc022e2afc9c270173a83031cab2020b8c8b7 Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Tue, 4 Feb 2025 16:15:50 -0500 Subject: [PATCH 07/14] Temporary modify spark version to 3.x in sparkreceiver. --- sdks/java/io/sparkreceiver/2/build.gradle | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdks/java/io/sparkreceiver/2/build.gradle b/sdks/java/io/sparkreceiver/2/build.gradle index bf611e36bff1..c57cbac2fd3d 100644 --- a/sdks/java/io/sparkreceiver/2/build.gradle +++ b/sdks/java/io/sparkreceiver/2/build.gradle @@ -43,8 +43,8 @@ dependencies { implementation library.java.commons_lang3 implementation library.java.joda_time implementation library.java.slf4j_api - implementation library.java.spark_streaming - implementation library.java.spark_core + implementation library.java.spark3_streaming + implementation library.java.spark3_core implementation library.java.vendored_guava_32_1_2_jre implementation project(path: ":sdks:java:core", configuration: "shadow") compileOnly "org.scala-lang:scala-library:2.11.12" From f452f0539bb5686c2a2e8ba0c1b5eb53ff5bedac Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Tue, 4 Feb 2025 21:44:42 -0500 Subject: [PATCH 08/14] Fix failed spark tests. --- runners/spark/3/build.gradle | 7 ++++--- runners/spark/spark_runner.gradle | 4 ++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/runners/spark/3/build.gradle b/runners/spark/3/build.gradle index 5103805db347..f36d54072533 100644 --- a/runners/spark/3/build.gradle +++ b/runners/spark/3/build.gradle @@ -40,9 +40,10 @@ def sparkVersions = [ "332": "3.3.2", "331": "3.3.1", "330": "3.3.0", - "324": "3.2.4", - "323": "3.2.3", - "321": "3.2.1", + // The following 3.2.x versions do not work with slf4j 2.x + //"324": "3.2.4", + //"323": "3.2.3", + //"321": "3.2.1", "312": "3.1.2", "311": "3.1.1" ] diff --git a/runners/spark/spark_runner.gradle b/runners/spark/spark_runner.gradle index bbb3976ef4cb..1f60244135a9 100644 --- a/runners/spark/spark_runner.gradle +++ b/runners/spark/spark_runner.gradle @@ -176,6 +176,10 @@ dependencies { spark.components.each { component -> provided "$component:$spark_version" } + if ("$spark_version" >= "3.5.0") { + implementation "org.apache.spark:spark-common-utils_$spark_scala_version:$spark_version" + implementation "org.apache.spark:spark-sql-api_$spark_scala_version:$spark_version" + } permitUnusedDeclared "org.apache.spark:spark-network-common_$spark_scala_version:$spark_version" implementation "io.dropwizard.metrics:metrics-core:4.1.1" // version used by Spark 3.1 compileOnly "org.scala-lang:scala-library:2.12.15" From 40cdc9d6f48ef455818e6c2c7a1a0f2e4c79fa10 Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Tue, 4 Feb 2025 23:10:26 -0500 Subject: [PATCH 09/14] A better workaround for Spark 3.2.x --- runners/spark/3/build.gradle | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/runners/spark/3/build.gradle b/runners/spark/3/build.gradle index f36d54072533..812bccb58be7 100644 --- a/runners/spark/3/build.gradle +++ b/runners/spark/3/build.gradle @@ -40,10 +40,9 @@ def sparkVersions = [ "332": "3.3.2", "331": "3.3.1", "330": "3.3.0", - // The following 3.2.x versions do not work with slf4j 2.x - //"324": "3.2.4", - //"323": "3.2.3", - //"321": "3.2.1", + "324": "3.2.4", + "323": "3.2.3", + "321": "3.2.1", "312": "3.1.2", "311": "3.1.1" ] @@ -57,6 +56,25 @@ sparkVersions.each { kv -> } dependencies { + // Spark versions prior to 3.4.0 are compiled against SLF4J 1.x. The + // `org.apache.spark.internal.Logging.isLog4j12()` function references an + // SLF4J 1.x binding class (org.slf4j.impl.StaticLoggerBinder) which is + // no longer available in SLF4J 2.x. This results in a + // `java.lang.NoClassDefFoundError`. + // + // The workaround here provides an SLF4J 1.x binding to resolve the error. + // Only `org.apache.logging.log4j:log4j-slf4j-impl` provides a compatible + // SLF4J 1.x binding regardless SLF4J upgrade. + // Other package from SLF4J (e.g., slf4j-simple, slf4j-reload4j) are + // unsuitable because they also get upgraded as new SLF4J version is in use, + // and the binding classes are no longer in there. + // + // Notice that Spark 3.1.x uses `ch.qos.logback:logback-classic` and is + // unaffected by this SLF4J upgrade. Spark 3.3.x already uses + // `log4j-slf4j-impl` so it is also unaffected. + if ("$kv.key" >= "320" && "$kv.key" <= "324") { + "sparkVersion$kv.key" library.java.log4j2_slf4j_impl + } spark.components.each { component -> "sparkVersion$kv.key" "$component:$kv.value" } } From b810034165db41855402f3b3923eb1ef9977f43a Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Wed, 5 Feb 2025 12:22:14 -0500 Subject: [PATCH 10/14] Take out the add-opens for tests as they were only run in java 8 and 11. --- runners/google-cloud-dataflow-java/worker/build.gradle | 8 -------- 1 file changed, 8 deletions(-) diff --git a/runners/google-cloud-dataflow-java/worker/build.gradle b/runners/google-cloud-dataflow-java/worker/build.gradle index a6abe85dcbb3..a1deae5784f6 100644 --- a/runners/google-cloud-dataflow-java/worker/build.gradle +++ b/runners/google-cloud-dataflow-java/worker/build.gradle @@ -250,14 +250,6 @@ project.task('validateShadedJarContainsSlf4jJdk14', dependsOn: 'shadowJar') { } } -project.tasks.withType(Test).configureEach { - jvmArgs( - "--add-opens=java.base/java.lang=ALL-UNNAMED", - "--add-opens=java.base/java.util=ALL-UNNAMED", - "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED", - ) -} - tasks.check.dependsOn project.tasks.validateShadedJarContainsSlf4jJdk14 //TODO(https://github.com/apache/beam/issues/19115): checktyle task should be enabled in the future. From bdbb72e325d54ec67d153b86dadefef6530146e8 Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Wed, 5 Feb 2025 13:15:53 -0500 Subject: [PATCH 11/14] Mention changes in CHANGES.md --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index fc4a32120afa..107a6613a82e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -65,7 +65,7 @@ * Support for X source added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). ## New Features / Improvements - +* [Java] Upgrade SLF4J to 2.0.16. Update default Spark version to 3.5.0. ([#33574](https://github.com/apache/beam/pull/33574)) * X feature added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). ## Breaking Changes From 1710eff2bff4e6acefaf7bef5f3e48a8245745cc Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Wed, 5 Feb 2025 14:03:45 -0500 Subject: [PATCH 12/14] Update comments --- runners/spark/3/build.gradle | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/runners/spark/3/build.gradle b/runners/spark/3/build.gradle index 812bccb58be7..33327adde4a5 100644 --- a/runners/spark/3/build.gradle +++ b/runners/spark/3/build.gradle @@ -62,15 +62,16 @@ sparkVersions.each { kv -> // no longer available in SLF4J 2.x. This results in a // `java.lang.NoClassDefFoundError`. // - // The workaround here provides an SLF4J 1.x binding to resolve the error. - // Only `org.apache.logging.log4j:log4j-slf4j-impl` provides a compatible - // SLF4J 1.x binding regardless SLF4J upgrade. - // Other package from SLF4J (e.g., slf4j-simple, slf4j-reload4j) are - // unsuitable because they also get upgraded as new SLF4J version is in use, - // and the binding classes are no longer in there. + // The workaround is to provide an SLF4J 1.x binding module out of group + // `org.slf4j` to resolve the issue. + // Module `org.apache.logging.log4j:log4j-slf4j-impl` is an example that + // provides a compatible SLF4J 1.x binding regardless SLF4J upgrade. + // Binding/provider modules under group `org.slf4j` (e.g., + // slf4j-simple, slf4j-reload4j) get upgraded as a new SLF4J version is in + // use, and therefore do not contain the 1.x binding classes. // // Notice that Spark 3.1.x uses `ch.qos.logback:logback-classic` and is - // unaffected by this SLF4J upgrade. Spark 3.3.x already uses + // unaffected by the SLF4J upgrade. Spark 3.3.x already uses // `log4j-slf4j-impl` so it is also unaffected. if ("$kv.key" >= "320" && "$kv.key" <= "324") { "sparkVersion$kv.key" library.java.log4j2_slf4j_impl From d6b56c6cf4831d821c35c0ac13f75c5a3fbb6505 Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Tue, 11 Feb 2025 16:28:59 -0500 Subject: [PATCH 13/14] Move sparkReceiver/2 to sparkreceiver/3 that supports Spark 3.x. --- .../workflows/beam_PerformanceTests_SparkReceiver_IO.yml | 2 +- CHANGES.md | 2 +- build.gradle.kts | 2 +- sdks/java/io/cdap/build.gradle | 2 +- sdks/java/io/sparkreceiver/{2 => 3}/README.md | 6 +++--- sdks/java/io/sparkreceiver/{2 => 3}/build.gradle | 0 .../org/apache/beam/sdk/io/sparkreceiver/HasOffset.java | 0 .../sparkreceiver/ReadFromSparkReceiverWithOffsetDoFn.java | 0 .../apache/beam/sdk/io/sparkreceiver/ReceiverBuilder.java | 0 .../org/apache/beam/sdk/io/sparkreceiver/SparkConsumer.java | 0 .../apache/beam/sdk/io/sparkreceiver/SparkReceiverIO.java | 0 .../apache/beam/sdk/io/sparkreceiver/WrappedSupervisor.java | 0 .../org/apache/beam/sdk/io/sparkreceiver/package-info.java | 0 .../beam/sdk/io/sparkreceiver/ArrayBufferDataReceiver.java | 0 .../beam/sdk/io/sparkreceiver/ByteBufferDataReceiver.java | 0 .../beam/sdk/io/sparkreceiver/CustomReceiverWithOffset.java | 0 .../beam/sdk/io/sparkreceiver/IteratorDataReceiver.java | 0 .../sdk/io/sparkreceiver/RabbitMqReceiverWithOffset.java | 0 .../ReadFromSparkReceiverWithOffsetDoFnTest.java | 0 .../beam/sdk/io/sparkreceiver/ReceiverBuilderTest.java | 0 .../apache/beam/sdk/io/sparkreceiver/SparkReceiverIOIT.java | 0 .../beam/sdk/io/sparkreceiver/SparkReceiverIOTest.java | 0 settings.gradle.kts | 2 +- 23 files changed, 8 insertions(+), 8 deletions(-) rename sdks/java/io/sparkreceiver/{2 => 3}/README.md (95%) rename sdks/java/io/sparkreceiver/{2 => 3}/build.gradle (100%) rename sdks/java/io/sparkreceiver/{2 => 3}/src/main/java/org/apache/beam/sdk/io/sparkreceiver/HasOffset.java (100%) rename sdks/java/io/sparkreceiver/{2 => 3}/src/main/java/org/apache/beam/sdk/io/sparkreceiver/ReadFromSparkReceiverWithOffsetDoFn.java (100%) rename sdks/java/io/sparkreceiver/{2 => 3}/src/main/java/org/apache/beam/sdk/io/sparkreceiver/ReceiverBuilder.java (100%) rename sdks/java/io/sparkreceiver/{2 => 3}/src/main/java/org/apache/beam/sdk/io/sparkreceiver/SparkConsumer.java (100%) rename sdks/java/io/sparkreceiver/{2 => 3}/src/main/java/org/apache/beam/sdk/io/sparkreceiver/SparkReceiverIO.java (100%) rename sdks/java/io/sparkreceiver/{2 => 3}/src/main/java/org/apache/beam/sdk/io/sparkreceiver/WrappedSupervisor.java (100%) rename sdks/java/io/sparkreceiver/{2 => 3}/src/main/java/org/apache/beam/sdk/io/sparkreceiver/package-info.java (100%) rename sdks/java/io/sparkreceiver/{2 => 3}/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ArrayBufferDataReceiver.java (100%) rename sdks/java/io/sparkreceiver/{2 => 3}/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ByteBufferDataReceiver.java (100%) rename sdks/java/io/sparkreceiver/{2 => 3}/src/test/java/org/apache/beam/sdk/io/sparkreceiver/CustomReceiverWithOffset.java (100%) rename sdks/java/io/sparkreceiver/{2 => 3}/src/test/java/org/apache/beam/sdk/io/sparkreceiver/IteratorDataReceiver.java (100%) rename sdks/java/io/sparkreceiver/{2 => 3}/src/test/java/org/apache/beam/sdk/io/sparkreceiver/RabbitMqReceiverWithOffset.java (100%) rename sdks/java/io/sparkreceiver/{2 => 3}/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ReadFromSparkReceiverWithOffsetDoFnTest.java (100%) rename sdks/java/io/sparkreceiver/{2 => 3}/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ReceiverBuilderTest.java (100%) rename sdks/java/io/sparkreceiver/{2 => 3}/src/test/java/org/apache/beam/sdk/io/sparkreceiver/SparkReceiverIOIT.java (100%) rename sdks/java/io/sparkreceiver/{2 => 3}/src/test/java/org/apache/beam/sdk/io/sparkreceiver/SparkReceiverIOTest.java (100%) diff --git a/.github/workflows/beam_PerformanceTests_SparkReceiver_IO.yml b/.github/workflows/beam_PerformanceTests_SparkReceiver_IO.yml index cbd460d4782e..ace393eb161f 100644 --- a/.github/workflows/beam_PerformanceTests_SparkReceiver_IO.yml +++ b/.github/workflows/beam_PerformanceTests_SparkReceiver_IO.yml @@ -96,7 +96,7 @@ jobs: - name: run integrationTest uses: ./.github/actions/gradle-command-self-hosted-action with: - gradle-command: :sdks:java:io:sparkreceiver:2:integrationTest + gradle-command: :sdks:java:io:sparkreceiver:3:integrationTest arguments: | --info \ --tests org.apache.beam.sdk.io.sparkreceiver.SparkReceiverIOIT \ diff --git a/CHANGES.md b/CHANGES.md index 107a6613a82e..c93fcb7d6d0b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -69,7 +69,7 @@ * X feature added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). ## Breaking Changes - +* [Java] SparkReceiver 2 has been moved to SparkReceiver 3 that supports Spark 3.x. ([#33574](https://github.com/apache/beam/pull/33574)) * X behavior was changed ([#X](https://github.com/apache/beam/issues/X)). ## Deprecations diff --git a/build.gradle.kts b/build.gradle.kts index 0adb29058479..ec3b4968ce03 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -304,7 +304,7 @@ tasks.register("javaPreCommit") { dependsOn(":sdks:java:io:contextualtextio:build") dependsOn(":sdks:java:io:expansion-service:build") dependsOn(":sdks:java:io:file-based-io-tests:build") - dependsOn(":sdks:java:io:sparkreceiver:2:build") + dependsOn(":sdks:java:io:sparkreceiver:3:build") dependsOn(":sdks:java:io:synthetic:build") dependsOn(":sdks:java:io:xml:build") dependsOn(":sdks:java:javadoc:allJavadoc") diff --git a/sdks/java/io/cdap/build.gradle b/sdks/java/io/cdap/build.gradle index 4d823b1ad78b..95e6a522f1e3 100644 --- a/sdks/java/io/cdap/build.gradle +++ b/sdks/java/io/cdap/build.gradle @@ -60,7 +60,7 @@ dependencies { implementation library.java.tephra implementation library.java.vendored_guava_32_1_2_jre implementation project(path: ":sdks:java:core", configuration: "shadow") - implementation project(":sdks:java:io:sparkreceiver:2") + implementation project(":sdks:java:io:sparkreceiver:3") implementation project(":sdks:java:io:hadoop-format") testImplementation library.java.cdap_plugin_service_now testImplementation library.java.cdap_etl_api diff --git a/sdks/java/io/sparkreceiver/2/README.md b/sdks/java/io/sparkreceiver/3/README.md similarity index 95% rename from sdks/java/io/sparkreceiver/2/README.md rename to sdks/java/io/sparkreceiver/3/README.md index 137bcd18b65e..0f65e12ae8da 100644 --- a/sdks/java/io/sparkreceiver/2/README.md +++ b/sdks/java/io/sparkreceiver/3/README.md @@ -18,11 +18,11 @@ --> # SparkReceiverIO -SparkReceiverIO provides I/O transforms to read messages from an [Apache Spark Receiver](https://spark.apache.org/docs/2.4.0/streaming-custom-receivers.html) `org.apache.spark.streaming.receiver.Receiver` as an unbounded source. +SparkReceiverIO provides I/O transforms to read messages from an [Apache Spark Receiver](https://spark.apache.org/docs/3.5.0/streaming-custom-receivers.html) `org.apache.spark.streaming.receiver.Receiver` as an unbounded source. ## Prerequistes -SparkReceiverIO supports [Spark Receivers](https://spark.apache.org/docs/2.4.0/streaming-custom-receivers.html) (Spark version 2.4). +SparkReceiverIO supports [Spark Receivers](https://spark.apache.org/docs/3.5.0/streaming-custom-receivers.html) (Spark version 3.x, tested on Spark version 3.5.0). 1. Corresponding Spark Receiver should implement [HasOffset](https://github.com/apache/beam/blob/master/sdks/java/io/sparkreceiver/src/main/java/org/apache/beam/sdk/io/sparkreceiver/HasOffset.java) interface. 2. Records should have the numeric field that represents record offset. *Example:* `RecordId` field for Salesforce and `vid` field for Hubspot Receivers. For more details please see [GetOffsetUtils](https://github.com/apache/beam/tree/master/examples/java/cdap/src/main/java/org/apache/beam/examples/complete/cdap/utils/GetOffsetUtils.java) class from CDAP plugins examples. @@ -53,7 +53,7 @@ To learn more, please check out CDAP Streaming plugins [complete examples](https ## Dependencies -To use SparkReceiverIO, add a dependency on `beam-sdks-java-io-sparkreceiver`. +To use SparkReceiverIO, add a dependency on `beam-sdks-java-io-sparkreceiver-3`. ```maven diff --git a/sdks/java/io/sparkreceiver/2/build.gradle b/sdks/java/io/sparkreceiver/3/build.gradle similarity index 100% rename from sdks/java/io/sparkreceiver/2/build.gradle rename to sdks/java/io/sparkreceiver/3/build.gradle diff --git a/sdks/java/io/sparkreceiver/2/src/main/java/org/apache/beam/sdk/io/sparkreceiver/HasOffset.java b/sdks/java/io/sparkreceiver/3/src/main/java/org/apache/beam/sdk/io/sparkreceiver/HasOffset.java similarity index 100% rename from sdks/java/io/sparkreceiver/2/src/main/java/org/apache/beam/sdk/io/sparkreceiver/HasOffset.java rename to sdks/java/io/sparkreceiver/3/src/main/java/org/apache/beam/sdk/io/sparkreceiver/HasOffset.java diff --git a/sdks/java/io/sparkreceiver/2/src/main/java/org/apache/beam/sdk/io/sparkreceiver/ReadFromSparkReceiverWithOffsetDoFn.java b/sdks/java/io/sparkreceiver/3/src/main/java/org/apache/beam/sdk/io/sparkreceiver/ReadFromSparkReceiverWithOffsetDoFn.java similarity index 100% rename from sdks/java/io/sparkreceiver/2/src/main/java/org/apache/beam/sdk/io/sparkreceiver/ReadFromSparkReceiverWithOffsetDoFn.java rename to sdks/java/io/sparkreceiver/3/src/main/java/org/apache/beam/sdk/io/sparkreceiver/ReadFromSparkReceiverWithOffsetDoFn.java diff --git a/sdks/java/io/sparkreceiver/2/src/main/java/org/apache/beam/sdk/io/sparkreceiver/ReceiverBuilder.java b/sdks/java/io/sparkreceiver/3/src/main/java/org/apache/beam/sdk/io/sparkreceiver/ReceiverBuilder.java similarity index 100% rename from sdks/java/io/sparkreceiver/2/src/main/java/org/apache/beam/sdk/io/sparkreceiver/ReceiverBuilder.java rename to sdks/java/io/sparkreceiver/3/src/main/java/org/apache/beam/sdk/io/sparkreceiver/ReceiverBuilder.java diff --git a/sdks/java/io/sparkreceiver/2/src/main/java/org/apache/beam/sdk/io/sparkreceiver/SparkConsumer.java b/sdks/java/io/sparkreceiver/3/src/main/java/org/apache/beam/sdk/io/sparkreceiver/SparkConsumer.java similarity index 100% rename from sdks/java/io/sparkreceiver/2/src/main/java/org/apache/beam/sdk/io/sparkreceiver/SparkConsumer.java rename to sdks/java/io/sparkreceiver/3/src/main/java/org/apache/beam/sdk/io/sparkreceiver/SparkConsumer.java diff --git a/sdks/java/io/sparkreceiver/2/src/main/java/org/apache/beam/sdk/io/sparkreceiver/SparkReceiverIO.java b/sdks/java/io/sparkreceiver/3/src/main/java/org/apache/beam/sdk/io/sparkreceiver/SparkReceiverIO.java similarity index 100% rename from sdks/java/io/sparkreceiver/2/src/main/java/org/apache/beam/sdk/io/sparkreceiver/SparkReceiverIO.java rename to sdks/java/io/sparkreceiver/3/src/main/java/org/apache/beam/sdk/io/sparkreceiver/SparkReceiverIO.java diff --git a/sdks/java/io/sparkreceiver/2/src/main/java/org/apache/beam/sdk/io/sparkreceiver/WrappedSupervisor.java b/sdks/java/io/sparkreceiver/3/src/main/java/org/apache/beam/sdk/io/sparkreceiver/WrappedSupervisor.java similarity index 100% rename from sdks/java/io/sparkreceiver/2/src/main/java/org/apache/beam/sdk/io/sparkreceiver/WrappedSupervisor.java rename to sdks/java/io/sparkreceiver/3/src/main/java/org/apache/beam/sdk/io/sparkreceiver/WrappedSupervisor.java diff --git a/sdks/java/io/sparkreceiver/2/src/main/java/org/apache/beam/sdk/io/sparkreceiver/package-info.java b/sdks/java/io/sparkreceiver/3/src/main/java/org/apache/beam/sdk/io/sparkreceiver/package-info.java similarity index 100% rename from sdks/java/io/sparkreceiver/2/src/main/java/org/apache/beam/sdk/io/sparkreceiver/package-info.java rename to sdks/java/io/sparkreceiver/3/src/main/java/org/apache/beam/sdk/io/sparkreceiver/package-info.java diff --git a/sdks/java/io/sparkreceiver/2/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ArrayBufferDataReceiver.java b/sdks/java/io/sparkreceiver/3/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ArrayBufferDataReceiver.java similarity index 100% rename from sdks/java/io/sparkreceiver/2/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ArrayBufferDataReceiver.java rename to sdks/java/io/sparkreceiver/3/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ArrayBufferDataReceiver.java diff --git a/sdks/java/io/sparkreceiver/2/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ByteBufferDataReceiver.java b/sdks/java/io/sparkreceiver/3/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ByteBufferDataReceiver.java similarity index 100% rename from sdks/java/io/sparkreceiver/2/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ByteBufferDataReceiver.java rename to sdks/java/io/sparkreceiver/3/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ByteBufferDataReceiver.java diff --git a/sdks/java/io/sparkreceiver/2/src/test/java/org/apache/beam/sdk/io/sparkreceiver/CustomReceiverWithOffset.java b/sdks/java/io/sparkreceiver/3/src/test/java/org/apache/beam/sdk/io/sparkreceiver/CustomReceiverWithOffset.java similarity index 100% rename from sdks/java/io/sparkreceiver/2/src/test/java/org/apache/beam/sdk/io/sparkreceiver/CustomReceiverWithOffset.java rename to sdks/java/io/sparkreceiver/3/src/test/java/org/apache/beam/sdk/io/sparkreceiver/CustomReceiverWithOffset.java diff --git a/sdks/java/io/sparkreceiver/2/src/test/java/org/apache/beam/sdk/io/sparkreceiver/IteratorDataReceiver.java b/sdks/java/io/sparkreceiver/3/src/test/java/org/apache/beam/sdk/io/sparkreceiver/IteratorDataReceiver.java similarity index 100% rename from sdks/java/io/sparkreceiver/2/src/test/java/org/apache/beam/sdk/io/sparkreceiver/IteratorDataReceiver.java rename to sdks/java/io/sparkreceiver/3/src/test/java/org/apache/beam/sdk/io/sparkreceiver/IteratorDataReceiver.java diff --git a/sdks/java/io/sparkreceiver/2/src/test/java/org/apache/beam/sdk/io/sparkreceiver/RabbitMqReceiverWithOffset.java b/sdks/java/io/sparkreceiver/3/src/test/java/org/apache/beam/sdk/io/sparkreceiver/RabbitMqReceiverWithOffset.java similarity index 100% rename from sdks/java/io/sparkreceiver/2/src/test/java/org/apache/beam/sdk/io/sparkreceiver/RabbitMqReceiverWithOffset.java rename to sdks/java/io/sparkreceiver/3/src/test/java/org/apache/beam/sdk/io/sparkreceiver/RabbitMqReceiverWithOffset.java diff --git a/sdks/java/io/sparkreceiver/2/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ReadFromSparkReceiverWithOffsetDoFnTest.java b/sdks/java/io/sparkreceiver/3/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ReadFromSparkReceiverWithOffsetDoFnTest.java similarity index 100% rename from sdks/java/io/sparkreceiver/2/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ReadFromSparkReceiverWithOffsetDoFnTest.java rename to sdks/java/io/sparkreceiver/3/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ReadFromSparkReceiverWithOffsetDoFnTest.java diff --git a/sdks/java/io/sparkreceiver/2/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ReceiverBuilderTest.java b/sdks/java/io/sparkreceiver/3/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ReceiverBuilderTest.java similarity index 100% rename from sdks/java/io/sparkreceiver/2/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ReceiverBuilderTest.java rename to sdks/java/io/sparkreceiver/3/src/test/java/org/apache/beam/sdk/io/sparkreceiver/ReceiverBuilderTest.java diff --git a/sdks/java/io/sparkreceiver/2/src/test/java/org/apache/beam/sdk/io/sparkreceiver/SparkReceiverIOIT.java b/sdks/java/io/sparkreceiver/3/src/test/java/org/apache/beam/sdk/io/sparkreceiver/SparkReceiverIOIT.java similarity index 100% rename from sdks/java/io/sparkreceiver/2/src/test/java/org/apache/beam/sdk/io/sparkreceiver/SparkReceiverIOIT.java rename to sdks/java/io/sparkreceiver/3/src/test/java/org/apache/beam/sdk/io/sparkreceiver/SparkReceiverIOIT.java diff --git a/sdks/java/io/sparkreceiver/2/src/test/java/org/apache/beam/sdk/io/sparkreceiver/SparkReceiverIOTest.java b/sdks/java/io/sparkreceiver/3/src/test/java/org/apache/beam/sdk/io/sparkreceiver/SparkReceiverIOTest.java similarity index 100% rename from sdks/java/io/sparkreceiver/2/src/test/java/org/apache/beam/sdk/io/sparkreceiver/SparkReceiverIOTest.java rename to sdks/java/io/sparkreceiver/3/src/test/java/org/apache/beam/sdk/io/sparkreceiver/SparkReceiverIOTest.java diff --git a/settings.gradle.kts b/settings.gradle.kts index 32dd9ece8e26..470d6c020a52 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -252,7 +252,7 @@ include(":sdks:java:io:rabbitmq") include(":sdks:java:io:redis") include(":sdks:java:io:rrio") include(":sdks:java:io:solr") -include(":sdks:java:io:sparkreceiver:2") +include(":sdks:java:io:sparkreceiver:3") include(":sdks:java:io:snowflake") include(":sdks:java:io:snowflake:expansion-service") include(":sdks:java:io:splunk") From 935a63753bc95b41e95531b440a4da8439362c5c Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Wed, 12 Feb 2025 13:44:01 -0500 Subject: [PATCH 14/14] Minor fix on cdap spark dependency --- sdks/java/io/cdap/build.gradle | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/sdks/java/io/cdap/build.gradle b/sdks/java/io/cdap/build.gradle index 95e6a522f1e3..6ccb10a5c891 100644 --- a/sdks/java/io/cdap/build.gradle +++ b/sdks/java/io/cdap/build.gradle @@ -45,7 +45,11 @@ dependencies { implementation library.java.cdap_etl_api implementation library.java.cdap_etl_api_spark implementation library.java.cdap_hydrator_common - implementation library.java.cdap_plugin_hubspot + implementation (library.java.cdap_plugin_hubspot) { + // Excluding the module for scala 2.11, because Spark 3.x uses scala + // 2.12 instead. + exclude group: "com.fasterxml.jackson.module", module: "jackson-module-scala_2.11" + } implementation library.java.cdap_plugin_salesforce implementation library.java.cdap_plugin_service_now implementation library.java.cdap_plugin_zendesk @@ -56,7 +60,13 @@ dependencies { implementation library.java.jackson_core implementation library.java.jackson_databind implementation library.java.slf4j_api - implementation library.java.spark_streaming + implementation (library.java.spark3_streaming) { + // Excluding `org.slf4j:jul-to-slf4j` which was introduced as a + // transitive dependency in Spark 3.5.0 (particularly from + // spark-common-utils_2.12) and would cause stack overflow together with + // `org.slf4j:slf4j-jdk14`. + exclude group: "org.slf4j", module: "jul-to-slf4j" + } implementation library.java.tephra implementation library.java.vendored_guava_32_1_2_jre implementation project(path: ":sdks:java:core", configuration: "shadow")