From 8073d63ab40ec8507dc3752e0bda1a77fe5d2b62 Mon Sep 17 00:00:00 2001 From: Ahmed Abualsaud Date: Wed, 5 Jun 2024 11:02:22 -0400 Subject: [PATCH 1/6] remove snake yaml --- sdks/java/core/build.gradle | 3 +- .../beam/sdk/schemas/utils/YamlUtils.java | 33 ++++++++++--------- .../apache/beam/sdk/util/YamlUtilsTest.java | 6 ++-- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/sdks/java/core/build.gradle b/sdks/java/core/build.gradle index e150c22de62d..5f32b64893df 100644 --- a/sdks/java/core/build.gradle +++ b/sdks/java/core/build.gradle @@ -91,6 +91,7 @@ dependencies { shadow library.java.jackson_core shadow library.java.jackson_annotations shadow library.java.jackson_databind + shadow library.java.jackson_dataformat_yaml shadow library.java.slf4j_api shadow library.java.snappy_java shadow library.java.joda_time @@ -98,7 +99,6 @@ dependencies { permitUnusedDeclared enforcedPlatform(library.java.google_cloud_platform_libraries_bom) provided library.java.json_org implementation library.java.everit_json_schema - implementation library.java.snake_yaml shadowTest library.java.everit_json_schema provided library.java.junit testImplementation "com.github.stefanbirkner:system-rules:1.19.0" @@ -107,7 +107,6 @@ dependencies { provided 'com.facebook.presto.hadoop:hadoop-apache2:3.2.0-1' provided library.java.zstd_jni permitUnusedDeclared 'com.facebook.presto.hadoop:hadoop-apache2:3.2.0-1' - shadowTest library.java.jackson_dataformat_yaml shadowTest library.java.guava_testlib shadowTest library.java.mockito_core shadowTest library.java.hamcrest diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/YamlUtils.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/YamlUtils.java index e631e166e8be..58b2cbfbae69 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/YamlUtils.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/YamlUtils.java @@ -19,6 +19,9 @@ import static org.apache.beam.sdk.values.Row.toRow; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import java.math.BigDecimal; import java.util.Collections; import java.util.List; @@ -34,9 +37,10 @@ import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.io.BaseEncoding; import org.checkerframework.checker.nullness.qual.Nullable; -import org.yaml.snakeyaml.Yaml; public class YamlUtils { + private static final ObjectMapper MAPPER = new ObjectMapper(new YAMLFactory()); + private static final Map> YAML_VALUE_PARSERS = ImmutableMap .) Preconditions.checkNotNull(yamlMap), schema, convertNamesToCamelCase); + return toBeamRow(yamlStringToMap(yamlString), schema, convertNamesToCamelCase); } private static @Nullable Object toBeamValue( @@ -180,13 +175,21 @@ public static String yamlStringFromMap(@Nullable Map map) { if (map == null || map.isEmpty()) { return ""; } - return new Yaml().dumpAsMap(map); + try { + return MAPPER.writeValueAsString(map); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } } - public static Map yamlStringToMap(@Nullable String yaml) { - if (yaml == null || yaml.isEmpty()) { + public static Map yamlStringToMap(@Nullable String yamlString) { + if (yamlString == null || yamlString.isEmpty()) { return Collections.emptyMap(); } - return new Yaml().load(yaml); + try { + return MAPPER.readValue(yamlString, Map.class); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } } } diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/util/YamlUtilsTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/util/YamlUtilsTest.java index bf032aed7b5c..4866b5fb74c7 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/util/YamlUtilsTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/util/YamlUtilsTest.java @@ -123,11 +123,11 @@ public void testExtraFieldsAreIgnored() { @Test public void testInvalidTopLevelArray() { - String invalidYaml = "- top_level_list" + "- another_list"; + String invalidYaml = "- top_level_list\n" + "- another_list"; Schema schema = Schema.builder().build(); - thrown.expect(IllegalArgumentException.class); - thrown.expectMessage("Expected a YAML mapping"); + thrown.expect(RuntimeException.class); + thrown.expectMessage("Array value"); YamlUtils.toBeamRow(invalidYaml, schema); } From 919b9d749a0ddc806f6c3ca0b2d53e92995c07af Mon Sep 17 00:00:00 2001 From: Ahmed Abualsaud Date: Wed, 5 Jun 2024 11:23:34 -0400 Subject: [PATCH 2/6] remove snake yaml from other places --- .../org/apache/beam/gradle/BeamModulePlugin.groovy | 2 -- .../org/apache/beam/sdk/schemas/utils/YamlUtils.java | 10 ++++++++++ sdks/java/expansion-service/build.gradle | 1 - .../sdk/expansion/service/ExpansionServiceConfig.java | 5 ++--- .../service/JavaClassLookupTransformProvider.java | 5 ++--- .../IcebergReadSchemaTransformProviderTest.java | 4 ++-- .../IcebergWriteSchemaTransformProviderTest.java | 4 ++-- sdks/java/transform-service/build.gradle | 1 - .../sdk/transformservice/TransformServiceConfig.java | 5 ++--- 9 files changed, 20 insertions(+), 17 deletions(-) diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index 7c692fcb3f4e..e54946456d4e 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -635,7 +635,6 @@ class BeamModulePlugin implements Plugin { def sbe_tool_version = "1.25.1" def singlestore_jdbc_version = "1.1.4" def slf4j_version = "1.7.30" - def snakeyaml_version = "2.2" def spark2_version = "2.4.8" def spark3_version = "3.2.2" def spotbugs_version = "4.0.6" @@ -867,7 +866,6 @@ class BeamModulePlugin implements Plugin { sbe_tool : "uk.co.real-logic:sbe-tool:$sbe_tool_version", singlestore_jdbc : "com.singlestore:singlestore-jdbc-client:$singlestore_jdbc_version", slf4j_api : "org.slf4j:slf4j-api:$slf4j_version", - snake_yaml : "org.yaml:snakeyaml:$snakeyaml_version", slf4j_android : "org.slf4j:slf4j-android:$slf4j_version", slf4j_ext : "org.slf4j:slf4j-ext:$slf4j_version", slf4j_jdk14 : "org.slf4j:slf4j-jdk14:$slf4j_version", diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/YamlUtils.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/YamlUtils.java index 58b2cbfbae69..e004225ef226 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/YamlUtils.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/YamlUtils.java @@ -22,6 +22,8 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; +import java.io.IOException; +import java.io.InputStream; import java.math.BigDecimal; import java.util.Collections; import java.util.List; @@ -192,4 +194,12 @@ public static Map yamlStringToMap(@Nullable String yamlString) { throw new RuntimeException(e); } } + + public static Map inputStreamToMap(InputStream inputStream) { + try { + return MAPPER.readValue(inputStream, Map.class); + } catch (IOException e) { + throw new RuntimeException(e); + } + } } diff --git a/sdks/java/expansion-service/build.gradle b/sdks/java/expansion-service/build.gradle index 2926bfad633f..fe00c8e50350 100644 --- a/sdks/java/expansion-service/build.gradle +++ b/sdks/java/expansion-service/build.gradle @@ -41,7 +41,6 @@ dependencies { implementation project(path: ":sdks:java:core", configuration: "shadow") implementation project(path: ":runners:java-fn-execution") implementation project(path: ":sdks:java:harness") - implementation library.java.snake_yaml permitUnusedDeclared project(path: ":model:fn-execution") implementation library.java.vendored_grpc_1_60_1 implementation library.java.vendored_guava_32_1_2_jre diff --git a/sdks/java/expansion-service/src/main/java/org/apache/beam/sdk/expansion/service/ExpansionServiceConfig.java b/sdks/java/expansion-service/src/main/java/org/apache/beam/sdk/expansion/service/ExpansionServiceConfig.java index 7d8d1266c57a..dd4343502a99 100644 --- a/sdks/java/expansion-service/src/main/java/org/apache/beam/sdk/expansion/service/ExpansionServiceConfig.java +++ b/sdks/java/expansion-service/src/main/java/org/apache/beam/sdk/expansion/service/ExpansionServiceConfig.java @@ -24,7 +24,7 @@ import java.util.List; import java.util.Map; import java.util.stream.Collectors; -import org.yaml.snakeyaml.Yaml; +import org.apache.beam.sdk.schemas.utils.YamlUtils; @SuppressWarnings("nullness") @AutoValue @@ -47,8 +47,7 @@ public static ExpansionServiceConfig create( } static ExpansionServiceConfig parseFromYamlStream(InputStream inputStream) { - Yaml yaml = new Yaml(); - Map config = yaml.load(inputStream); + Map config = YamlUtils.inputStreamToMap(inputStream); if (config == null) { throw new IllegalArgumentException( diff --git a/sdks/java/expansion-service/src/main/java/org/apache/beam/sdk/expansion/service/JavaClassLookupTransformProvider.java b/sdks/java/expansion-service/src/main/java/org/apache/beam/sdk/expansion/service/JavaClassLookupTransformProvider.java index 5e0820da468d..9303dc45cfca 100644 --- a/sdks/java/expansion-service/src/main/java/org/apache/beam/sdk/expansion/service/JavaClassLookupTransformProvider.java +++ b/sdks/java/expansion-service/src/main/java/org/apache/beam/sdk/expansion/service/JavaClassLookupTransformProvider.java @@ -53,6 +53,7 @@ import org.apache.beam.sdk.schemas.Schema.TypeName; import org.apache.beam.sdk.schemas.SchemaRegistry; import org.apache.beam.sdk.schemas.SchemaTranslation; +import org.apache.beam.sdk.schemas.utils.YamlUtils; import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.transforms.SerializableFunction; import org.apache.beam.sdk.util.common.ReflectHelpers; @@ -62,7 +63,6 @@ import org.apache.beam.vendor.grpc.v1p60p1.com.google.protobuf.ByteString; import org.apache.beam.vendor.grpc.v1p60p1.com.google.protobuf.InvalidProtocolBufferException; import org.checkerframework.checker.nullness.qual.Nullable; -import org.yaml.snakeyaml.Yaml; /** * A transform provider that can be used to directly instantiate a transform using Java class name @@ -490,8 +490,7 @@ public static AllowList everything() { } static AllowList parseFromYamlStream(InputStream inputStream) { - Yaml yaml = new Yaml(); - Map config = yaml.load(inputStream); + Map config = YamlUtils.inputStreamToMap(inputStream); if (config == null) { throw new IllegalArgumentException( diff --git a/sdks/java/io/iceberg/src/test/java/org/apache/beam/sdk/io/iceberg/IcebergReadSchemaTransformProviderTest.java b/sdks/java/io/iceberg/src/test/java/org/apache/beam/sdk/io/iceberg/IcebergReadSchemaTransformProviderTest.java index 46168a487dda..3957efc76c8b 100644 --- a/sdks/java/io/iceberg/src/test/java/org/apache/beam/sdk/io/iceberg/IcebergReadSchemaTransformProviderTest.java +++ b/sdks/java/io/iceberg/src/test/java/org/apache/beam/sdk/io/iceberg/IcebergReadSchemaTransformProviderTest.java @@ -28,6 +28,7 @@ import java.util.stream.Stream; import org.apache.beam.sdk.managed.Managed; import org.apache.beam.sdk.schemas.Schema; +import org.apache.beam.sdk.schemas.utils.YamlUtils; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.values.PCollection; @@ -40,7 +41,6 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import org.yaml.snakeyaml.Yaml; public class IcebergReadSchemaTransformProviderTest { @@ -163,7 +163,7 @@ public void testReadUsingManagedTransform() throws Exception { + " catalog_type: %s\n" + " warehouse_location: %s", identifier, CatalogUtil.ICEBERG_CATALOG_TYPE_HADOOP, warehouse.location); - Map configMap = new Yaml().load(yamlConfig); + Map configMap = YamlUtils.yamlStringToMap(yamlConfig); PCollection output = testPipeline diff --git a/sdks/java/io/iceberg/src/test/java/org/apache/beam/sdk/io/iceberg/IcebergWriteSchemaTransformProviderTest.java b/sdks/java/io/iceberg/src/test/java/org/apache/beam/sdk/io/iceberg/IcebergWriteSchemaTransformProviderTest.java index 9ef3e9945ec9..046930d8f815 100644 --- a/sdks/java/io/iceberg/src/test/java/org/apache/beam/sdk/io/iceberg/IcebergWriteSchemaTransformProviderTest.java +++ b/sdks/java/io/iceberg/src/test/java/org/apache/beam/sdk/io/iceberg/IcebergWriteSchemaTransformProviderTest.java @@ -27,6 +27,7 @@ import java.util.Map; import java.util.UUID; import org.apache.beam.sdk.managed.Managed; +import org.apache.beam.sdk.schemas.utils.YamlUtils; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.transforms.Create; @@ -47,7 +48,6 @@ import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; -import org.yaml.snakeyaml.Yaml; @RunWith(JUnit4.class) public class IcebergWriteSchemaTransformProviderTest { @@ -132,7 +132,7 @@ public void testWriteUsingManagedTransform() { + " catalog_type: %s\n" + " warehouse_location: %s", identifier, CatalogUtil.ICEBERG_CATALOG_TYPE_HADOOP, warehouse.location); - Map configMap = new Yaml().load(yamlConfig); + Map configMap = YamlUtils.yamlStringToMap(yamlConfig); PCollection inputRows = testPipeline diff --git a/sdks/java/transform-service/build.gradle b/sdks/java/transform-service/build.gradle index 91e6185152da..df6e2281b5c4 100644 --- a/sdks/java/transform-service/build.gradle +++ b/sdks/java/transform-service/build.gradle @@ -43,7 +43,6 @@ dependencies { implementation project(path: ":sdks:java:core", configuration: "shadow") implementation library.java.vendored_grpc_1_60_1 implementation library.java.vendored_guava_32_1_2_jre - implementation library.java.snake_yaml testImplementation library.java.junit testImplementation library.java.mockito_core testImplementation project(path: ":runners:java-fn-execution") diff --git a/sdks/java/transform-service/src/main/java/org/apache/beam/sdk/transformservice/TransformServiceConfig.java b/sdks/java/transform-service/src/main/java/org/apache/beam/sdk/transformservice/TransformServiceConfig.java index bbada3205463..166d94ef55cc 100644 --- a/sdks/java/transform-service/src/main/java/org/apache/beam/sdk/transformservice/TransformServiceConfig.java +++ b/sdks/java/transform-service/src/main/java/org/apache/beam/sdk/transformservice/TransformServiceConfig.java @@ -22,7 +22,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; -import org.yaml.snakeyaml.Yaml; +import org.apache.beam.sdk.schemas.utils.YamlUtils; @AutoValue public abstract class TransformServiceConfig { @@ -41,8 +41,7 @@ static TransformServiceConfig create(List expansionservices) { } static TransformServiceConfig parseFromYamlStream(InputStream inputStream) { - Yaml yaml = new Yaml(); - Map config = yaml.load(inputStream); + Map config = YamlUtils.inputStreamToMap(inputStream); if (config == null) { throw new IllegalArgumentException( "Could not parse the provided YAML stream into a non-trivial TransformServiceConfig"); From 700a343e54b40a75ae2ae155ad8ecc8ac6c2f2e6 Mon Sep 17 00:00:00 2001 From: Ahmed Abualsaud Date: Wed, 5 Jun 2024 11:38:37 -0400 Subject: [PATCH 3/6] revert #31485 and #31473 --- .../main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy | 2 +- sdks/java/container/license_scripts/dep_urls_java.yaml | 2 +- sdks/java/io/hadoop-format/build.gradle | 2 -- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index e54946456d4e..78a6b371e6f0 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -617,7 +617,7 @@ class BeamModulePlugin implements Plugin { def influxdb_version = "2.19" def httpclient_version = "4.5.13" def httpcore_version = "4.4.14" - def jackson_version = "2.15.4" + def jackson_version = "2.14.1" def jaxb_api_version = "2.3.3" def jsr305_version = "3.0.2" def everit_json_version = "1.14.2" diff --git a/sdks/java/container/license_scripts/dep_urls_java.yaml b/sdks/java/container/license_scripts/dep_urls_java.yaml index e0b78f209b00..c3a3fd2708f2 100644 --- a/sdks/java/container/license_scripts/dep_urls_java.yaml +++ b/sdks/java/container/license_scripts/dep_urls_java.yaml @@ -58,7 +58,7 @@ xz: '1.5': # The original repo is down. This license is taken from https://tukaani.org/xz/java.html. license: "file://{}/xz/COPYING" jackson-bom: - '2.15.4': + '2.14.1': license: "https://raw.githubusercontent.com/FasterXML/jackson-bom/master/LICENSE" type: "Apache License 2.0" junit-dep: diff --git a/sdks/java/io/hadoop-format/build.gradle b/sdks/java/io/hadoop-format/build.gradle index c4def2c1de0e..fe2a04988808 100644 --- a/sdks/java/io/hadoop-format/build.gradle +++ b/sdks/java/io/hadoop-format/build.gradle @@ -47,8 +47,6 @@ configurations.testRuntimeClasspath { // Force use the old version of JAMM that cassandra relies on resolutionStrategy.force 'com.github.jbellis:jamm:0.3.0' - // Pin snakeyaml version due to cassandra-all does not support 2.x - resolutionStrategy.force 'org.yaml:snakeyaml:1.33' } dependencies { From dfce791e2959dfa775ed7025dd40c690aee80b23 Mon Sep 17 00:00:00 2001 From: Ahmed Abualsaud Date: Wed, 5 Jun 2024 12:26:03 -0400 Subject: [PATCH 4/6] bump jackson --- .../main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index 78a6b371e6f0..e54946456d4e 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -617,7 +617,7 @@ class BeamModulePlugin implements Plugin { def influxdb_version = "2.19" def httpclient_version = "4.5.13" def httpcore_version = "4.4.14" - def jackson_version = "2.14.1" + def jackson_version = "2.15.4" def jaxb_api_version = "2.3.3" def jsr305_version = "3.0.2" def everit_json_version = "1.14.2" From feebd1f3a2a734afa8c9214c2a1a86d3ebdc257a Mon Sep 17 00:00:00 2001 From: Ahmed Abualsaud Date: Wed, 5 Jun 2024 12:27:46 -0400 Subject: [PATCH 5/6] bump dep_urls_java jackson --- sdks/java/container/license_scripts/dep_urls_java.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/java/container/license_scripts/dep_urls_java.yaml b/sdks/java/container/license_scripts/dep_urls_java.yaml index c3a3fd2708f2..e0b78f209b00 100644 --- a/sdks/java/container/license_scripts/dep_urls_java.yaml +++ b/sdks/java/container/license_scripts/dep_urls_java.yaml @@ -58,7 +58,7 @@ xz: '1.5': # The original repo is down. This license is taken from https://tukaani.org/xz/java.html. license: "file://{}/xz/COPYING" jackson-bom: - '2.14.1': + '2.15.4': license: "https://raw.githubusercontent.com/FasterXML/jackson-bom/master/LICENSE" type: "Apache License 2.0" junit-dep: From 7640776b3abe3a2a91a8230ceaaa608444af3794 Mon Sep 17 00:00:00 2001 From: Ahmed Abualsaud Date: Wed, 5 Jun 2024 12:35:54 -0400 Subject: [PATCH 6/6] pin snakeyaml 1.33 for hadoop-format --- sdks/java/io/hadoop-format/build.gradle | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sdks/java/io/hadoop-format/build.gradle b/sdks/java/io/hadoop-format/build.gradle index fe2a04988808..c4def2c1de0e 100644 --- a/sdks/java/io/hadoop-format/build.gradle +++ b/sdks/java/io/hadoop-format/build.gradle @@ -47,6 +47,8 @@ configurations.testRuntimeClasspath { // Force use the old version of JAMM that cassandra relies on resolutionStrategy.force 'com.github.jbellis:jamm:0.3.0' + // Pin snakeyaml version due to cassandra-all does not support 2.x + resolutionStrategy.force 'org.yaml:snakeyaml:1.33' } dependencies {