From fd96c326b2ea31ad2477e1c009806248f7885475 Mon Sep 17 00:00:00 2001 From: Claire McGinty Date: Thu, 12 Dec 2024 14:46:51 -0500 Subject: [PATCH 1/7] Parameterize GoogleCloudStorage provider in GcsUtil to unblock gcs-connector 3.x --- .../extensions/gcp/options/GcsOptions.java | 23 ++++++++++ .../beam/sdk/extensions/gcp/util/GcsUtil.java | 43 +++++++++++++------ .../extensions/gcp/GcpCoreApiSurfaceTest.java | 1 + .../sdk/extensions/gcp/util/GcsUtilTest.java | 25 ++++++----- 4 files changed, 69 insertions(+), 23 deletions(-) diff --git a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/options/GcsOptions.java b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/options/GcsOptions.java index 1285b88663e7..177713bdcbee 100644 --- a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/options/GcsOptions.java +++ b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/options/GcsOptions.java @@ -18,6 +18,11 @@ package org.apache.beam.sdk.extensions.gcp.options; import com.fasterxml.jackson.annotation.JsonIgnore; +import com.google.api.client.http.HttpRequestInitializer; +import com.google.api.services.storage.Storage; +import com.google.auth.Credentials; +import com.google.cloud.hadoop.gcsio.GoogleCloudStorage; +import com.google.cloud.hadoop.gcsio.GoogleCloudStorageOptions; import com.google.cloud.hadoop.gcsio.GoogleCloudStorageReadOptions; import com.google.cloud.hadoop.util.AsyncWriteChannelOptions; import java.util.concurrent.ExecutorService; @@ -54,6 +59,15 @@ public interface GcsOptions extends ApplicationNameOptions, GcpOptions, Pipeline void setGoogleCloudStorageReadOptions(GoogleCloudStorageReadOptions value); + @JsonIgnore + @Description( + "The GoogleCloudStorageProvider instance that should be used to instantiate a GoogleCloudStorage client.") + @Default.InstanceFactory(GcsUtil.GoogleCloudStorageProviderFactory.class) + @Hidden + GoogleCloudStorageProvider getGoogleCloudStorageProvider(); + + void setGoogleCloudStorageProvider(GoogleCloudStorageProvider value); + /** * The ExecutorService instance to use to create threads, can be overridden to specify an * ExecutorService that is compatible with the user's environment. If unset, the default is to use @@ -208,4 +222,13 @@ public PathValidator create(PipelineOptions options) { .build(); } } + + @FunctionalInterface + public interface GoogleCloudStorageProvider { + GoogleCloudStorage get( + GoogleCloudStorageOptions options, + Storage storage, + Credentials credentials, + HttpRequestInitializer httpRequestInitializer); + } } diff --git a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java index d58154132a72..885e0aec532a 100644 --- a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java +++ b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java @@ -162,7 +162,8 @@ public GcsUtil create(PipelineOptions options) { gcsOptions.getEnableBucketWriteMetricCounter() ? gcsOptions.getGcsWriteCounterPrefix() : null), - gcsOptions.getGoogleCloudStorageReadOptions()); + gcsOptions.getGoogleCloudStorageReadOptions(), + gcsOptions.getGoogleCloudStorageProvider()); } /** Returns an instance of {@link GcsUtil} based on the given parameters. */ @@ -174,7 +175,8 @@ public static GcsUtil create( Credentials credentials, @Nullable Integer uploadBufferSizeBytes, GcsCountersOptions gcsCountersOptions, - GoogleCloudStorageReadOptions gcsReadOptions) { + GoogleCloudStorageReadOptions gcsReadOptions, + GcsOptions.GoogleCloudStorageProvider googleCloudStorageProvider) { return new GcsUtil( storageClient, httpRequestInitializer, @@ -184,7 +186,17 @@ public static GcsUtil create( uploadBufferSizeBytes, null, gcsCountersOptions, - gcsReadOptions); + gcsReadOptions, + googleCloudStorageProvider); + } + } + + public static class GoogleCloudStorageProviderFactory + implements DefaultValueFactory { + @Override + public GcsOptions.GoogleCloudStorageProvider create(PipelineOptions options) { + return (storageOptions, storage, credentials, httpRequestInitializer) -> + new GoogleCloudStorageImpl(storageOptions, storage, credentials); } } @@ -228,6 +240,7 @@ public static GcsUtil create( private final Credentials credentials; private GoogleCloudStorage googleCloudStorage; + private GcsOptions.GoogleCloudStorageProvider googleCloudStorageProvider; private GoogleCloudStorageOptions googleCloudStorageOptions; private final int rewriteDataOpBatchLimit; @@ -261,7 +274,8 @@ public static boolean isWildcard(GcsPath spec) { @Nullable Integer uploadBufferSizeBytes, @Nullable Integer rewriteDataOpBatchLimit, GcsCountersOptions gcsCountersOptions, - GoogleCloudStorageReadOptions gcsReadOptions) { + GoogleCloudStorageReadOptions gcsReadOptions, + GcsOptions.GoogleCloudStorageProvider googleCloudStorageProvider) { this.storageClient = storageClient; this.httpRequestInitializer = httpRequestInitializer; this.uploadBufferSizeBytes = uploadBufferSizeBytes; @@ -269,6 +283,7 @@ public static boolean isWildcard(GcsPath spec) { this.credentials = credentials; this.maxBytesRewrittenPerCall = null; this.numRewriteTokensUsed = null; + this.googleCloudStorageProvider = googleCloudStorageProvider; googleCloudStorageOptions = GoogleCloudStorageOptions.builder() .setAppName("Beam") @@ -276,7 +291,8 @@ public static boolean isWildcard(GcsPath spec) { .setGrpcEnabled(shouldUseGrpc) .build(); googleCloudStorage = - createGoogleCloudStorage(googleCloudStorageOptions, storageClient, credentials); + googleCloudStorageProvider.get( + googleCloudStorageOptions, storageClient, credentials, httpRequestInitializer); this.batchRequestSupplier = () -> { // Capture reference to this so that the most recent storageClient and initializer @@ -509,6 +525,11 @@ void setCloudStorageImpl(GoogleCloudStorageOptions g) { googleCloudStorageOptions = g; } + @VisibleForTesting + void setCloudStorageProviderImpl(GcsOptions.GoogleCloudStorageProvider p) { + googleCloudStorageProvider = p; + } + /** * Create an integer consumer that updates the counter identified by a prefix and a bucket name. */ @@ -695,8 +716,11 @@ public WritableByteChannel create(GcsPath path, CreateOptions options) throws IO GoogleCloudStorageOptions newGoogleCloudStorageOptions = googleCloudStorageOptions.toBuilder().setWriteChannelOptions(wcOptions).build(); GoogleCloudStorage gcpStorage = - createGoogleCloudStorage( - newGoogleCloudStorageOptions, this.storageClient, this.credentials); + googleCloudStorageProvider.get( + newGoogleCloudStorageOptions, + this.storageClient, + this.credentials, + httpRequestInitializer); StorageResourceId resourceId = new StorageResourceId( path.getBucket(), @@ -737,11 +761,6 @@ public WritableByteChannel create(GcsPath path, CreateOptions options) throws IO } } - GoogleCloudStorage createGoogleCloudStorage( - GoogleCloudStorageOptions options, Storage storage, Credentials credentials) { - return new GoogleCloudStorageImpl(options, storage, credentials); - } - /** * Checks whether the GCS bucket exists. Similar to {@link #bucketAccessible(GcsPath)}, but throws * exception if the bucket is inaccessible due to permissions or does not exist. diff --git a/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/GcpCoreApiSurfaceTest.java b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/GcpCoreApiSurfaceTest.java index 26d98125a3af..600aacb7077d 100644 --- a/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/GcpCoreApiSurfaceTest.java +++ b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/GcpCoreApiSurfaceTest.java @@ -52,6 +52,7 @@ public void testGcpCoreApiSurface() throws Exception { classesInPackage("com.google.api.client.http"), classesInPackage("com.google.api.client.json"), classesInPackage("com.google.api.client.util"), + classesInPackage("com.google.cloud.hadoop.util"), classesInPackage("com.google.api.services.storage"), classesInPackage("com.google.auth"), classesInPackage("com.fasterxml.jackson.annotation"), diff --git a/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtilTest.java b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtilTest.java index 97082572ce41..fc2325566239 100644 --- a/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtilTest.java +++ b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtilTest.java @@ -1626,6 +1626,8 @@ public static GcsUtilMock createMockWithMockStorage(PipelineOptions options, byt gcsUtilMock.googleCloudStorage = googleCloudStorageMock; // set the mock in the super object as well gcsUtilMock.setCloudStorageImpl(gcsUtilMock.googleCloudStorage); + gcsUtilMock.setCloudStorageProviderImpl( + (storageOpts, storage, credentials, httpRequestInitializer) -> googleCloudStorageMock); if (readPayload == null) { Mockito.when(googleCloudStorageMock.create(Mockito.any(), Mockito.any())) @@ -1657,7 +1659,8 @@ public static GcsUtilMock createMock(PipelineOptions options) { gcsOptions.getEnableBucketWriteMetricCounter() ? gcsOptions.getGcsWriteCounterPrefix() : null), - gcsOptions.getGoogleCloudStorageReadOptions()); + gcsOptions.getGoogleCloudStorageReadOptions(), + gcsOptions.getGoogleCloudStorageProvider()); } private GcsUtilMock( @@ -1669,7 +1672,8 @@ private GcsUtilMock( @Nullable Integer uploadBufferSizeBytes, @Nullable Integer rewriteDataOpBatchLimit, GcsCountersOptions gcsCountersOptions, - GoogleCloudStorageReadOptions gcsReadOptions) { + GoogleCloudStorageReadOptions gcsReadOptions, + GcsOptions.GoogleCloudStorageProvider googleCloudStorageProvider) { super( storageClient, httpRequestInitializer, @@ -1679,13 +1683,8 @@ private GcsUtilMock( uploadBufferSizeBytes, rewriteDataOpBatchLimit, gcsCountersOptions, - gcsReadOptions); - } - - @Override - GoogleCloudStorage createGoogleCloudStorage( - GoogleCloudStorageOptions options, Storage storage, Credentials credentials) { - return googleCloudStorage; + gcsReadOptions, + googleCloudStorageProvider); } } @@ -1698,7 +1697,9 @@ public void testCreate() throws IOException { GoogleCloudStorage mockStorage = Mockito.mock(GoogleCloudStorage.class); WritableByteChannel mockChannel = Mockito.mock(WritableByteChannel.class); - gcsUtil.googleCloudStorage = mockStorage; + gcsUtil.setCloudStorageImpl(mockStorage); + gcsUtil.setCloudStorageProviderImpl( + (options, storage, credentials, httpRequestInitializer) -> mockStorage); when(mockStorage.create(any(), any())).thenReturn(mockChannel); @@ -1716,7 +1717,9 @@ public void testCreateWithException() throws IOException { GoogleCloudStorage mockStorage = Mockito.mock(GoogleCloudStorage.class); - gcsUtil.googleCloudStorage = mockStorage; + gcsUtil.setCloudStorageImpl(mockStorage); + gcsUtil.setCloudStorageProviderImpl( + (options, storage, credentials, httpRequestInitializer) -> mockStorage); when(mockStorage.create(any(), any())).thenThrow(new RuntimeException("testException")); From 45eff100404b116e7a9f2a848040f39b172304d2 Mon Sep 17 00:00:00 2001 From: Claire McGinty Date: Fri, 10 Jan 2025 13:48:54 -0500 Subject: [PATCH 2/7] Use Reflection to attempt 3.x Builder, and fall back to 2.x Constructor --- .../extensions/gcp/options/GcsOptions.java | 23 ------ .../beam/sdk/extensions/gcp/util/GcsUtil.java | 80 ++++++++++++------- .../extensions/gcp/GcpCoreApiSurfaceTest.java | 1 - .../sdk/extensions/gcp/util/GcsUtilTest.java | 25 +++--- 4 files changed, 60 insertions(+), 69 deletions(-) diff --git a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/options/GcsOptions.java b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/options/GcsOptions.java index 177713bdcbee..1285b88663e7 100644 --- a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/options/GcsOptions.java +++ b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/options/GcsOptions.java @@ -18,11 +18,6 @@ package org.apache.beam.sdk.extensions.gcp.options; import com.fasterxml.jackson.annotation.JsonIgnore; -import com.google.api.client.http.HttpRequestInitializer; -import com.google.api.services.storage.Storage; -import com.google.auth.Credentials; -import com.google.cloud.hadoop.gcsio.GoogleCloudStorage; -import com.google.cloud.hadoop.gcsio.GoogleCloudStorageOptions; import com.google.cloud.hadoop.gcsio.GoogleCloudStorageReadOptions; import com.google.cloud.hadoop.util.AsyncWriteChannelOptions; import java.util.concurrent.ExecutorService; @@ -59,15 +54,6 @@ public interface GcsOptions extends ApplicationNameOptions, GcpOptions, Pipeline void setGoogleCloudStorageReadOptions(GoogleCloudStorageReadOptions value); - @JsonIgnore - @Description( - "The GoogleCloudStorageProvider instance that should be used to instantiate a GoogleCloudStorage client.") - @Default.InstanceFactory(GcsUtil.GoogleCloudStorageProviderFactory.class) - @Hidden - GoogleCloudStorageProvider getGoogleCloudStorageProvider(); - - void setGoogleCloudStorageProvider(GoogleCloudStorageProvider value); - /** * The ExecutorService instance to use to create threads, can be overridden to specify an * ExecutorService that is compatible with the user's environment. If unset, the default is to use @@ -222,13 +208,4 @@ public PathValidator create(PipelineOptions options) { .build(); } } - - @FunctionalInterface - public interface GoogleCloudStorageProvider { - GoogleCloudStorage get( - GoogleCloudStorageOptions options, - Storage storage, - Credentials credentials, - HttpRequestInitializer httpRequestInitializer); - } } diff --git a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java index 885e0aec532a..fe64c6f02d72 100644 --- a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java +++ b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java @@ -30,6 +30,7 @@ import com.google.api.client.http.HttpHeaders; import com.google.api.client.http.HttpRequestInitializer; import com.google.api.client.http.HttpStatusCodes; +import com.google.api.client.http.HttpTransport; import com.google.api.client.util.BackOff; import com.google.api.client.util.Sleeper; import com.google.api.services.storage.Storage; @@ -52,6 +53,7 @@ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.io.FileNotFoundException; import java.io.IOException; +import java.lang.reflect.Method; import java.nio.channels.SeekableByteChannel; import java.nio.channels.WritableByteChannel; import java.nio.file.AccessDeniedException; @@ -162,8 +164,7 @@ public GcsUtil create(PipelineOptions options) { gcsOptions.getEnableBucketWriteMetricCounter() ? gcsOptions.getGcsWriteCounterPrefix() : null), - gcsOptions.getGoogleCloudStorageReadOptions(), - gcsOptions.getGoogleCloudStorageProvider()); + gcsOptions.getGoogleCloudStorageReadOptions()); } /** Returns an instance of {@link GcsUtil} based on the given parameters. */ @@ -175,8 +176,7 @@ public static GcsUtil create( Credentials credentials, @Nullable Integer uploadBufferSizeBytes, GcsCountersOptions gcsCountersOptions, - GoogleCloudStorageReadOptions gcsReadOptions, - GcsOptions.GoogleCloudStorageProvider googleCloudStorageProvider) { + GoogleCloudStorageReadOptions gcsReadOptions) { return new GcsUtil( storageClient, httpRequestInitializer, @@ -186,17 +186,7 @@ public static GcsUtil create( uploadBufferSizeBytes, null, gcsCountersOptions, - gcsReadOptions, - googleCloudStorageProvider); - } - } - - public static class GoogleCloudStorageProviderFactory - implements DefaultValueFactory { - @Override - public GcsOptions.GoogleCloudStorageProvider create(PipelineOptions options) { - return (storageOptions, storage, credentials, httpRequestInitializer) -> - new GoogleCloudStorageImpl(storageOptions, storage, credentials); + gcsReadOptions); } } @@ -240,7 +230,6 @@ public GcsOptions.GoogleCloudStorageProvider create(PipelineOptions options) { private final Credentials credentials; private GoogleCloudStorage googleCloudStorage; - private GcsOptions.GoogleCloudStorageProvider googleCloudStorageProvider; private GoogleCloudStorageOptions googleCloudStorageOptions; private final int rewriteDataOpBatchLimit; @@ -274,8 +263,7 @@ public static boolean isWildcard(GcsPath spec) { @Nullable Integer uploadBufferSizeBytes, @Nullable Integer rewriteDataOpBatchLimit, GcsCountersOptions gcsCountersOptions, - GoogleCloudStorageReadOptions gcsReadOptions, - GcsOptions.GoogleCloudStorageProvider googleCloudStorageProvider) { + GoogleCloudStorageReadOptions gcsReadOptions) { this.storageClient = storageClient; this.httpRequestInitializer = httpRequestInitializer; this.uploadBufferSizeBytes = uploadBufferSizeBytes; @@ -283,7 +271,6 @@ public static boolean isWildcard(GcsPath spec) { this.credentials = credentials; this.maxBytesRewrittenPerCall = null; this.numRewriteTokensUsed = null; - this.googleCloudStorageProvider = googleCloudStorageProvider; googleCloudStorageOptions = GoogleCloudStorageOptions.builder() .setAppName("Beam") @@ -291,8 +278,7 @@ public static boolean isWildcard(GcsPath spec) { .setGrpcEnabled(shouldUseGrpc) .build(); googleCloudStorage = - googleCloudStorageProvider.get( - googleCloudStorageOptions, storageClient, credentials, httpRequestInitializer); + createGoogleCloudStorage(googleCloudStorageOptions, storageClient, credentials); this.batchRequestSupplier = () -> { // Capture reference to this so that the most recent storageClient and initializer @@ -525,11 +511,6 @@ void setCloudStorageImpl(GoogleCloudStorageOptions g) { googleCloudStorageOptions = g; } - @VisibleForTesting - void setCloudStorageProviderImpl(GcsOptions.GoogleCloudStorageProvider p) { - googleCloudStorageProvider = p; - } - /** * Create an integer consumer that updates the counter identified by a prefix and a bucket name. */ @@ -716,11 +697,8 @@ public WritableByteChannel create(GcsPath path, CreateOptions options) throws IO GoogleCloudStorageOptions newGoogleCloudStorageOptions = googleCloudStorageOptions.toBuilder().setWriteChannelOptions(wcOptions).build(); GoogleCloudStorage gcpStorage = - googleCloudStorageProvider.get( - newGoogleCloudStorageOptions, - this.storageClient, - this.credentials, - httpRequestInitializer); + createGoogleCloudStorage( + newGoogleCloudStorageOptions, this.storageClient, this.credentials); StorageResourceId resourceId = new StorageResourceId( path.getBucket(), @@ -761,6 +739,46 @@ public WritableByteChannel create(GcsPath path, CreateOptions options) throws IO } } + GoogleCloudStorage createGoogleCloudStorage( + GoogleCloudStorageOptions options, Storage storage, Credentials credentials) { + try { + // Attempt to construct gcs-connector 3.x-style GoogleCloudStorage, which is created + // exclusively via Builder method; this can be replaced once Java 8 is dropped and + // Beam can upgrade to gcs-connector 3.x + final Method builderMethod = GoogleCloudStorageImpl.class.getMethod("builder"); + Object builder = builderMethod.invoke(null); + final Class builderClass = + Class.forName("com.google.cloud.hadoop.gcsio.AutoBuilder_GoogleCloudStorageImpl_Builder"); + + final Method setOptionsMethod = + builderClass.getMethod("setOptions", GoogleCloudStorageOptions.class); + setOptionsMethod.setAccessible(true); + final Method setHttpTransportMethod = + builderClass.getMethod("setHttpTransport", HttpTransport.class); + setHttpTransportMethod.setAccessible(true); + final Method setCredentialsMethod = + builderClass.getMethod("setCredentials", Credentials.class); + setCredentialsMethod.setAccessible(true); + final Method setHttpRequestInitializerMethod = + builderClass.getMethod("setHttpRequestInitializer", HttpRequestInitializer.class); + setHttpRequestInitializerMethod.setAccessible(true); + + builder = setOptionsMethod.invoke(builder, options); + builder = setHttpTransportMethod.invoke(builder, storage.getRequestFactory().getTransport()); + builder = setCredentialsMethod.invoke(builder, credentials); + builder = setHttpRequestInitializerMethod.invoke(builder, httpRequestInitializer); + + final Method buildMethod = builderClass.getMethod("build"); + buildMethod.setAccessible(true); + return (GoogleCloudStorage) buildMethod.invoke(builder); + } catch (Exception e) { + // An exception means that local gcs-connector is still on 2.x; use Constructor method + // directly + // return new GoogleCloudStorageImpl(options, storage, credentials); + throw new RuntimeException("Failed to construct Storage", e); + } + } + /** * Checks whether the GCS bucket exists. Similar to {@link #bucketAccessible(GcsPath)}, but throws * exception if the bucket is inaccessible due to permissions or does not exist. diff --git a/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/GcpCoreApiSurfaceTest.java b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/GcpCoreApiSurfaceTest.java index 600aacb7077d..26d98125a3af 100644 --- a/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/GcpCoreApiSurfaceTest.java +++ b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/GcpCoreApiSurfaceTest.java @@ -52,7 +52,6 @@ public void testGcpCoreApiSurface() throws Exception { classesInPackage("com.google.api.client.http"), classesInPackage("com.google.api.client.json"), classesInPackage("com.google.api.client.util"), - classesInPackage("com.google.cloud.hadoop.util"), classesInPackage("com.google.api.services.storage"), classesInPackage("com.google.auth"), classesInPackage("com.fasterxml.jackson.annotation"), diff --git a/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtilTest.java b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtilTest.java index fc2325566239..97082572ce41 100644 --- a/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtilTest.java +++ b/sdks/java/extensions/google-cloud-platform-core/src/test/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtilTest.java @@ -1626,8 +1626,6 @@ public static GcsUtilMock createMockWithMockStorage(PipelineOptions options, byt gcsUtilMock.googleCloudStorage = googleCloudStorageMock; // set the mock in the super object as well gcsUtilMock.setCloudStorageImpl(gcsUtilMock.googleCloudStorage); - gcsUtilMock.setCloudStorageProviderImpl( - (storageOpts, storage, credentials, httpRequestInitializer) -> googleCloudStorageMock); if (readPayload == null) { Mockito.when(googleCloudStorageMock.create(Mockito.any(), Mockito.any())) @@ -1659,8 +1657,7 @@ public static GcsUtilMock createMock(PipelineOptions options) { gcsOptions.getEnableBucketWriteMetricCounter() ? gcsOptions.getGcsWriteCounterPrefix() : null), - gcsOptions.getGoogleCloudStorageReadOptions(), - gcsOptions.getGoogleCloudStorageProvider()); + gcsOptions.getGoogleCloudStorageReadOptions()); } private GcsUtilMock( @@ -1672,8 +1669,7 @@ private GcsUtilMock( @Nullable Integer uploadBufferSizeBytes, @Nullable Integer rewriteDataOpBatchLimit, GcsCountersOptions gcsCountersOptions, - GoogleCloudStorageReadOptions gcsReadOptions, - GcsOptions.GoogleCloudStorageProvider googleCloudStorageProvider) { + GoogleCloudStorageReadOptions gcsReadOptions) { super( storageClient, httpRequestInitializer, @@ -1683,8 +1679,13 @@ private GcsUtilMock( uploadBufferSizeBytes, rewriteDataOpBatchLimit, gcsCountersOptions, - gcsReadOptions, - googleCloudStorageProvider); + gcsReadOptions); + } + + @Override + GoogleCloudStorage createGoogleCloudStorage( + GoogleCloudStorageOptions options, Storage storage, Credentials credentials) { + return googleCloudStorage; } } @@ -1697,9 +1698,7 @@ public void testCreate() throws IOException { GoogleCloudStorage mockStorage = Mockito.mock(GoogleCloudStorage.class); WritableByteChannel mockChannel = Mockito.mock(WritableByteChannel.class); - gcsUtil.setCloudStorageImpl(mockStorage); - gcsUtil.setCloudStorageProviderImpl( - (options, storage, credentials, httpRequestInitializer) -> mockStorage); + gcsUtil.googleCloudStorage = mockStorage; when(mockStorage.create(any(), any())).thenReturn(mockChannel); @@ -1717,9 +1716,7 @@ public void testCreateWithException() throws IOException { GoogleCloudStorage mockStorage = Mockito.mock(GoogleCloudStorage.class); - gcsUtil.setCloudStorageImpl(mockStorage); - gcsUtil.setCloudStorageProviderImpl( - (options, storage, credentials, httpRequestInitializer) -> mockStorage); + gcsUtil.googleCloudStorage = mockStorage; when(mockStorage.create(any(), any())).thenThrow(new RuntimeException("testException")); From b829452dbe86c88ac411efbec97133a46bf05dbd Mon Sep 17 00:00:00 2001 From: Claire McGinty Date: Fri, 10 Jan 2025 13:54:56 -0500 Subject: [PATCH 3/7] Attempt constructing 3.x-style GCS via reflection; fall back to 2.x constructor --- .../beam/sdk/extensions/gcp/util/GcsUtil.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java index fe64c6f02d72..3b4eaed7a844 100644 --- a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java +++ b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java @@ -744,7 +744,7 @@ GoogleCloudStorage createGoogleCloudStorage( try { // Attempt to construct gcs-connector 3.x-style GoogleCloudStorage, which is created // exclusively via Builder method; this can be replaced once Java 8 is dropped and - // Beam can upgrade to gcs-connector 3.x + // Beam can upgrade to gcsio 3.x final Method builderMethod = GoogleCloudStorageImpl.class.getMethod("builder"); Object builder = builderMethod.invoke(null); final Class builderClass = @@ -753,29 +753,29 @@ GoogleCloudStorage createGoogleCloudStorage( final Method setOptionsMethod = builderClass.getMethod("setOptions", GoogleCloudStorageOptions.class); setOptionsMethod.setAccessible(true); + builder = setOptionsMethod.invoke(builder, options); + final Method setHttpTransportMethod = builderClass.getMethod("setHttpTransport", HttpTransport.class); setHttpTransportMethod.setAccessible(true); + builder = setHttpTransportMethod.invoke(builder, storage.getRequestFactory().getTransport()); + final Method setCredentialsMethod = builderClass.getMethod("setCredentials", Credentials.class); setCredentialsMethod.setAccessible(true); + builder = setCredentialsMethod.invoke(builder, credentials); + final Method setHttpRequestInitializerMethod = builderClass.getMethod("setHttpRequestInitializer", HttpRequestInitializer.class); setHttpRequestInitializerMethod.setAccessible(true); - - builder = setOptionsMethod.invoke(builder, options); - builder = setHttpTransportMethod.invoke(builder, storage.getRequestFactory().getTransport()); - builder = setCredentialsMethod.invoke(builder, credentials); builder = setHttpRequestInitializerMethod.invoke(builder, httpRequestInitializer); final Method buildMethod = builderClass.getMethod("build"); buildMethod.setAccessible(true); return (GoogleCloudStorage) buildMethod.invoke(builder); } catch (Exception e) { - // An exception means that local gcs-connector is still on 2.x; use Constructor method - // directly - // return new GoogleCloudStorageImpl(options, storage, credentials); - throw new RuntimeException("Failed to construct Storage", e); + // An exception means that local gcsio version is still 2.x; use Constructor directly + return new GoogleCloudStorageImpl(options, storage, credentials); } } From ca8f9c05ff491af607d8f289afbe02f1abe82887 Mon Sep 17 00:00:00 2001 From: Claire McGinty Date: Fri, 10 Jan 2025 14:31:17 -0500 Subject: [PATCH 4/7] Update CHANGES.md --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 44f5fe88c4dc..2c00cd02e7d1 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -62,6 +62,7 @@ ## I/Os +* Support gcs-connector 3.x+ in GcsUtil ([#33368](https://github.com/apache/beam/pull/33368)) * Support for X source added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). ## New Features / Improvements From 7c5e744e14f53ed7ebdd99c795a767e0547fab15 Mon Sep 17 00:00:00 2001 From: Claire McGinty Date: Fri, 10 Jan 2025 14:40:51 -0500 Subject: [PATCH 5/7] Add TODO note on reflection block --- .../org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java index 3b4eaed7a844..38ccb050cd55 100644 --- a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java +++ b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java @@ -742,9 +742,8 @@ public WritableByteChannel create(GcsPath path, CreateOptions options) throws IO GoogleCloudStorage createGoogleCloudStorage( GoogleCloudStorageOptions options, Storage storage, Credentials credentials) { try { - // Attempt to construct gcs-connector 3.x-style GoogleCloudStorage, which is created - // exclusively via Builder method; this can be replaced once Java 8 is dropped and - // Beam can upgrade to gcsio 3.x + // Attempt to construct gcs-connector 3.x GoogleCloudStorage, which uses a Builder + // TODO eliminate reflection once Beam drops Java 8 support and upgrades to gcsio 3.x final Method builderMethod = GoogleCloudStorageImpl.class.getMethod("builder"); Object builder = builderMethod.invoke(null); final Class builderClass = From 72bca6b43151bf686e0b7a5773c55cf34877b14f Mon Sep 17 00:00:00 2001 From: Claire McGinty Date: Tue, 14 Jan 2025 08:40:03 -0500 Subject: [PATCH 6/7] Try non-reflected construction first --- .../beam/sdk/extensions/gcp/util/GcsUtil.java | 70 ++++++++++--------- 1 file changed, 38 insertions(+), 32 deletions(-) diff --git a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java index 38ccb050cd55..156222b1c1b6 100644 --- a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java +++ b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java @@ -742,39 +742,45 @@ public WritableByteChannel create(GcsPath path, CreateOptions options) throws IO GoogleCloudStorage createGoogleCloudStorage( GoogleCloudStorageOptions options, Storage storage, Credentials credentials) { try { - // Attempt to construct gcs-connector 3.x GoogleCloudStorage, which uses a Builder - // TODO eliminate reflection once Beam drops Java 8 support and upgrades to gcsio 3.x - final Method builderMethod = GoogleCloudStorageImpl.class.getMethod("builder"); - Object builder = builderMethod.invoke(null); - final Class builderClass = - Class.forName("com.google.cloud.hadoop.gcsio.AutoBuilder_GoogleCloudStorageImpl_Builder"); - - final Method setOptionsMethod = - builderClass.getMethod("setOptions", GoogleCloudStorageOptions.class); - setOptionsMethod.setAccessible(true); - builder = setOptionsMethod.invoke(builder, options); - - final Method setHttpTransportMethod = - builderClass.getMethod("setHttpTransport", HttpTransport.class); - setHttpTransportMethod.setAccessible(true); - builder = setHttpTransportMethod.invoke(builder, storage.getRequestFactory().getTransport()); - - final Method setCredentialsMethod = - builderClass.getMethod("setCredentials", Credentials.class); - setCredentialsMethod.setAccessible(true); - builder = setCredentialsMethod.invoke(builder, credentials); - - final Method setHttpRequestInitializerMethod = - builderClass.getMethod("setHttpRequestInitializer", HttpRequestInitializer.class); - setHttpRequestInitializerMethod.setAccessible(true); - builder = setHttpRequestInitializerMethod.invoke(builder, httpRequestInitializer); - - final Method buildMethod = builderClass.getMethod("build"); - buildMethod.setAccessible(true); - return (GoogleCloudStorage) buildMethod.invoke(builder); - } catch (Exception e) { - // An exception means that local gcsio version is still 2.x; use Constructor directly return new GoogleCloudStorageImpl(options, storage, credentials); + } catch (NoSuchMethodError e) { + // gcs-connector 3.x drops the direct constructor and exclusively uses Builder + // TODO eliminate reflection once Beam drops Java 8 support and upgrades to gcsio 3.x + try { + final Method builderMethod = GoogleCloudStorageImpl.class.getMethod("builder"); + Object builder = builderMethod.invoke(null); + final Class builderClass = + Class.forName( + "com.google.cloud.hadoop.gcsio.AutoBuilder_GoogleCloudStorageImpl_Builder"); + + final Method setOptionsMethod = + builderClass.getMethod("setOptions", GoogleCloudStorageOptions.class); + setOptionsMethod.setAccessible(true); + builder = setOptionsMethod.invoke(builder, options); + + final Method setHttpTransportMethod = + builderClass.getMethod("setHttpTransport", HttpTransport.class); + setHttpTransportMethod.setAccessible(true); + builder = + setHttpTransportMethod.invoke(builder, storage.getRequestFactory().getTransport()); + + final Method setCredentialsMethod = + builderClass.getMethod("setCredentials", Credentials.class); + setCredentialsMethod.setAccessible(true); + builder = setCredentialsMethod.invoke(builder, credentials); + + final Method setHttpRequestInitializerMethod = + builderClass.getMethod("setHttpRequestInitializer", HttpRequestInitializer.class); + setHttpRequestInitializerMethod.setAccessible(true); + builder = setHttpRequestInitializerMethod.invoke(builder, httpRequestInitializer); + + final Method buildMethod = builderClass.getMethod("build"); + buildMethod.setAccessible(true); + return (GoogleCloudStorage) buildMethod.invoke(builder); + } catch (Exception reflectionError) { + throw new RuntimeException( + "Failed to construct GoogleCloudStorageImpl from gcsio 3.x Builder", e); + } } } From e4e272a4f981b58e9c70df11dd381f0bbf5f2fad Mon Sep 17 00:00:00 2001 From: Claire McGinty Date: Tue, 14 Jan 2025 10:21:26 -0500 Subject: [PATCH 7/7] Fix SpotBugs error --- .../java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java index 156222b1c1b6..caa59c87b5dd 100644 --- a/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java +++ b/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/GcsUtil.java @@ -779,7 +779,7 @@ GoogleCloudStorage createGoogleCloudStorage( return (GoogleCloudStorage) buildMethod.invoke(builder); } catch (Exception reflectionError) { throw new RuntimeException( - "Failed to construct GoogleCloudStorageImpl from gcsio 3.x Builder", e); + "Failed to construct GoogleCloudStorageImpl from gcsio 3.x Builder", reflectionError); } } }