From 7c81b93fc980b985e91c88203088f8222684bd2b Mon Sep 17 00:00:00 2001 From: nielm Date: Sun, 26 Apr 2020 18:27:04 +0200 Subject: [PATCH 1/4] Populate all SpannerIO batching parameters in display data. Add all the grouping/batching parameters in SpannerIO populateDisplayData(). --- .../beam/sdk/io/gcp/spanner/SpannerIO.java | 30 +++++++++++++++- .../io/gcp/spanner/SpannerIOWriteTest.java | 36 +++++++++++++++++-- 2 files changed, 62 insertions(+), 4 deletions(-) diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java index 16ed4b5c0c0d..9e6fee8e9f89 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java @@ -955,7 +955,17 @@ public void populateDisplayData(DisplayData.Builder builder) { super.populateDisplayData(builder); getSpannerConfig().populateDisplayData(builder); builder.add( - DisplayData.item("batchSizeBytes", getBatchSizeBytes()).withLabel("Batch Size in Bytes")); + DisplayData.item("batchSizeBytes", getBatchSizeBytes()) + .withLabel("Max batch Size in Bytes")); + builder.add( + DisplayData.item("maxNumMutations", getMaxNumMutations()) + .withLabel("Max number of mutated cells in batches")); + builder.add( + DisplayData.item("maxNumRows", getMaxNumRows()) + .withLabel("Max number of rows in Batches")); + builder.add( + DisplayData.item("groupingFactor", getGroupingFactor()) + .withLabel("Number of batches to sort over")); } } @@ -992,6 +1002,24 @@ public WriteGrouped(Write spec) { this.spec = spec; } + @Override + public void populateDisplayData(DisplayData.Builder builder) { + super.populateDisplayData(builder); + spec.getSpannerConfig().populateDisplayData(builder); + builder.add( + DisplayData.item("batchSizeBytes", spec.getBatchSizeBytes()) + .withLabel("Max batch Size in Bytes")); + builder.add( + DisplayData.item("maxNumMutations", spec.getMaxNumMutations()) + .withLabel("Max number of mutated cells in batches")); + builder.add( + DisplayData.item("maxNumRows", spec.getMaxNumRows()) + .withLabel("Max number of rows in Batches")); + builder.add( + DisplayData.item("groupingFactor", spec.getGroupingFactor()) + .withLabel("Number of batches to sort over")); + } + @Override public SpannerWriteResult expand(PCollection input) { diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIOWriteTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIOWriteTest.java index 6129882b4cf7..aaca469105c4 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIOWriteTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIOWriteTest.java @@ -589,20 +589,50 @@ public void retryOnAbortedAndDeadlineExceeded() throws InterruptedException { } @Test - public void displayData() throws Exception { + public void displayDataWrite() throws Exception { SpannerIO.Write write = SpannerIO.write() .withProjectId("test-project") .withInstanceId("test-instance") .withDatabaseId("test-database") - .withBatchSizeBytes(123); + .withBatchSizeBytes(123) + .withMaxNumMutations(456) + .withMaxNumRows(789) + .withGroupingFactor(100); DisplayData data = DisplayData.from(write); - assertThat(data.items(), hasSize(4)); + assertThat(data.items(), hasSize(7)); assertThat(data, hasDisplayItem("projectId", "test-project")); assertThat(data, hasDisplayItem("instanceId", "test-instance")); assertThat(data, hasDisplayItem("databaseId", "test-database")); assertThat(data, hasDisplayItem("batchSizeBytes", 123)); + assertThat(data, hasDisplayItem("maxNumMutations", 456)); + assertThat(data, hasDisplayItem("maxNumRows", 789)); + assertThat(data, hasDisplayItem("groupingFactor", 100)); + } + + @Test + public void displayDataWriteGrouped() throws Exception { + SpannerIO.WriteGrouped writeGrouped = + SpannerIO.write() + .withProjectId("test-project") + .withInstanceId("test-instance") + .withDatabaseId("test-database") + .withBatchSizeBytes(123) + .withMaxNumMutations(456) + .withMaxNumRows(789) + .withGroupingFactor(100) + .grouped(); + + DisplayData data = DisplayData.from(writeGrouped); + assertThat(data.items(), hasSize(7)); + assertThat(data, hasDisplayItem("projectId", "test-project")); + assertThat(data, hasDisplayItem("instanceId", "test-instance")); + assertThat(data, hasDisplayItem("databaseId", "test-database")); + assertThat(data, hasDisplayItem("batchSizeBytes", 123)); + assertThat(data, hasDisplayItem("maxNumMutations", 456)); + assertThat(data, hasDisplayItem("maxNumRows", 789)); + assertThat(data, hasDisplayItem("groupingFactor", 100)); } @Test From 9ded9e299b4ffe662437f17315aea6477ff4bcd7 Mon Sep 17 00:00:00 2001 From: Brian Hulette Date: Thu, 14 May 2020 12:35:11 -0700 Subject: [PATCH 2/4] Fix capitalization, clarify descriptions --- .../java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java index 9e6fee8e9f89..d2028bd0fd2f 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java @@ -956,13 +956,13 @@ public void populateDisplayData(DisplayData.Builder builder) { getSpannerConfig().populateDisplayData(builder); builder.add( DisplayData.item("batchSizeBytes", getBatchSizeBytes()) - .withLabel("Max batch Size in Bytes")); + .withLabel("Max batch size in bytes")); builder.add( DisplayData.item("maxNumMutations", getMaxNumMutations()) - .withLabel("Max number of mutated cells in batches")); + .withLabel("Max number of mutated cells in each batch")); builder.add( DisplayData.item("maxNumRows", getMaxNumRows()) - .withLabel("Max number of rows in Batches")); + .withLabel("Max number of rows in each batch")); builder.add( DisplayData.item("groupingFactor", getGroupingFactor()) .withLabel("Number of batches to sort over")); From 192e9ad146779c7ec5dd2ec6e3ffcbb94ff1f8aa Mon Sep 17 00:00:00 2001 From: Brian Hulette Date: Thu, 14 May 2020 12:37:48 -0700 Subject: [PATCH 3/4] fix capitalization, clarify description Grouped --- .../java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java index d2028bd0fd2f..3d7126d703b3 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java @@ -1008,13 +1008,13 @@ public void populateDisplayData(DisplayData.Builder builder) { spec.getSpannerConfig().populateDisplayData(builder); builder.add( DisplayData.item("batchSizeBytes", spec.getBatchSizeBytes()) - .withLabel("Max batch Size in Bytes")); + .withLabel("Max batch size in sytes")); builder.add( DisplayData.item("maxNumMutations", spec.getMaxNumMutations()) - .withLabel("Max number of mutated cells in batches")); + .withLabel("Max number of mutated cells in each batch")); builder.add( DisplayData.item("maxNumRows", spec.getMaxNumRows()) - .withLabel("Max number of rows in Batches")); + .withLabel("Max number of rows in each batch")); builder.add( DisplayData.item("groupingFactor", spec.getGroupingFactor()) .withLabel("Number of batches to sort over")); From 30a68f57ab7803560d2e079479de161165ba08c8 Mon Sep 17 00:00:00 2001 From: nielm Date: Tue, 19 May 2020 00:25:03 +0200 Subject: [PATCH 4/4] Refactor to extract single method for popuplating displayData --- .../beam/sdk/io/gcp/spanner/SpannerIO.java | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java index 3d7126d703b3..69f438055da9 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java @@ -953,6 +953,10 @@ public SpannerWriteResult expand(PCollection input) { @Override public void populateDisplayData(DisplayData.Builder builder) { super.populateDisplayData(builder); + populateDisplayDataWithParamaters(builder); + } + + private void populateDisplayDataWithParamaters(DisplayData.Builder builder) { getSpannerConfig().populateDisplayData(builder); builder.add( DisplayData.item("batchSizeBytes", getBatchSizeBytes()) @@ -1005,19 +1009,7 @@ public WriteGrouped(Write spec) { @Override public void populateDisplayData(DisplayData.Builder builder) { super.populateDisplayData(builder); - spec.getSpannerConfig().populateDisplayData(builder); - builder.add( - DisplayData.item("batchSizeBytes", spec.getBatchSizeBytes()) - .withLabel("Max batch size in sytes")); - builder.add( - DisplayData.item("maxNumMutations", spec.getMaxNumMutations()) - .withLabel("Max number of mutated cells in each batch")); - builder.add( - DisplayData.item("maxNumRows", spec.getMaxNumRows()) - .withLabel("Max number of rows in each batch")); - builder.add( - DisplayData.item("groupingFactor", spec.getGroupingFactor()) - .withLabel("Number of batches to sort over")); + spec.populateDisplayDataWithParamaters(builder); } @Override