From 9eca6e144eacc8b8284ab50a0e1082232c020702 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Gajowy?= Date: Wed, 21 Feb 2018 17:03:37 +0100 Subject: [PATCH 1/7] [BEAM-3734] Add XmlIOIT using sink and readFiles() The test can be parametrized with charset, number of records and filename prefix. --- .../beam/sdk/io/common/DeleteFileFn.java | 47 ++++++ .../sdk/io/common/IOTestPipelineOptions.java | 7 + .../org/apache/beam/sdk/io/avro/AvroIOIT.java | 3 +- .../sdk/io/common/FileBasedIOITHelper.java | 26 ---- .../org/apache/beam/sdk/io/text/TextIOIT.java | 3 +- .../beam/sdk/io/tfrecord/TFRecordIOIT.java | 3 +- sdks/java/io/xml/build.gradle | 11 ++ sdks/java/io/xml/pom.xml | 14 +- .../java/org/apache/beam/sdk/io/xml/Bird.java | 85 ++++++++++ .../org/apache/beam/sdk/io/xml/XmlIOIT.java | 147 ++++++++++++++++++ .../org/apache/beam/sdk/io/xml/XmlIOTest.java | 63 -------- 11 files changed, 316 insertions(+), 93 deletions(-) create mode 100644 sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/DeleteFileFn.java create mode 100644 sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/Bird.java create mode 100644 sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java diff --git a/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/DeleteFileFn.java b/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/DeleteFileFn.java new file mode 100644 index 000000000000..65aeb4d950ce --- /dev/null +++ b/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/DeleteFileFn.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.io.common; + +import com.google.common.collect.Iterables; +import java.io.IOException; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; +import org.apache.beam.sdk.io.FileSystems; +import org.apache.beam.sdk.io.fs.MatchResult; +import org.apache.beam.sdk.io.fs.ResourceId; +import org.apache.beam.sdk.transforms.DoFn; + +/** + * Deletes matching files using the FileSystems API. + */ +public class DeleteFileFn extends DoFn { + + @ProcessElement + public void processElement(ProcessContext c) throws IOException { + MatchResult match = Iterables + .getOnlyElement(FileSystems.match(Collections.singletonList(c.element()))); + + Set resourceIds = new HashSet<>(); + for (MatchResult.Metadata metadataElem : match.metadata()) { + resourceIds.add(metadataElem.resourceId()); + } + + FileSystems.delete(resourceIds); + } +} diff --git a/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/IOTestPipelineOptions.java b/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/IOTestPipelineOptions.java index b86020ec2784..070733771bf9 100644 --- a/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/IOTestPipelineOptions.java +++ b/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/IOTestPipelineOptions.java @@ -107,4 +107,11 @@ public interface IOTestPipelineOptions extends TestPipelineOptions { String getCompressionType(); void setCompressionType(String compressionType); + + /* Xml */ + @Description("Xml file charset name") + @Default.String("UTF-8") + String getCharset(); + + void setCharset(String charset); } diff --git a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/avro/AvroIOIT.java b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/avro/AvroIOIT.java index f93d4dc4cba3..f28cc14501fc 100644 --- a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/avro/AvroIOIT.java +++ b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/avro/AvroIOIT.java @@ -27,6 +27,7 @@ import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.io.AvroIO; import org.apache.beam.sdk.io.GenerateSequence; +import org.apache.beam.sdk.io.common.DeleteFileFn; import org.apache.beam.sdk.io.common.FileBasedIOITHelper; import org.apache.beam.sdk.io.common.HashingFn; import org.apache.beam.sdk.io.common.IOTestPipelineOptions; @@ -119,7 +120,7 @@ public void writeThenReadAll() { testFilenames.apply( "Delete test files", - ParDo.of(new FileBasedIOITHelper.DeleteFileFn()) + ParDo.of(new DeleteFileFn()) .withSideInputs(consolidatedHashcode.apply(View.asSingleton()))); pipeline.run().waitUntilFinish(); diff --git a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/common/FileBasedIOITHelper.java b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/common/FileBasedIOITHelper.java index 40b04617d8ad..1771784f73ef 100644 --- a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/common/FileBasedIOITHelper.java +++ b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/common/FileBasedIOITHelper.java @@ -19,16 +19,8 @@ package org.apache.beam.sdk.io.common; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Iterables; -import java.io.IOException; -import java.util.Collections; import java.util.Date; -import java.util.HashSet; import java.util.Map; -import java.util.Set; -import org.apache.beam.sdk.io.FileSystems; -import org.apache.beam.sdk.io.fs.MatchResult; -import org.apache.beam.sdk.io.fs.ResourceId; import org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.sdk.options.PipelineOptionsValidator; import org.apache.beam.sdk.testing.TestPipeline; @@ -82,22 +74,4 @@ public void processElement(ProcessContext c) { } } - /** - * Deletes matching files using the FileSystems API. - */ - public static class DeleteFileFn extends DoFn { - - @ProcessElement - public void processElement(ProcessContext c) throws IOException { - MatchResult match = Iterables - .getOnlyElement(FileSystems.match(Collections.singletonList(c.element()))); - - Set resourceIds = new HashSet<>(); - for (MatchResult.Metadata metadataElem : match.metadata()) { - resourceIds.add(metadataElem.resourceId()); - } - - FileSystems.delete(resourceIds); - } - } } diff --git a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java index 3e67f6ab5803..8762b9f800df 100644 --- a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java +++ b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java @@ -26,6 +26,7 @@ import org.apache.beam.sdk.io.Compression; import org.apache.beam.sdk.io.GenerateSequence; import org.apache.beam.sdk.io.TextIO; +import org.apache.beam.sdk.io.common.DeleteFileFn; import org.apache.beam.sdk.io.common.FileBasedIOITHelper; import org.apache.beam.sdk.io.common.HashingFn; import org.apache.beam.sdk.io.common.IOTestPipelineOptions; @@ -107,7 +108,7 @@ public void writeThenReadAll() { testFilenames.apply( "Delete test files", - ParDo.of(new FileBasedIOITHelper.DeleteFileFn()) + ParDo.of(new DeleteFileFn()) .withSideInputs(consolidatedHashcode.apply(View.asSingleton()))); pipeline.run().waitUntilFinish(); diff --git a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/tfrecord/TFRecordIOIT.java b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/tfrecord/TFRecordIOIT.java index 99d23e0f432d..cfc5cd4641f0 100644 --- a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/tfrecord/TFRecordIOIT.java +++ b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/tfrecord/TFRecordIOIT.java @@ -26,6 +26,7 @@ import org.apache.beam.sdk.io.Compression; import org.apache.beam.sdk.io.GenerateSequence; import org.apache.beam.sdk.io.TFRecordIO; +import org.apache.beam.sdk.io.common.DeleteFileFn; import org.apache.beam.sdk.io.common.FileBasedIOITHelper; import org.apache.beam.sdk.io.common.HashingFn; import org.apache.beam.sdk.io.common.IOTestPipelineOptions; @@ -121,7 +122,7 @@ public void writeThenReadAll() { .apply(Create.of(filenamePattern)) .apply( "Delete test files", - ParDo.of(new FileBasedIOITHelper.DeleteFileFn()) + ParDo.of(new DeleteFileFn()) .withSideInputs(consolidatedHashcode.apply(View.asSingleton()))); readPipeline.run().waitUntilFinish(); } diff --git a/sdks/java/io/xml/build.gradle b/sdks/java/io/xml/build.gradle index 5e66ad96f35a..61352424141a 100644 --- a/sdks/java/io/xml/build.gradle +++ b/sdks/java/io/xml/build.gradle @@ -21,6 +21,15 @@ applyJavaNature() description = "Apache Beam :: SDKs :: Java :: IO :: XML" +/* + * We need to rely on manually specifying these evaluationDependsOn to ensure that + * the following projects are evaluated before we evaluate this project. This is because + * we are attempting to reference the "sourceSets.test.output" directly. + * TODO: Swap to generating test artifacts which we can then rely on instead of + * the test outputs directly. + */ +evaluationDependsOn(":sdks:java:io:common") + dependencies { compile library.java.guava shadow project(path: ":sdks:java:core", configuration: "shadow") @@ -29,6 +38,8 @@ dependencies { shadow library.java.woodstox_core_asl testCompile project(path: ":sdks:java:core", configuration: "shadowTest") testCompile project(path: ":runners:direct-java", configuration: "shadow") + testCompile project(path: ":sdks:java:io:common", configuration: "shadow") + testCompile project(":sdks:java:io:common").sourceSets.test.output testCompile library.java.junit testCompile library.java.slf4j_jdk14 testCompile library.java.hamcrest_core diff --git a/sdks/java/io/xml/pom.xml b/sdks/java/io/xml/pom.xml index f4783442d75e..db5a858c52a6 100644 --- a/sdks/java/io/xml/pom.xml +++ b/sdks/java/io/xml/pom.xml @@ -109,7 +109,19 @@ hamcrest-library test - + + + org.apache.beam + beam-sdks-java-io-common + test + tests + + + + org.apache.beam + beam-sdks-java-io-common + test + diff --git a/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/Bird.java b/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/Bird.java new file mode 100644 index 000000000000..dd52ded0855d --- /dev/null +++ b/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/Bird.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.io.xml; + +import java.io.Serializable; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlRootElement; +import javax.xml.bind.annotation.XmlType; + +/** + * Test JAXB annotated class. + */ +@SuppressWarnings("unused") @XmlRootElement(name = "bird") @XmlType(propOrder = { "name", + "adjective" }) public final class Bird implements Serializable { + private String name; + private String adjective; + + @XmlElement(name = "species") + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getAdjective() { + return adjective; + } + + public void setAdjective(String adjective) { + this.adjective = adjective; + } + + public Bird() {} + + public Bird(String adjective, String name) { + this.adjective = adjective; + this.name = name; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + Bird bird = (Bird) o; + + if (!name.equals(bird.name)) { + return false; + } + return adjective.equals(bird.adjective); + } + + @Override + public int hashCode() { + int result = name.hashCode(); + result = 31 * result + adjective.hashCode(); + return result; + } + + @Override + public String toString() { + return String.format("Bird: %s, %s", name, adjective); + } +} diff --git a/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java b/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java new file mode 100644 index 000000000000..e93578a5ffe6 --- /dev/null +++ b/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.io.xml; + +import com.google.common.collect.ImmutableMap; +import java.nio.charset.Charset; +import java.util.Date; +import java.util.Map; +import org.apache.beam.sdk.io.FileIO; +import org.apache.beam.sdk.io.GenerateSequence; +import org.apache.beam.sdk.io.common.DeleteFileFn; +import org.apache.beam.sdk.io.common.HashingFn; +import org.apache.beam.sdk.io.common.IOTestPipelineOptions; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.testing.PAssert; +import org.apache.beam.sdk.testing.TestPipeline; +import org.apache.beam.sdk.transforms.Combine; +import org.apache.beam.sdk.transforms.MapElements; +import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.transforms.SimpleFunction; +import org.apache.beam.sdk.transforms.Values; +import org.apache.beam.sdk.transforms.View; +import org.apache.beam.sdk.values.PCollection; +import org.junit.BeforeClass; +import org.junit.Rule; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Integration tests for {@link org.apache.beam.sdk.io.xml.XmlIO}. + * + *

Run those tests using the command below. Pass in connection information via PipelineOptions: + *

+ *  mvn -e -Pio-it verify -pl sdks/java/io/xml
+ *  -Dit.test=org.apache.beam.sdk.io.xml.XmlIOIT
+ *  -DintegrationTestPipelineOptions='[
+ *  "--numberOfRecords=100000",
+ *  "--filenamePrefix=output_file_path",
+ *  "--charset=UTF-8",
+ *  ]'
+ * 
+ *

+ */ +@RunWith(JUnit4.class) +public class XmlIOIT { + + private static Integer numberOfRecords; + + private static String filenamePrefix; + + private static Charset charset; + + @Rule + public TestPipeline pipeline = TestPipeline.create(); + + @BeforeClass + public static void setup() { + PipelineOptionsFactory.register(IOTestPipelineOptions.class); + IOTestPipelineOptions options = TestPipeline + .testingPipelineOptions() + .as(IOTestPipelineOptions.class); + + filenamePrefix = appendTimestampToPrefix(options.getFilenamePrefix()); + numberOfRecords = options.getNumberOfRecords(); + charset = Charset.forName(options.getCharset()); + } + + private static String appendTimestampToPrefix(String filenamePrefix) { + return String.format("%s_%s", filenamePrefix, new Date().getTime()); + } + + @Test + public void writeThenReadViaSinkAndReadFiles() { + PCollection testFileNames = pipeline + .apply("Generate sequence", GenerateSequence.from(0).to(numberOfRecords)) + .apply("Create Birds", MapElements.via(new LongToBird())) + .apply("Write birds to xml files", + FileIO.write() + .via(XmlIO.sink(Bird.class).withRootElement("birds").withCharset(charset)) + .to(filenamePrefix).withPrefix("birds").withSuffix(".xml")) + .getPerDestinationOutputFilenames().apply("Get filenames", Values.create()); + + PCollection birds = testFileNames + .apply("Find files", FileIO.matchAll()) + .apply("Read matched files", FileIO.readMatches()) + .apply("Read xml files", XmlIO.readFiles() + .withRecordClass(Bird.class).withRootElement("birds") + .withRecordElement("bird") + .withCharset(charset)); + + PCollection consolidatedHashcode = birds + .apply("Map birds to strings", MapElements.via(new BirdToString())) + .apply("Calculate hashcode", Combine.globally(new HashingFn())); + + String expectedHash = getExpectedHashForRecordCount(numberOfRecords); + PAssert.thatSingleton(consolidatedHashcode).isEqualTo(expectedHash); + + testFileNames.apply("Delete test files", ParDo.of(new DeleteFileFn()) + .withSideInputs(consolidatedHashcode.apply(View.asSingleton()))); + + pipeline.run().waitUntilFinish(); + } + + private static String getExpectedHashForRecordCount(int numberOfRecords) { + Map expectedHashes = ImmutableMap.of( + 100_000, "af7775de90d0b0c8bbc36273fbca26fe" + ); + + String hash = expectedHashes.get(numberOfRecords); + if (hash == null) { + throw new UnsupportedOperationException( + String.format("No hash for that line count: %s", numberOfRecords) + ); + } + return hash; + } + + private static class LongToBird extends SimpleFunction { + @Override + public Bird apply(Long input) { + return new Bird("Testing", "Bird number " + input); + } + } + + private static class BirdToString extends SimpleFunction{ + @Override + public String apply(Bird input) { + return input.toString(); + } + } +} diff --git a/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOTest.java b/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOTest.java index f6870e12a614..b6a094fb42a7 100644 --- a/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOTest.java +++ b/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOTest.java @@ -23,13 +23,9 @@ import com.google.common.collect.Lists; import java.io.File; import java.io.IOException; -import java.io.Serializable; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.util.List; -import javax.xml.bind.annotation.XmlElement; -import javax.xml.bind.annotation.XmlRootElement; -import javax.xml.bind.annotation.XmlType; import org.apache.beam.sdk.io.FileIO; import org.apache.beam.sdk.testing.NeedsRunner; import org.apache.beam.sdk.testing.PAssert; @@ -208,63 +204,4 @@ public void testWriteDisplayData() { assertThat(displayData, hasDisplayItem("rootElement", "bird")); assertThat(displayData, hasDisplayItem("recordClass", Integer.class)); } - - /** - * Test JAXB annotated class. - */ - @SuppressWarnings("unused") - @XmlRootElement(name = "bird") - @XmlType(propOrder = {"name", "adjective"}) - private static final class Bird implements Serializable { - private String name; - private String adjective; - - @XmlElement(name = "species") - public String getName() { - return name; - } - - public void setName(String name) { - this.name = name; - } - - public String getAdjective() { - return adjective; - } - - public void setAdjective(String adjective) { - this.adjective = adjective; - } - - public Bird() {} - - public Bird(String adjective, String name) { - this.adjective = adjective; - this.name = name; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - Bird bird = (Bird) o; - - if (!name.equals(bird.name)) { - return false; - } - return adjective.equals(bird.adjective); - } - - @Override - public int hashCode() { - int result = name.hashCode(); - result = 31 * result + adjective.hashCode(); - return result; - } - } } From f2b87a0ef2040c1a0d469e7d047c55ba9459e5fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Gajowy?= Date: Thu, 22 Feb 2018 15:09:57 +0100 Subject: [PATCH 2/7] [BEAM-3734] Refactor. Reduce code duplication - generify getHashForRecordCount() (it's reusable in all IOITs now - move and rename appendTimestampToPrefix method --- .../beam/sdk/io/common/IOTestHelper.java | 44 +++++++++++++++++++ .../apache/beam/sdk/io/common/TestRow.java | 9 ++-- .../org/apache/beam/sdk/io/avro/AvroIOIT.java | 4 +- .../sdk/io/common/FileBasedIOITHelper.java | 15 ++----- .../org/apache/beam/sdk/io/text/TextIOIT.java | 4 +- .../beam/sdk/io/tfrecord/TFRecordIOIT.java | 4 +- .../org/apache/beam/sdk/io/xml/XmlIOIT.java | 30 ++++--------- 7 files changed, 66 insertions(+), 44 deletions(-) create mode 100644 sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/IOTestHelper.java diff --git a/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/IOTestHelper.java b/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/IOTestHelper.java new file mode 100644 index 000000000000..81a2984def8e --- /dev/null +++ b/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/IOTestHelper.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.io.common; + +import java.util.Date; +import java.util.Map; + +/** + * This class contains common helper methods to ease writing IO Tests. + */ +public class IOTestHelper { + + private IOTestHelper() { + } + + public static String appendTimestampSuffix(String text) { + return String.format("%s_%s", text, new Date().getTime()); + } + + public static String getHashForRecordCount(int recordCount, Map hashes) { + String hash = hashes.get(recordCount); + if (hash == null) { + throw new UnsupportedOperationException( + String.format("No hash for that record count: %s", recordCount) + ); + } + return hash; + } +} diff --git a/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/TestRow.java b/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/TestRow.java index e6bc7e8c4f7b..3ad57f446f84 100644 --- a/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/TestRow.java +++ b/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/TestRow.java @@ -17,6 +17,8 @@ */ package org.apache.beam.sdk.io.common; +import static org.apache.beam.sdk.io.common.IOTestHelper.getHashForRecordCount; + import com.google.auto.value.AutoValue; import com.google.common.collect.ImmutableMap; import java.io.Serializable; @@ -108,10 +110,7 @@ public void processElement(ProcessContext c) { */ public static String getExpectedHashForRowCount(int rowCount) throws UnsupportedOperationException { - String hash = EXPECTED_HASHES.get(rowCount); - if (hash == null) { - throw new UnsupportedOperationException("No hash for that row count"); - } - return hash; + + return getHashForRecordCount(rowCount, EXPECTED_HASHES); } } diff --git a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/avro/AvroIOIT.java b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/avro/AvroIOIT.java index f28cc14501fc..855c1c82d62e 100644 --- a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/avro/AvroIOIT.java +++ b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/avro/AvroIOIT.java @@ -17,9 +17,9 @@ */ package org.apache.beam.sdk.io.avro; -import static org.apache.beam.sdk.io.common.FileBasedIOITHelper.appendTimestampToPrefix; import static org.apache.beam.sdk.io.common.FileBasedIOITHelper.getExpectedHashForLineCount; import static org.apache.beam.sdk.io.common.FileBasedIOITHelper.readTestPipelineOptions; +import static org.apache.beam.sdk.io.common.IOTestHelper.appendTimestampSuffix; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; @@ -86,7 +86,7 @@ public static void setup() { IOTestPipelineOptions options = readTestPipelineOptions(); numberOfTextLines = options.getNumberOfRecords(); - filenamePrefix = appendTimestampToPrefix(options.getFilenamePrefix()); + filenamePrefix = appendTimestampSuffix(options.getFilenamePrefix()); } @Test diff --git a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/common/FileBasedIOITHelper.java b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/common/FileBasedIOITHelper.java index 1771784f73ef..ffa2fe4b11f2 100644 --- a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/common/FileBasedIOITHelper.java +++ b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/common/FileBasedIOITHelper.java @@ -18,8 +18,9 @@ package org.apache.beam.sdk.io.common; +import static org.apache.beam.sdk.io.common.IOTestHelper.getHashForRecordCount; + import com.google.common.collect.ImmutableMap; -import java.util.Date; import java.util.Map; import org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.sdk.options.PipelineOptionsValidator; @@ -43,10 +44,6 @@ public static IOTestPipelineOptions readTestPipelineOptions() { return PipelineOptionsValidator.validate(IOTestPipelineOptions.class, options); } - public static String appendTimestampToPrefix(String filenamePrefix) { - return String.format("%s_%s", filenamePrefix, new Date().getTime()); - } - public static String getExpectedHashForLineCount(int lineCount) { Map expectedHashes = ImmutableMap.of( 100_000, "4c8bb3b99dcc59459b20fefba400d446", @@ -54,13 +51,7 @@ public static String getExpectedHashForLineCount(int lineCount) { 100_000_000, "6ce05f456e2fdc846ded2abd0ec1de95" ); - String hash = expectedHashes.get(lineCount); - if (hash == null) { - throw new UnsupportedOperationException( - String.format("No hash for that line count: %s", lineCount) - ); - } - return hash; + return getHashForRecordCount(lineCount, expectedHashes); } /** diff --git a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java index 8762b9f800df..e0b68fda730d 100644 --- a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java +++ b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java @@ -19,9 +19,9 @@ package org.apache.beam.sdk.io.text; import static org.apache.beam.sdk.io.Compression.AUTO; -import static org.apache.beam.sdk.io.common.FileBasedIOITHelper.appendTimestampToPrefix; import static org.apache.beam.sdk.io.common.FileBasedIOITHelper.getExpectedHashForLineCount; import static org.apache.beam.sdk.io.common.FileBasedIOITHelper.readTestPipelineOptions; +import static org.apache.beam.sdk.io.common.IOTestHelper.appendTimestampSuffix; import org.apache.beam.sdk.io.Compression; import org.apache.beam.sdk.io.GenerateSequence; @@ -76,7 +76,7 @@ public static void setup() { IOTestPipelineOptions options = readTestPipelineOptions(); numberOfTextLines = options.getNumberOfRecords(); - filenamePrefix = appendTimestampToPrefix(options.getFilenamePrefix()); + filenamePrefix = appendTimestampSuffix(options.getFilenamePrefix()); compressionType = Compression.valueOf(options.getCompressionType()); } diff --git a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/tfrecord/TFRecordIOIT.java b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/tfrecord/TFRecordIOIT.java index cfc5cd4641f0..2eb70b4e8a57 100644 --- a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/tfrecord/TFRecordIOIT.java +++ b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/tfrecord/TFRecordIOIT.java @@ -19,9 +19,9 @@ package org.apache.beam.sdk.io.tfrecord; import static org.apache.beam.sdk.io.Compression.AUTO; -import static org.apache.beam.sdk.io.common.FileBasedIOITHelper.appendTimestampToPrefix; import static org.apache.beam.sdk.io.common.FileBasedIOITHelper.getExpectedHashForLineCount; import static org.apache.beam.sdk.io.common.FileBasedIOITHelper.readTestPipelineOptions; +import static org.apache.beam.sdk.io.common.IOTestHelper.appendTimestampSuffix; import org.apache.beam.sdk.io.Compression; import org.apache.beam.sdk.io.GenerateSequence; @@ -81,7 +81,7 @@ public static void setup() { IOTestPipelineOptions options = readTestPipelineOptions(); numberOfTextLines = options.getNumberOfRecords(); - filenamePrefix = appendTimestampToPrefix(options.getFilenamePrefix()); + filenamePrefix = appendTimestampSuffix(options.getFilenamePrefix()); compressionType = Compression.valueOf(options.getCompressionType()); } diff --git a/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java b/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java index e93578a5ffe6..0a87c84bb8ca 100644 --- a/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java +++ b/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java @@ -17,14 +17,16 @@ */ package org.apache.beam.sdk.io.xml; +import static org.apache.beam.sdk.io.common.IOTestHelper.appendTimestampSuffix; + import com.google.common.collect.ImmutableMap; import java.nio.charset.Charset; -import java.util.Date; import java.util.Map; import org.apache.beam.sdk.io.FileIO; import org.apache.beam.sdk.io.GenerateSequence; import org.apache.beam.sdk.io.common.DeleteFileFn; import org.apache.beam.sdk.io.common.HashingFn; +import org.apache.beam.sdk.io.common.IOTestHelper; import org.apache.beam.sdk.io.common.IOTestPipelineOptions; import org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.sdk.testing.PAssert; @@ -60,6 +62,10 @@ @RunWith(JUnit4.class) public class XmlIOIT { + private static final Map EXPECTED_HASHES = ImmutableMap.of( + 100_000, "af7775de90d0b0c8bbc36273fbca26fe" + ); + private static Integer numberOfRecords; private static String filenamePrefix; @@ -76,15 +82,11 @@ public static void setup() { .testingPipelineOptions() .as(IOTestPipelineOptions.class); - filenamePrefix = appendTimestampToPrefix(options.getFilenamePrefix()); + filenamePrefix = appendTimestampSuffix(options.getFilenamePrefix()); numberOfRecords = options.getNumberOfRecords(); charset = Charset.forName(options.getCharset()); } - private static String appendTimestampToPrefix(String filenamePrefix) { - return String.format("%s_%s", filenamePrefix, new Date().getTime()); - } - @Test public void writeThenReadViaSinkAndReadFiles() { PCollection testFileNames = pipeline @@ -108,7 +110,7 @@ public void writeThenReadViaSinkAndReadFiles() { .apply("Map birds to strings", MapElements.via(new BirdToString())) .apply("Calculate hashcode", Combine.globally(new HashingFn())); - String expectedHash = getExpectedHashForRecordCount(numberOfRecords); + String expectedHash = IOTestHelper.getHashForRecordCount(numberOfRecords, EXPECTED_HASHES); PAssert.thatSingleton(consolidatedHashcode).isEqualTo(expectedHash); testFileNames.apply("Delete test files", ParDo.of(new DeleteFileFn()) @@ -117,20 +119,6 @@ public void writeThenReadViaSinkAndReadFiles() { pipeline.run().waitUntilFinish(); } - private static String getExpectedHashForRecordCount(int numberOfRecords) { - Map expectedHashes = ImmutableMap.of( - 100_000, "af7775de90d0b0c8bbc36273fbca26fe" - ); - - String hash = expectedHashes.get(numberOfRecords); - if (hash == null) { - throw new UnsupportedOperationException( - String.format("No hash for that line count: %s", numberOfRecords) - ); - } - return hash; - } - private static class LongToBird extends SimpleFunction { @Override public Bird apply(Long input) { From e538394283a2b4a1ad4a6e27b9ae3f109eac76f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Gajowy?= Date: Thu, 22 Feb 2018 19:55:17 +0100 Subject: [PATCH 3/7] [BEAM-3734] Add Perfkit and Dataflow support --- sdks/java/io/xml/pom.xml | 112 ++++++++++++++++++ .../org/apache/beam/sdk/io/xml/XmlIOIT.java | 6 +- 2 files changed, 117 insertions(+), 1 deletion(-) diff --git a/sdks/java/io/xml/pom.xml b/sdks/java/io/xml/pom.xml index db5a858c52a6..98018f9d756f 100644 --- a/sdks/java/io/xml/pom.xml +++ b/sdks/java/io/xml/pom.xml @@ -125,6 +125,118 @@ + + + dataflow-runner + + + integrationTestRunner + dataflow + + + + + org.apache.beam + beam-runners-google-cloud-dataflow-java + runtime + + + + + + + io-it-suite + + io-it-suite + + + + ${project.parent.parent.parent.parent.basedir} + + + + + org.codehaus.gmaven + groovy-maven-plugin + ${groovy-maven-plugin.version} + + + find-supported-python-for-compile + initialize + + execute + + + ${beamRootProjectDir}/sdks/python/findSupportedPython.groovy + + + + + + + org.codehaus.mojo + exec-maven-plugin + ${maven-exec-plugin.version} + + + verify + + exec + + + + + ${python.interpreter.bin} + + ${pkbLocation} + -benchmarks=beam_integration_benchmark + -beam_it_profile=io-it + -beam_it_timeout=${pkbTimeout} + -beam_location=${beamRootProjectDir} + -beam_prebuilt=true + -beam_sdk=java + + ${pkbBeamRunnerProfile} + ${pkbBeamRunnerOption} + + -beam_it_module=sdks/java/io/xml + -beam_it_class=${ioTest} + + -beam_it_options=${integrationTestPipelineOptions} + + -beam_extra_mvn_properties=${pkbExtraProperties} + + + + + org.apache.maven.plugins + maven-surefire-plugin + + true + + + + + + java-9 diff --git a/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java b/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java index 0a87c84bb8ca..ed1f4ac9fe52 100644 --- a/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java +++ b/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java @@ -34,6 +34,7 @@ import org.apache.beam.sdk.transforms.Combine; import org.apache.beam.sdk.transforms.MapElements; import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.transforms.Reshuffle; import org.apache.beam.sdk.transforms.SimpleFunction; import org.apache.beam.sdk.transforms.Values; import org.apache.beam.sdk.transforms.View; @@ -96,7 +97,10 @@ public void writeThenReadViaSinkAndReadFiles() { FileIO.write() .via(XmlIO.sink(Bird.class).withRootElement("birds").withCharset(charset)) .to(filenamePrefix).withPrefix("birds").withSuffix(".xml")) - .getPerDestinationOutputFilenames().apply("Get filenames", Values.create()); + .getPerDestinationOutputFilenames() + .apply("Prevent fusion", Reshuffle.viaRandomKey()) + .apply("Get filenames", Values.create()); + PCollection birds = testFileNames .apply("Find files", FileIO.matchAll()) From a7a744b758b27a3ffac4cb69cbea5d18accd26d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Gajowy?= Date: Fri, 23 Feb 2018 15:11:10 +0100 Subject: [PATCH 4/7] [BEAM-3734] Add hashes for various record quantity --- .../xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java b/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java index ed1f4ac9fe52..66588851144a 100644 --- a/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java +++ b/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java @@ -64,7 +64,9 @@ public class XmlIOIT { private static final Map EXPECTED_HASHES = ImmutableMap.of( - 100_000, "af7775de90d0b0c8bbc36273fbca26fe" + 1000, "7f51adaf701441ee83459a3f705c1b86", + 100_000, "af7775de90d0b0c8bbc36273fbca26fe", + 100_000_000, "bfee52b33aa1552b9c1bfa8bcc41ae80" ); private static Integer numberOfRecords; From 8642edf5a4d3c247164e35690436fc55e1fac2d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Gajowy?= Date: Fri, 23 Feb 2018 18:06:42 +0100 Subject: [PATCH 5/7] [BEAM-3734] Add another XmlIOIT using write and read --- .../org/apache/beam/sdk/io/xml/XmlIOIT.java | 54 ++++++++++++++++--- 1 file changed, 48 insertions(+), 6 deletions(-) diff --git a/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java b/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java index 66588851144a..19033a6c58e2 100644 --- a/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java +++ b/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java @@ -32,6 +32,7 @@ import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.transforms.Combine; +import org.apache.beam.sdk.transforms.Create; import org.apache.beam.sdk.transforms.MapElements; import org.apache.beam.sdk.transforms.ParDo; import org.apache.beam.sdk.transforms.Reshuffle; @@ -78,6 +79,9 @@ public class XmlIOIT { @Rule public TestPipeline pipeline = TestPipeline.create(); + @Rule + public TestPipeline readPipeline = TestPipeline.create(); + @BeforeClass public static void setup() { PipelineOptionsFactory.register(IOTestPipelineOptions.class); @@ -95,14 +99,16 @@ public void writeThenReadViaSinkAndReadFiles() { PCollection testFileNames = pipeline .apply("Generate sequence", GenerateSequence.from(0).to(numberOfRecords)) .apply("Create Birds", MapElements.via(new LongToBird())) - .apply("Write birds to xml files", - FileIO.write() - .via(XmlIO.sink(Bird.class).withRootElement("birds").withCharset(charset)) - .to(filenamePrefix).withPrefix("birds").withSuffix(".xml")) + .apply("Write birds to xml files", FileIO.write() + .via(XmlIO.sink(Bird.class) + .withRootElement("birds") + .withCharset(charset)) + .to(filenamePrefix) + .withPrefix("birds") + .withSuffix(".xml")) .getPerDestinationOutputFilenames() .apply("Prevent fusion", Reshuffle.viaRandomKey()) - .apply("Get filenames", Values.create()); - + .apply("Get file names", Values.create()); PCollection birds = testFileNames .apply("Find files", FileIO.matchAll()) @@ -125,6 +131,42 @@ public void writeThenReadViaSinkAndReadFiles() { pipeline.run().waitUntilFinish(); } + @Test + public void writeThenReadViaWriteAndRead() { + String filenamePattern = filenamePrefix + "*"; + + pipeline + .apply("Generate sequence", GenerateSequence.from(0).to(numberOfRecords)) + .apply("Create Birds", MapElements.via(new LongToBird())) + .apply("Write birds to xml files", XmlIO.write() + .to(filenamePrefix) + .withRecordClass(Bird.class) + .withRootElement("birds") + .withCharset(charset)); + + pipeline.run().waitUntilFinish(); + + PCollection consolidatedHashcode = readPipeline + .apply(XmlIO.read() + .from(filenamePattern) + .withRecordClass(Bird.class) + .withRootElement("birds") + .withRecordElement("bird") + .withCharset(charset)) + .apply("Map birds to strings", MapElements.via(new BirdToString())) + .apply("Calculate hashcode", Combine.globally(new HashingFn())); + + String expectedHash = IOTestHelper.getHashForRecordCount(numberOfRecords, EXPECTED_HASHES); + PAssert.thatSingleton(consolidatedHashcode).isEqualTo(expectedHash); + + readPipeline + .apply("Get file names to delete", Create.of(filenamePattern)) + .apply("Delete test files", ParDo.of(new DeleteFileFn()) + .withSideInputs(consolidatedHashcode.apply(View.asSingleton()))); + + readPipeline.run().waitUntilFinish(); + } + private static class LongToBird extends SimpleFunction { @Override public Bird apply(Long input) { From e3960b9fea070f4d514b71629fad156fc4f8773c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Gajowy?= Date: Fri, 23 Feb 2018 18:49:07 +0100 Subject: [PATCH 6/7] [BEAM-3734] Add Jenkins job definitions for Large scale tests --- .../job_beam_PerformanceTests_XmlIO_IT.groovy | 108 ++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 .test-infra/jenkins/job_beam_PerformanceTests_XmlIO_IT.groovy diff --git a/.test-infra/jenkins/job_beam_PerformanceTests_XmlIO_IT.groovy b/.test-infra/jenkins/job_beam_PerformanceTests_XmlIO_IT.groovy new file mode 100644 index 000000000000..95bc8dc1a8e7 --- /dev/null +++ b/.test-infra/jenkins/job_beam_PerformanceTests_XmlIO_IT.groovy @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import common_job_properties + +def testsConfigurations = [ + [ + jobName : 'beam_PerformanceTests_XmlIOIT_Sink_And_ReadFiles', + jobDescription : 'Runs PerfKit test for XmlIOIT (sink and read API)', + itModule : 'sdks/java/io/xml', + testName : 'org.apache.beam.sdk.io.xml.XmlIOIT#writeThenReadViaSinkAndReadFiles', + bqTable : 'beam_performance.xmlioit_sink_and_readFiles_pkb_results.large', + prCommitStatusName: 'Java XmlIO Sink and ReadFiles Performance Test', + prTriggerPhase : 'Run XmlIO Sink and ReadFiles Performance Test', + extraPipelineArgs: [ + numberOfRecords: '100000000', + charset: 'UTF-8' + ] + ], + [ + jobName : 'beam_PerformanceTests_XmlIOIT_Write_And_Read', + jobDescription : 'Runs PerfKit test for XmlIOIT (write and read API)', + itModule : 'sdks/java/io/xml', + testName : 'org.apache.beam.sdk.io.xml.XmlIOIT#writeThenReadViaWriteAndRead', + bqTable : 'beam_performance.xmlioit_write_and_read_pkb_results.large', + prCommitStatusName: 'Java XmlIO Write and Read Performance Test', + prTriggerPhase : 'Run XmlIO Write and Read Performance Test', + extraPipelineArgs: [ + numberOfRecords: '100000000', + charset: 'UTF-8' + ] + ] +] + +for (testConfiguration in testsConfigurations) { + create_performance_test_job(testConfiguration) +} + + +private void create_performance_test_job(testConfiguration) { + + // This job runs the file-based IOs performance tests on PerfKit Benchmarker. + job(testConfiguration.jobName) { + description(testConfiguration.jobDescription) + + // Set default Beam job properties. + common_job_properties.setTopLevelMainJobProperties(delegate) + + // Allows triggering this build against pull requests. + common_job_properties.enablePhraseTriggeringFromPullRequest( + delegate, + testConfiguration.prCommitStatusName, + testConfiguration.prTriggerPhase) + + // Run job in postcommit every 6 hours, don't trigger every push, and + // don't email individual committers. + common_job_properties.setPostCommit( + delegate, + '0 */6 * * *', + false, + 'commits@beam.apache.org', + false) + + def pipelineArgs = [ + project : 'apache-beam-testing', + tempRoot : 'gs://temp-storage-for-perf-tests', + filenamePrefix : "gs://temp-storage-for-perf-tests/${testConfiguration.jobName}/\${BUILD_ID}/", + ] + if (testConfiguration.containsKey('extraPipelineArgs')) { + pipelineArgs << testConfiguration.extraPipelineArgs + } + + def pipelineArgList = [] + pipelineArgs.each({ + key, value -> pipelineArgList.add("\"--$key=$value\"") + }) + def pipelineArgsJoined = "[" + pipelineArgList.join(',') + "]" + + def argMap = [ + benchmarks : 'beam_integration_benchmark', + beam_it_timeout : '1800', + beam_it_profile : 'io-it', + beam_prebuilt : 'true', + beam_sdk : 'java', + beam_it_module : testConfiguration.itModule, + beam_it_class : testConfiguration.testName, + beam_it_options : pipelineArgsJoined, + beam_extra_mvn_properties: '["filesystem=gcs"]', + bigquery_table : testConfiguration.bqTable, + ] + common_job_properties.buildPerformanceTest(delegate, argMap) + } +} \ No newline at end of file From c8b66c3f0f28e2dc2cd686e07b425469f1eb382b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Gajowy?= Date: Wed, 7 Mar 2018 13:23:05 +0100 Subject: [PATCH 7/7] [BEAM-3734] Post code review fixes --- ...eam_PerformanceTests_FileBasedIO_IT.groovy | 23 +++- .../job_beam_PerformanceTests_XmlIO_IT.groovy | 108 --------------- .../beam/sdk/io/common/DeleteFileFn.java | 47 ------- .../beam/sdk/io/common/IOTestHelper.java | 44 ------ .../sdk/io/common/IOTestPipelineOptions.java | 2 +- .../apache/beam/sdk/io/common/TestRow.java | 9 +- sdks/java/io/file-based-io-tests/build.gradle | 1 + sdks/java/io/file-based-io-tests/pom.xml | 6 +- .../org/apache/beam/sdk/io/avro/AvroIOIT.java | 4 +- .../sdk/io/common/FileBasedIOITHelper.java | 44 +++++- .../org/apache/beam/sdk/io/text/TextIOIT.java | 4 +- .../beam/sdk/io/tfrecord/TFRecordIOIT.java | 4 +- .../org/apache/beam/sdk/io/xml/XmlIOIT.java | 125 +++++++++++------- sdks/java/io/xml/build.gradle | 11 -- sdks/java/io/xml/pom.xml | 125 ------------------ .../java/org/apache/beam/sdk/io/xml/Bird.java | 85 ------------ .../org/apache/beam/sdk/io/xml/XmlIOTest.java | 68 ++++++++++ 17 files changed, 224 insertions(+), 486 deletions(-) delete mode 100644 .test-infra/jenkins/job_beam_PerformanceTests_XmlIO_IT.groovy delete mode 100644 sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/DeleteFileFn.java delete mode 100644 sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/IOTestHelper.java rename sdks/java/io/{xml => file-based-io-tests}/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java (67%) delete mode 100644 sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/Bird.java diff --git a/.test-infra/jenkins/job_beam_PerformanceTests_FileBasedIO_IT.groovy b/.test-infra/jenkins/job_beam_PerformanceTests_FileBasedIO_IT.groovy index 667b11d20725..117ff40d8952 100644 --- a/.test-infra/jenkins/job_beam_PerformanceTests_FileBasedIO_IT.groovy +++ b/.test-infra/jenkins/job_beam_PerformanceTests_FileBasedIO_IT.groovy @@ -26,6 +26,9 @@ def testsConfigurations = [ bqTable : 'beam_performance.textioit_pkb_results', prCommitStatusName: 'Java TextIO Performance Test', prTriggerPhase : 'Run Java TextIO Performance Test', + extraPipelineArgs: [ + numberOfRecords: '1000000' + ] ], [ @@ -36,6 +39,7 @@ def testsConfigurations = [ prCommitStatusName : 'Java CompressedTextIO Performance Test', prTriggerPhase : 'Run Java CompressedTextIO Performance Test', extraPipelineArgs: [ + numberOfRecords: '1000000', compressionType: 'GZIP' ] ], @@ -46,6 +50,9 @@ def testsConfigurations = [ bqTable : 'beam_performance.avroioit_pkb_results', prCommitStatusName: 'Java AvroIO Performance Test', prTriggerPhase : 'Run Java AvroIO Performance Test', + extraPipelineArgs: [ + numberOfRecords: '1000000' + ] ], [ jobName : 'beam_PerformanceTests_TFRecordIOIT', @@ -54,7 +61,22 @@ def testsConfigurations = [ bqTable : 'beam_performance.tfrecordioit_pkb_results', prCommitStatusName: 'Java TFRecordIO Performance Test', prTriggerPhase : 'Run Java TFRecordIO Performance Test', + extraPipelineArgs: [ + numberOfRecords: '1000000' + ] ], + [ + jobName : 'beam_PerformanceTests_XmlIOIT', + jobDescription : 'Runs PerfKit tests for beam_PerformanceTests_XmlIOIT', + itClass : 'org.apache.beam.sdk.io.xml.XmlIOIT', + bqTable : 'beam_performance.xmlioit_pkb_results', + prCommitStatusName: 'Java XmlIOPerformance Test', + prTriggerPhase : 'Run Java XmlIO Performance Test', + extraPipelineArgs: [ + numberOfRecords: '100000000', + charset: 'UTF-8' + ] + ] ] for (testConfiguration in testsConfigurations) { @@ -89,7 +111,6 @@ private void create_filebasedio_performance_test_job(testConfiguration) { def pipelineArgs = [ project : 'apache-beam-testing', tempRoot : 'gs://temp-storage-for-perf-tests', - numberOfRecords: '1000000', filenamePrefix : "gs://temp-storage-for-perf-tests/${testConfiguration.jobName}/\${BUILD_ID}/", ] if (testConfiguration.containsKey('extraPipelineArgs')) { diff --git a/.test-infra/jenkins/job_beam_PerformanceTests_XmlIO_IT.groovy b/.test-infra/jenkins/job_beam_PerformanceTests_XmlIO_IT.groovy deleted file mode 100644 index 95bc8dc1a8e7..000000000000 --- a/.test-infra/jenkins/job_beam_PerformanceTests_XmlIO_IT.groovy +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import common_job_properties - -def testsConfigurations = [ - [ - jobName : 'beam_PerformanceTests_XmlIOIT_Sink_And_ReadFiles', - jobDescription : 'Runs PerfKit test for XmlIOIT (sink and read API)', - itModule : 'sdks/java/io/xml', - testName : 'org.apache.beam.sdk.io.xml.XmlIOIT#writeThenReadViaSinkAndReadFiles', - bqTable : 'beam_performance.xmlioit_sink_and_readFiles_pkb_results.large', - prCommitStatusName: 'Java XmlIO Sink and ReadFiles Performance Test', - prTriggerPhase : 'Run XmlIO Sink and ReadFiles Performance Test', - extraPipelineArgs: [ - numberOfRecords: '100000000', - charset: 'UTF-8' - ] - ], - [ - jobName : 'beam_PerformanceTests_XmlIOIT_Write_And_Read', - jobDescription : 'Runs PerfKit test for XmlIOIT (write and read API)', - itModule : 'sdks/java/io/xml', - testName : 'org.apache.beam.sdk.io.xml.XmlIOIT#writeThenReadViaWriteAndRead', - bqTable : 'beam_performance.xmlioit_write_and_read_pkb_results.large', - prCommitStatusName: 'Java XmlIO Write and Read Performance Test', - prTriggerPhase : 'Run XmlIO Write and Read Performance Test', - extraPipelineArgs: [ - numberOfRecords: '100000000', - charset: 'UTF-8' - ] - ] -] - -for (testConfiguration in testsConfigurations) { - create_performance_test_job(testConfiguration) -} - - -private void create_performance_test_job(testConfiguration) { - - // This job runs the file-based IOs performance tests on PerfKit Benchmarker. - job(testConfiguration.jobName) { - description(testConfiguration.jobDescription) - - // Set default Beam job properties. - common_job_properties.setTopLevelMainJobProperties(delegate) - - // Allows triggering this build against pull requests. - common_job_properties.enablePhraseTriggeringFromPullRequest( - delegate, - testConfiguration.prCommitStatusName, - testConfiguration.prTriggerPhase) - - // Run job in postcommit every 6 hours, don't trigger every push, and - // don't email individual committers. - common_job_properties.setPostCommit( - delegate, - '0 */6 * * *', - false, - 'commits@beam.apache.org', - false) - - def pipelineArgs = [ - project : 'apache-beam-testing', - tempRoot : 'gs://temp-storage-for-perf-tests', - filenamePrefix : "gs://temp-storage-for-perf-tests/${testConfiguration.jobName}/\${BUILD_ID}/", - ] - if (testConfiguration.containsKey('extraPipelineArgs')) { - pipelineArgs << testConfiguration.extraPipelineArgs - } - - def pipelineArgList = [] - pipelineArgs.each({ - key, value -> pipelineArgList.add("\"--$key=$value\"") - }) - def pipelineArgsJoined = "[" + pipelineArgList.join(',') + "]" - - def argMap = [ - benchmarks : 'beam_integration_benchmark', - beam_it_timeout : '1800', - beam_it_profile : 'io-it', - beam_prebuilt : 'true', - beam_sdk : 'java', - beam_it_module : testConfiguration.itModule, - beam_it_class : testConfiguration.testName, - beam_it_options : pipelineArgsJoined, - beam_extra_mvn_properties: '["filesystem=gcs"]', - bigquery_table : testConfiguration.bqTable, - ] - common_job_properties.buildPerformanceTest(delegate, argMap) - } -} \ No newline at end of file diff --git a/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/DeleteFileFn.java b/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/DeleteFileFn.java deleted file mode 100644 index 65aeb4d950ce..000000000000 --- a/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/DeleteFileFn.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.common; - -import com.google.common.collect.Iterables; -import java.io.IOException; -import java.util.Collections; -import java.util.HashSet; -import java.util.Set; -import org.apache.beam.sdk.io.FileSystems; -import org.apache.beam.sdk.io.fs.MatchResult; -import org.apache.beam.sdk.io.fs.ResourceId; -import org.apache.beam.sdk.transforms.DoFn; - -/** - * Deletes matching files using the FileSystems API. - */ -public class DeleteFileFn extends DoFn { - - @ProcessElement - public void processElement(ProcessContext c) throws IOException { - MatchResult match = Iterables - .getOnlyElement(FileSystems.match(Collections.singletonList(c.element()))); - - Set resourceIds = new HashSet<>(); - for (MatchResult.Metadata metadataElem : match.metadata()) { - resourceIds.add(metadataElem.resourceId()); - } - - FileSystems.delete(resourceIds); - } -} diff --git a/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/IOTestHelper.java b/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/IOTestHelper.java deleted file mode 100644 index 81a2984def8e..000000000000 --- a/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/IOTestHelper.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.common; - -import java.util.Date; -import java.util.Map; - -/** - * This class contains common helper methods to ease writing IO Tests. - */ -public class IOTestHelper { - - private IOTestHelper() { - } - - public static String appendTimestampSuffix(String text) { - return String.format("%s_%s", text, new Date().getTime()); - } - - public static String getHashForRecordCount(int recordCount, Map hashes) { - String hash = hashes.get(recordCount); - if (hash == null) { - throw new UnsupportedOperationException( - String.format("No hash for that record count: %s", recordCount) - ); - } - return hash; - } -} diff --git a/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/IOTestPipelineOptions.java b/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/IOTestPipelineOptions.java index 070733771bf9..89b7ae81bc5a 100644 --- a/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/IOTestPipelineOptions.java +++ b/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/IOTestPipelineOptions.java @@ -108,7 +108,7 @@ public interface IOTestPipelineOptions extends TestPipelineOptions { void setCompressionType(String compressionType); - /* Xml */ + /* Used by XmlIOIT */ @Description("Xml file charset name") @Default.String("UTF-8") String getCharset(); diff --git a/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/TestRow.java b/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/TestRow.java index 3ad57f446f84..e6bc7e8c4f7b 100644 --- a/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/TestRow.java +++ b/sdks/java/io/common/src/test/java/org/apache/beam/sdk/io/common/TestRow.java @@ -17,8 +17,6 @@ */ package org.apache.beam.sdk.io.common; -import static org.apache.beam.sdk.io.common.IOTestHelper.getHashForRecordCount; - import com.google.auto.value.AutoValue; import com.google.common.collect.ImmutableMap; import java.io.Serializable; @@ -110,7 +108,10 @@ public void processElement(ProcessContext c) { */ public static String getExpectedHashForRowCount(int rowCount) throws UnsupportedOperationException { - - return getHashForRecordCount(rowCount, EXPECTED_HASHES); + String hash = EXPECTED_HASHES.get(rowCount); + if (hash == null) { + throw new UnsupportedOperationException("No hash for that row count"); + } + return hash; } } diff --git a/sdks/java/io/file-based-io-tests/build.gradle b/sdks/java/io/file-based-io-tests/build.gradle index b1a9167e73e3..e797172850a7 100644 --- a/sdks/java/io/file-based-io-tests/build.gradle +++ b/sdks/java/io/file-based-io-tests/build.gradle @@ -35,6 +35,7 @@ dependencies { shadowTest project(":runners:direct-java").sourceSets.test.output shadowTest project(":sdks:java:io:common") shadowTest project(":sdks:java:io:common").sourceSets.test.output + shadowTest project(":sdks:java:io:xml") shadowTest library.java.guava shadowTest library.java.junit shadowTest library.java.hamcrest_core diff --git a/sdks/java/io/file-based-io-tests/pom.xml b/sdks/java/io/file-based-io-tests/pom.xml index 740def811bb7..369f2bc9fde9 100644 --- a/sdks/java/io/file-based-io-tests/pom.xml +++ b/sdks/java/io/file-based-io-tests/pom.xml @@ -349,6 +349,10 @@ avro test - + + org.apache.beam + beam-sdks-java-io-xml + test + diff --git a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/avro/AvroIOIT.java b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/avro/AvroIOIT.java index 855c1c82d62e..19f4a68b5ff0 100644 --- a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/avro/AvroIOIT.java +++ b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/avro/AvroIOIT.java @@ -17,9 +17,9 @@ */ package org.apache.beam.sdk.io.avro; +import static org.apache.beam.sdk.io.common.FileBasedIOITHelper.appendTimestampSuffix; import static org.apache.beam.sdk.io.common.FileBasedIOITHelper.getExpectedHashForLineCount; import static org.apache.beam.sdk.io.common.FileBasedIOITHelper.readTestPipelineOptions; -import static org.apache.beam.sdk.io.common.IOTestHelper.appendTimestampSuffix; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; @@ -27,8 +27,8 @@ import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.io.AvroIO; import org.apache.beam.sdk.io.GenerateSequence; -import org.apache.beam.sdk.io.common.DeleteFileFn; import org.apache.beam.sdk.io.common.FileBasedIOITHelper; +import org.apache.beam.sdk.io.common.FileBasedIOITHelper.DeleteFileFn; import org.apache.beam.sdk.io.common.HashingFn; import org.apache.beam.sdk.io.common.IOTestPipelineOptions; import org.apache.beam.sdk.testing.PAssert; diff --git a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/common/FileBasedIOITHelper.java b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/common/FileBasedIOITHelper.java index ffa2fe4b11f2..bbf707e26dd7 100644 --- a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/common/FileBasedIOITHelper.java +++ b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/common/FileBasedIOITHelper.java @@ -15,13 +15,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.beam.sdk.io.common; -import static org.apache.beam.sdk.io.common.IOTestHelper.getHashForRecordCount; - import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Iterables; +import java.io.IOException; +import java.util.Collections; +import java.util.Date; +import java.util.HashSet; import java.util.Map; +import java.util.Set; +import org.apache.beam.sdk.io.FileSystems; +import org.apache.beam.sdk.io.fs.MatchResult; +import org.apache.beam.sdk.io.fs.ResourceId; import org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.sdk.options.PipelineOptionsValidator; import org.apache.beam.sdk.testing.TestPipeline; @@ -44,6 +50,10 @@ public static IOTestPipelineOptions readTestPipelineOptions() { return PipelineOptionsValidator.validate(IOTestPipelineOptions.class, options); } + public static String appendTimestampSuffix(String text) { + return String.format("%s_%s", text, new Date().getTime()); + } + public static String getExpectedHashForLineCount(int lineCount) { Map expectedHashes = ImmutableMap.of( 100_000, "4c8bb3b99dcc59459b20fefba400d446", @@ -54,6 +64,16 @@ public static String getExpectedHashForLineCount(int lineCount) { return getHashForRecordCount(lineCount, expectedHashes); } + public static String getHashForRecordCount(int recordCount, Map hashes) { + String hash = hashes.get(recordCount); + if (hash == null) { + throw new UnsupportedOperationException( + String.format("No hash for that record count: %s", recordCount) + ); + } + return hash; + } + /** * Constructs text lines in files used for testing. */ @@ -65,4 +85,22 @@ public void processElement(ProcessContext c) { } } + /** + * Deletes matching files using the FileSystems API. + */ + public static class DeleteFileFn extends DoFn { + + @ProcessElement + public void processElement(ProcessContext c) throws IOException { + MatchResult match = Iterables + .getOnlyElement(FileSystems.match(Collections.singletonList(c.element()))); + + Set resourceIds = new HashSet<>(); + for (MatchResult.Metadata metadataElem : match.metadata()) { + resourceIds.add(metadataElem.resourceId()); + } + + FileSystems.delete(resourceIds); + } + } } diff --git a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java index e0b68fda730d..f6e27cdb3564 100644 --- a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java +++ b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java @@ -19,15 +19,15 @@ package org.apache.beam.sdk.io.text; import static org.apache.beam.sdk.io.Compression.AUTO; +import static org.apache.beam.sdk.io.common.FileBasedIOITHelper.appendTimestampSuffix; import static org.apache.beam.sdk.io.common.FileBasedIOITHelper.getExpectedHashForLineCount; import static org.apache.beam.sdk.io.common.FileBasedIOITHelper.readTestPipelineOptions; -import static org.apache.beam.sdk.io.common.IOTestHelper.appendTimestampSuffix; import org.apache.beam.sdk.io.Compression; import org.apache.beam.sdk.io.GenerateSequence; import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.io.common.DeleteFileFn; import org.apache.beam.sdk.io.common.FileBasedIOITHelper; +import org.apache.beam.sdk.io.common.FileBasedIOITHelper.DeleteFileFn; import org.apache.beam.sdk.io.common.HashingFn; import org.apache.beam.sdk.io.common.IOTestPipelineOptions; import org.apache.beam.sdk.testing.PAssert; diff --git a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/tfrecord/TFRecordIOIT.java b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/tfrecord/TFRecordIOIT.java index 2eb70b4e8a57..1ce155912ca1 100644 --- a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/tfrecord/TFRecordIOIT.java +++ b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/tfrecord/TFRecordIOIT.java @@ -19,15 +19,15 @@ package org.apache.beam.sdk.io.tfrecord; import static org.apache.beam.sdk.io.Compression.AUTO; +import static org.apache.beam.sdk.io.common.FileBasedIOITHelper.appendTimestampSuffix; import static org.apache.beam.sdk.io.common.FileBasedIOITHelper.getExpectedHashForLineCount; import static org.apache.beam.sdk.io.common.FileBasedIOITHelper.readTestPipelineOptions; -import static org.apache.beam.sdk.io.common.IOTestHelper.appendTimestampSuffix; import org.apache.beam.sdk.io.Compression; import org.apache.beam.sdk.io.GenerateSequence; import org.apache.beam.sdk.io.TFRecordIO; -import org.apache.beam.sdk.io.common.DeleteFileFn; import org.apache.beam.sdk.io.common.FileBasedIOITHelper; +import org.apache.beam.sdk.io.common.FileBasedIOITHelper.DeleteFileFn; import org.apache.beam.sdk.io.common.HashingFn; import org.apache.beam.sdk.io.common.IOTestPipelineOptions; import org.apache.beam.sdk.testing.PAssert; diff --git a/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java similarity index 67% rename from sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java rename to sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java index 19033a6c58e2..7176d7f42da0 100644 --- a/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java +++ b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/xml/XmlIOIT.java @@ -17,22 +17,25 @@ */ package org.apache.beam.sdk.io.xml; -import static org.apache.beam.sdk.io.common.IOTestHelper.appendTimestampSuffix; +import static org.apache.beam.sdk.io.common.FileBasedIOITHelper.appendTimestampSuffix; +import static org.apache.beam.sdk.io.common.FileBasedIOITHelper.getHashForRecordCount; import com.google.common.collect.ImmutableMap; +import java.io.Serializable; import java.nio.charset.Charset; import java.util.Map; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlRootElement; +import javax.xml.bind.annotation.XmlType; import org.apache.beam.sdk.io.FileIO; import org.apache.beam.sdk.io.GenerateSequence; -import org.apache.beam.sdk.io.common.DeleteFileFn; +import org.apache.beam.sdk.io.common.FileBasedIOITHelper; import org.apache.beam.sdk.io.common.HashingFn; -import org.apache.beam.sdk.io.common.IOTestHelper; import org.apache.beam.sdk.io.common.IOTestPipelineOptions; import org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.transforms.Combine; -import org.apache.beam.sdk.transforms.Create; import org.apache.beam.sdk.transforms.MapElements; import org.apache.beam.sdk.transforms.ParDo; import org.apache.beam.sdk.transforms.Reshuffle; @@ -51,7 +54,7 @@ * *

Run those tests using the command below. Pass in connection information via PipelineOptions: *

- *  mvn -e -Pio-it verify -pl sdks/java/io/xml
+ *  mvn -e -Pio-it verify -pl sdks/java/io/file-based-io-tests
  *  -Dit.test=org.apache.beam.sdk.io.xml.XmlIOIT
  *  -DintegrationTestPipelineOptions='[
  *  "--numberOfRecords=100000",
@@ -79,9 +82,6 @@ public class XmlIOIT {
   @Rule
   public TestPipeline pipeline = TestPipeline.create();
 
-  @Rule
-  public TestPipeline readPipeline = TestPipeline.create();
-
   @BeforeClass
   public static void setup() {
     PipelineOptionsFactory.register(IOTestPipelineOptions.class);
@@ -95,11 +95,11 @@ public static void setup() {
   }
 
   @Test
-  public void writeThenReadViaSinkAndReadFiles() {
+  public void writeThenReadAll() {
     PCollection testFileNames = pipeline
       .apply("Generate sequence", GenerateSequence.from(0).to(numberOfRecords))
-      .apply("Create Birds", MapElements.via(new LongToBird()))
-      .apply("Write birds to xml files", FileIO.write()
+      .apply("Create xml records", MapElements.via(new LongToBird()))
+      .apply("Write xml files", FileIO.write()
           .via(XmlIO.sink(Bird.class)
             .withRootElement("birds")
             .withCharset(charset))
@@ -119,65 +119,90 @@ public void writeThenReadViaSinkAndReadFiles() {
         .withCharset(charset));
 
     PCollection consolidatedHashcode = birds
-      .apply("Map birds to strings", MapElements.via(new BirdToString()))
+      .apply("Map xml records to strings", MapElements.via(new BirdToString()))
       .apply("Calculate hashcode", Combine.globally(new HashingFn()));
 
-    String expectedHash = IOTestHelper.getHashForRecordCount(numberOfRecords, EXPECTED_HASHES);
+    String expectedHash = getHashForRecordCount(numberOfRecords, EXPECTED_HASHES);
     PAssert.thatSingleton(consolidatedHashcode).isEqualTo(expectedHash);
 
-    testFileNames.apply("Delete test files", ParDo.of(new DeleteFileFn())
+    testFileNames.apply("Delete test files", ParDo.of(new FileBasedIOITHelper.DeleteFileFn())
         .withSideInputs(consolidatedHashcode.apply(View.asSingleton())));
 
     pipeline.run().waitUntilFinish();
   }
 
-  @Test
-  public void writeThenReadViaWriteAndRead() {
-    String filenamePattern = filenamePrefix + "*";
+  private static class LongToBird extends SimpleFunction {
+    @Override
+    public Bird apply(Long input) {
+      return new Bird("Testing", "Bird number " + input);
+    }
+  }
 
-    pipeline
-      .apply("Generate sequence", GenerateSequence.from(0).to(numberOfRecords))
-      .apply("Create Birds", MapElements.via(new LongToBird()))
-      .apply("Write birds to xml files", XmlIO.write()
-          .to(filenamePrefix)
-          .withRecordClass(Bird.class)
-          .withRootElement("birds")
-          .withCharset(charset));
+  private static class BirdToString extends SimpleFunction {
+    @Override
+    public String apply(Bird input) {
+      return input.toString();
+    }
+  }
 
-    pipeline.run().waitUntilFinish();
+  @SuppressWarnings("unused")
+  @XmlRootElement(name = "bird")
+  @XmlType(propOrder = { "name", "adjective" })
+  private static final class Bird implements Serializable {
+    private String name;
+    private String adjective;
 
-    PCollection consolidatedHashcode = readPipeline
-      .apply(XmlIO.read()
-        .from(filenamePattern)
-        .withRecordClass(Bird.class)
-        .withRootElement("birds")
-        .withRecordElement("bird")
-        .withCharset(charset))
-      .apply("Map birds to strings", MapElements.via(new BirdToString()))
-      .apply("Calculate hashcode", Combine.globally(new HashingFn()));
+    @XmlElement(name = "species")
+    public String getName() {
+      return name;
+    }
 
-    String expectedHash = IOTestHelper.getHashForRecordCount(numberOfRecords, EXPECTED_HASHES);
-    PAssert.thatSingleton(consolidatedHashcode).isEqualTo(expectedHash);
+    public void setName(String name) {
+      this.name = name;
+    }
 
-    readPipeline
-      .apply("Get file names to delete", Create.of(filenamePattern))
-      .apply("Delete test files", ParDo.of(new DeleteFileFn())
-          .withSideInputs(consolidatedHashcode.apply(View.asSingleton())));
+    public String getAdjective() {
+      return adjective;
+    }
 
-    readPipeline.run().waitUntilFinish();
-  }
+    public void setAdjective(String adjective) {
+      this.adjective = adjective;
+    }
+
+    public Bird() {}
+
+    public Bird(String adjective, String name) {
+      this.adjective = adjective;
+      this.name = name;
+    }
 
-  private static class LongToBird extends SimpleFunction {
     @Override
-    public Bird apply(Long input) {
-      return new Bird("Testing", "Bird number " + input);
+    public boolean equals(Object o) {
+      if (this == o) {
+        return true;
+      }
+      if (o == null || getClass() != o.getClass()) {
+        return false;
+      }
+
+      Bird bird = (Bird) o;
+
+      if (!name.equals(bird.name)) {
+        return false;
+      }
+      return adjective.equals(bird.adjective);
     }
-  }
 
-  private static class BirdToString extends SimpleFunction{
     @Override
-    public String apply(Bird input) {
-      return input.toString();
+    public int hashCode() {
+      int result = name.hashCode();
+      result = 31 * result + adjective.hashCode();
+      return result;
+    }
+
+    @Override
+    public String toString() {
+      return String.format("Bird: %s, %s", name, adjective);
     }
   }
 }
diff --git a/sdks/java/io/xml/build.gradle b/sdks/java/io/xml/build.gradle
index 61352424141a..5e66ad96f35a 100644
--- a/sdks/java/io/xml/build.gradle
+++ b/sdks/java/io/xml/build.gradle
@@ -21,15 +21,6 @@ applyJavaNature()
 
 description = "Apache Beam :: SDKs :: Java :: IO :: XML"
 
-/*
- * We need to rely on manually specifying these evaluationDependsOn to ensure that
- * the following projects are evaluated before we evaluate this project. This is because
- * we are attempting to reference the "sourceSets.test.output" directly.
- * TODO: Swap to generating test artifacts which we can then rely on instead of
- * the test outputs directly.
- */
-evaluationDependsOn(":sdks:java:io:common")
-
 dependencies {
   compile library.java.guava
   shadow project(path: ":sdks:java:core", configuration: "shadow")
@@ -38,8 +29,6 @@ dependencies {
   shadow library.java.woodstox_core_asl
   testCompile project(path: ":sdks:java:core", configuration: "shadowTest")
   testCompile project(path: ":runners:direct-java", configuration: "shadow")
-  testCompile project(path: ":sdks:java:io:common", configuration: "shadow")
-  testCompile project(":sdks:java:io:common").sourceSets.test.output
   testCompile library.java.junit
   testCompile library.java.slf4j_jdk14
   testCompile library.java.hamcrest_core
diff --git a/sdks/java/io/xml/pom.xml b/sdks/java/io/xml/pom.xml
index 98018f9d756f..996063a227ce 100644
--- a/sdks/java/io/xml/pom.xml
+++ b/sdks/java/io/xml/pom.xml
@@ -109,134 +109,9 @@
       hamcrest-library
       test
     
-
-    
-      org.apache.beam
-      beam-sdks-java-io-common
-      test
-      tests
-    
-
-    
-      org.apache.beam
-      beam-sdks-java-io-common
-      test
-    
   
 
   
-    
-    
-      dataflow-runner
-      
-        
-          integrationTestRunner
-          dataflow
-        
-      
-      
-        
-          org.apache.beam
-          beam-runners-google-cloud-dataflow-java
-          runtime
-        
-      
-    
-
-    
-    
-      io-it-suite
-      
-        io-it-suite
-      
-      
-        
-        ${project.parent.parent.parent.parent.basedir}
-      
-      
-        
-          
-            org.codehaus.gmaven
-            groovy-maven-plugin
-            ${groovy-maven-plugin.version}
-            
-              
-                find-supported-python-for-compile
-                initialize
-                
-                  execute
-                
-                
-                  ${beamRootProjectDir}/sdks/python/findSupportedPython.groovy
-                
-              
-            
-          
-
-          
-            org.codehaus.mojo
-            exec-maven-plugin
-            ${maven-exec-plugin.version}
-            
-              
-                verify
-                
-                  exec
-                
-              
-            
-            
-              ${python.interpreter.bin}
-              
-                ${pkbLocation}
-                -benchmarks=beam_integration_benchmark
-                -beam_it_profile=io-it
-                -beam_it_timeout=${pkbTimeout}
-                -beam_location=${beamRootProjectDir}
-                -beam_prebuilt=true
-                -beam_sdk=java
-                
-                ${pkbBeamRunnerProfile}
-                ${pkbBeamRunnerOption}
-                
-                -beam_it_module=sdks/java/io/xml
-                -beam_it_class=${ioTest}
-                
-                -beam_it_options=${integrationTestPipelineOptions}
-                
-                -beam_extra_mvn_properties=${pkbExtraProperties}
-              
-            
-          
-          
-            org.apache.maven.plugins
-            maven-surefire-plugin
-            
-              true
-            
-          
-        
-      
-    
-
     
       java-9
       
diff --git a/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/Bird.java b/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/Bird.java
deleted file mode 100644
index dd52ded0855d..000000000000
--- a/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/Bird.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.sdk.io.xml;
-
-import java.io.Serializable;
-import javax.xml.bind.annotation.XmlElement;
-import javax.xml.bind.annotation.XmlRootElement;
-import javax.xml.bind.annotation.XmlType;
-
-/**
- * Test JAXB annotated class.
- */
-@SuppressWarnings("unused") @XmlRootElement(name = "bird") @XmlType(propOrder = { "name",
-  "adjective" }) public final class Bird implements Serializable {
-  private String name;
-  private String adjective;
-
-  @XmlElement(name = "species")
-  public String getName() {
-    return name;
-  }
-
-  public void setName(String name) {
-    this.name = name;
-  }
-
-  public String getAdjective() {
-    return adjective;
-  }
-
-  public void setAdjective(String adjective) {
-    this.adjective = adjective;
-  }
-
-  public Bird() {}
-
-  public Bird(String adjective, String name) {
-    this.adjective = adjective;
-    this.name = name;
-  }
-
-  @Override
-  public boolean equals(Object o) {
-    if (this == o) {
-      return true;
-    }
-    if (o == null || getClass() != o.getClass()) {
-      return false;
-    }
-
-    Bird bird = (Bird) o;
-
-    if (!name.equals(bird.name)) {
-      return false;
-    }
-    return adjective.equals(bird.adjective);
-  }
-
-  @Override
-  public int hashCode() {
-    int result = name.hashCode();
-    result = 31 * result + adjective.hashCode();
-    return result;
-  }
-
-  @Override
-  public String toString() {
-    return String.format("Bird: %s, %s", name, adjective);
-  }
-}
diff --git a/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOTest.java b/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOTest.java
index b6a094fb42a7..cb083859b011 100644
--- a/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOTest.java
+++ b/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlIOTest.java
@@ -23,9 +23,13 @@
 import com.google.common.collect.Lists;
 import java.io.File;
 import java.io.IOException;
+import java.io.Serializable;
 import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.util.List;
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlRootElement;
+import javax.xml.bind.annotation.XmlType;
 import org.apache.beam.sdk.io.FileIO;
 import org.apache.beam.sdk.testing.NeedsRunner;
 import org.apache.beam.sdk.testing.PAssert;
@@ -204,4 +208,68 @@ public void testWriteDisplayData() {
     assertThat(displayData, hasDisplayItem("rootElement", "bird"));
     assertThat(displayData, hasDisplayItem("recordClass", Integer.class));
   }
+
+  /**
+   * Test JAXB annotated class.
+   */
+  @SuppressWarnings("unused")
+  @XmlRootElement(name = "bird")
+  @XmlType(propOrder = { "name", "adjective" })
+  private static final class Bird implements Serializable {
+    private String name;
+    private String adjective;
+
+    @XmlElement(name = "species")
+    public String getName() {
+      return name;
+    }
+
+    public void setName(String name) {
+      this.name = name;
+    }
+
+    public String getAdjective() {
+      return adjective;
+    }
+
+    public void setAdjective(String adjective) {
+      this.adjective = adjective;
+    }
+
+    public Bird() {}
+
+    public Bird(String adjective, String name) {
+      this.adjective = adjective;
+      this.name = name;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) {
+        return true;
+      }
+      if (o == null || getClass() != o.getClass()) {
+        return false;
+      }
+
+      Bird bird = (Bird) o;
+
+      if (!name.equals(bird.name)) {
+        return false;
+      }
+      return adjective.equals(bird.adjective);
+    }
+
+    @Override
+    public int hashCode() {
+      int result = name.hashCode();
+      result = 31 * result + adjective.hashCode();
+      return result;
+    }
+
+    @Override
+    public String toString() {
+      return String.format("Bird: %s, %s", name, adjective);
+    }
+  }
 }