diff --git a/build.gradle b/build.gradle index 282ce61d735f..4f9c40387e0e 100644 --- a/build.gradle +++ b/build.gradle @@ -77,6 +77,8 @@ subprojects { all { exclude group: 'org.slf4j', module: 'slf4j-log4j12' } + + testArtifacts } ext { @@ -95,6 +97,15 @@ subprojects { testCompile 'org.mockito:mockito-core' } + task testJar(type: Jar){ + archiveClassifier = 'tests' + from sourceSets.test.output + } + + artifacts { + testArtifacts testJar + } + publishing { publications { nebula(MavenPublication) { @@ -284,19 +295,6 @@ project(':iceberg-hive') { exclude group: 'org.slf4j', module: 'slf4j-log4j12' } } - - task testJar(type: Jar){ - archiveClassifier = 'tests' - from sourceSets.test.output - } - - configurations { - testArtifacts - } - - artifacts { - testArtifacts testJar - } } project(':iceberg-orc') { @@ -335,6 +333,8 @@ project(':iceberg-parquet') { compileOnly("org.apache.hadoop:hadoop-client") { exclude group: 'org.apache.avro', module: 'avro' } + + testCompile project(path: ':iceberg-core', configuration: 'testArtifacts') } } diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetUtil.java b/core/src/test/java/org/apache/iceberg/TestMetrics.java similarity index 92% rename from parquet/src/test/java/org/apache/iceberg/parquet/TestParquetUtil.java rename to core/src/test/java/org/apache/iceberg/TestMetrics.java index e75c2477592b..379d297c3bdc 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetUtil.java +++ b/core/src/test/java/org/apache/iceberg/TestMetrics.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.iceberg.parquet; +package org.apache.iceberg; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -32,9 +32,8 @@ import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericData.Record; import org.apache.avro.generic.GenericFixed; -import org.apache.iceberg.Metrics; -import org.apache.iceberg.Schema; import org.apache.iceberg.avro.AvroSchemaUtil; +import org.apache.iceberg.io.InputFile; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types.BinaryType; import org.apache.iceberg.types.Types.BooleanType; @@ -55,17 +54,24 @@ import org.junit.Assert; import org.junit.Test; -import static org.apache.iceberg.Files.localInput; import static org.apache.iceberg.types.Conversions.fromByteBuffer; import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; -public class TestParquetUtil extends BaseParquetWritingTest { +/** + * Tests for Metrics. + */ +public abstract class TestMetrics { + private final UUID uuid = UUID.randomUUID(); private final GenericFixed fixed = new GenericData.Fixed( org.apache.avro.Schema.createFixed("fixedCol", null, null, 4), "abcd".getBytes(StandardCharsets.UTF_8)); + public abstract Metrics getMetrics(InputFile file); + + public abstract File writeRecords(Schema schema, Record... records) throws IOException; + @Test public void testMetricsForTopLevelFields() throws IOException { Schema schema = new Schema( @@ -113,9 +119,9 @@ public void testMetricsForTopLevelFields() throws IOException { secondRecord.put("fixedCol", fixed); secondRecord.put("binaryCol", "W".getBytes()); - File parquetFile = writeRecords(schema, firstRecord, secondRecord); + File recordsFile = writeRecords(schema, firstRecord, secondRecord); - Metrics metrics = ParquetUtil.fileMetrics(localInput(parquetFile)); + Metrics metrics = getMetrics(Files.localInput(recordsFile)); Assert.assertEquals(2L, (long) metrics.recordCount()); assertCounts(1, 2L, 0L, metrics); assertBounds(1, BooleanType.get(), false, true, metrics); @@ -160,9 +166,9 @@ public void testMetricsForDecimals() throws IOException { record.put("decimalAsInt64", new BigDecimal("4.75")); record.put("decimalAsFixed", new BigDecimal("5.80")); - File parquetFile = writeRecords(schema, record); + File recordsFile = writeRecords(schema, record); - Metrics metrics = ParquetUtil.fileMetrics(localInput(parquetFile)); + Metrics metrics = getMetrics(Files.localInput(recordsFile)); Assert.assertEquals(1L, (long) metrics.recordCount()); assertCounts(1, 1L, 0L, metrics); assertBounds(1, DecimalType.of(4, 2), new BigDecimal("2.55"), new BigDecimal("2.55"), metrics); @@ -197,9 +203,9 @@ public void testMetricsForNestedStructFields() throws IOException { record.put("intCol", Integer.MAX_VALUE); record.put("nestedStructCol", nestedStruct); - File parquetFile = writeRecords(schema, record); + File recordsFile = writeRecords(schema, record); - Metrics metrics = ParquetUtil.fileMetrics(localInput(parquetFile)); + Metrics metrics = getMetrics(Files.localInput(recordsFile)); Assert.assertEquals(1L, (long) metrics.recordCount()); assertCounts(1, 1L, 0L, metrics); assertBounds(1, IntegerType.get(), Integer.MAX_VALUE, Integer.MAX_VALUE, metrics); @@ -232,9 +238,9 @@ public void testMetricsForListAndMapElements() throws IOException { map.put("4", struct); record.put(1, map); - File parquetFile = writeRecords(schema, record); + File recordsFile = writeRecords(schema, record); - Metrics metrics = ParquetUtil.fileMetrics(localInput(parquetFile)); + Metrics metrics = getMetrics(Files.localInput(recordsFile)); Assert.assertEquals(1L, (long) metrics.recordCount()); assertCounts(1, 1, 0, metrics); assertBounds(1, IntegerType.get(), null, null, metrics); @@ -256,9 +262,9 @@ public void testMetricsForNullColumns() throws IOException { Record secondRecord = new Record(AvroSchemaUtil.convert(schema.asStruct())); secondRecord.put("intCol", null); - File parquetFile = writeRecords(schema, firstRecord, secondRecord); + File recordsFile = writeRecords(schema, firstRecord, secondRecord); - Metrics metrics = ParquetUtil.fileMetrics(localInput(parquetFile)); + Metrics metrics = getMetrics(Files.localInput(recordsFile)); Assert.assertEquals(2L, (long) metrics.recordCount()); assertCounts(1, 2, 2, metrics); assertBounds(1, IntegerType.get(), null, null, metrics); diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetMetricsTruncation.java b/core/src/test/java/org/apache/iceberg/TestMetricsTruncation.java similarity index 96% rename from parquet/src/test/java/org/apache/iceberg/parquet/TestParquetMetricsTruncation.java rename to core/src/test/java/org/apache/iceberg/TestMetricsTruncation.java index a887e19cff53..7a999048ba30 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetMetricsTruncation.java +++ b/core/src/test/java/org/apache/iceberg/TestMetricsTruncation.java @@ -17,23 +17,23 @@ * under the License. */ -package org.apache.iceberg.parquet; +package org.apache.iceberg; +import java.nio.ByteBuffer; +import java.util.Comparator; import org.apache.iceberg.expressions.Literal; import org.junit.Assert; import org.junit.Test; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.Comparator; import static org.apache.iceberg.util.BinaryUtil.truncateBinaryMax; import static org.apache.iceberg.util.BinaryUtil.truncateBinaryMin; import static org.apache.iceberg.util.UnicodeUtil.truncateStringMax; import static org.apache.iceberg.util.UnicodeUtil.truncateStringMin; -public class TestParquetMetricsTruncation { +@SuppressWarnings("checkstyle:LocalVariableName") +public class TestMetricsTruncation { @Test - public void testTruncateBinaryMin() throws IOException { + public void testTruncateBinaryMin() { ByteBuffer test1 = ByteBuffer.wrap(new byte[] {1, 1, (byte) 0xFF, 2}); // Output of test1 when truncated to 2 bytes ByteBuffer test1_2_expected = ByteBuffer.wrap(new byte[] {1, 1}); @@ -55,7 +55,7 @@ public void testTruncateBinaryMin() throws IOException { } @Test - public void testTruncateBinaryMax() throws IOException { + public void testTruncateBinaryMax() { ByteBuffer test1 = ByteBuffer.wrap(new byte[] {1, 1, 2}); ByteBuffer test2 = ByteBuffer.wrap(new byte[] {1, 1, (byte) 0xFF, 2}); ByteBuffer test3 = ByteBuffer.wrap(new byte[] {(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, 2}); @@ -83,8 +83,9 @@ public void testTruncateBinaryMax() throws IOException { cmp.compare(truncateBinaryMax(Literal.of(test4), 2).value(), expectedOutput) == 0); } + @SuppressWarnings("checkstyle:AvoidEscapedUnicodeCharacters") @Test - public void testTruncateStringMin() throws IOException { + public void testTruncateStringMin() { String test1 = "イロハニホヘト"; // Output of test1 when truncated to 2 unicode characters String test1_2_expected = "イロ"; @@ -96,7 +97,6 @@ public void testTruncateStringMin() throws IOException { // test4 consists of 2 4 byte UTF-8 characters String test4 = "\uD800\uDC00\uD800\uDC00"; String test4_1_expected = "\uD800\uDC00"; - Comparator cmp = Literal.of(test1).comparator(); Assert.assertTrue("Truncated lower bound should be lower than or equal to the actual lower bound", cmp.compare(truncateStringMin(Literal.of(test1), 3).value(), test1) <= 0); @@ -120,8 +120,9 @@ public void testTruncateStringMin() throws IOException { cmp.compare(truncateStringMin(Literal.of(test4), 1).value(), test4_1_expected) == 0); } + @SuppressWarnings("checkstyle:AvoidEscapedUnicodeCharacters") @Test - public void testTruncateStringMax() throws IOException { + public void testTruncateStringMax() { String test1 = "イロハニホヘト"; // Output of test1 when truncated to 2 unicode characters String test1_2_expected = "イヮ"; diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/BaseParquetWritingTest.java b/parquet/src/test/java/org/apache/iceberg/parquet/ParquetWritingTestUtils.java similarity index 83% rename from parquet/src/test/java/org/apache/iceberg/parquet/BaseParquetWritingTest.java rename to parquet/src/test/java/org/apache/iceberg/parquet/ParquetWritingTestUtils.java index d587b6442706..f42bdbdc6406 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/BaseParquetWritingTest.java +++ b/parquet/src/test/java/org/apache/iceberg/parquet/ParquetWritingTestUtils.java @@ -31,24 +31,23 @@ import org.apache.iceberg.Schema; import org.apache.iceberg.io.FileAppender; import org.apache.parquet.schema.MessageType; -import org.junit.Rule; import org.junit.rules.TemporaryFolder; import static org.apache.iceberg.Files.localOutput; /** - * Base utility test class for tests that need to write Parquet files + * Utilities for tests that need to write Parquet files. */ -public abstract class BaseParquetWritingTest { +class ParquetWritingTestUtils { - @Rule - public TemporaryFolder temp = new TemporaryFolder(); + private ParquetWritingTestUtils() {} - File writeRecords(Schema schema, GenericData.Record... records) throws IOException { - return writeRecords(schema, Collections.emptyMap(), null, records); + static File writeRecords(TemporaryFolder temp, Schema schema, GenericData.Record... records) throws IOException { + return writeRecords(temp, schema, Collections.emptyMap(), null, records); } - File writeRecords( + static File writeRecords( + TemporaryFolder temp, Schema schema, Map properties, Function> createWriterFunc, GenericData.Record... records) throws IOException { diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquet.java b/parquet/src/test/java/org/apache/iceberg/parquet/TestParquet.java index 9429e45cdd30..f5433eaa160c 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquet.java +++ b/parquet/src/test/java/org/apache/iceberg/parquet/TestParquet.java @@ -32,13 +32,19 @@ import org.apache.parquet.hadoop.ParquetFileReader; import org.apache.parquet.schema.MessageType; import org.junit.Assert; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TemporaryFolder; import static org.apache.iceberg.Files.localInput; import static org.apache.iceberg.TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES; +import static org.apache.iceberg.parquet.ParquetWritingTestUtils.writeRecords; import static org.apache.iceberg.types.Types.NestedField.optional; -public class TestParquet extends BaseParquetWritingTest { +public class TestParquet { + + @Rule + public TemporaryFolder temp = new TemporaryFolder(); @Test public void testRowGroupSizeConfigurable() throws IOException { @@ -79,7 +85,7 @@ private File generateFileWithTwoRowGroups(Function