From ead22f45f82913e7d39095a160d4e6b1fcb2db46 Mon Sep 17 00:00:00 2001 From: Edgar Rodriguez Date: Thu, 25 Jul 2019 16:15:43 -0700 Subject: [PATCH 01/10] Refactor test metrics for reuse --- build.gradle | 15 + .../java/org/apache/iceberg/TestMetrics.java | 278 ++++++++++++++++++ .../parquet/BaseParquetWritingTest.java | 13 +- .../apache/iceberg/parquet/TestParquet.java | 10 +- .../iceberg/parquet/TestParquetMetrics.java | 49 +++ .../iceberg/parquet/TestParquetUtil.java | 19 +- 6 files changed, 369 insertions(+), 15 deletions(-) create mode 100644 core/src/test/java/org/apache/iceberg/TestMetrics.java create mode 100644 parquet/src/test/java/org/apache/iceberg/parquet/TestParquetMetrics.java diff --git a/build.gradle b/build.gradle index 282ce61d735f..f3bc29bec3ab 100644 --- a/build.gradle +++ b/build.gradle @@ -231,6 +231,19 @@ project(':iceberg-core') { exclude group: 'org.slf4j', module: 'slf4j-log4j12' } } + + task testJar(type: Jar){ + archiveClassifier = 'tests' + from sourceSets.test.output + } + + configurations { + testArtifacts + } + + artifacts { + testArtifacts testJar + } } project(':iceberg-data') { @@ -335,6 +348,8 @@ project(':iceberg-parquet') { compileOnly("org.apache.hadoop:hadoop-client") { exclude group: 'org.apache.avro', module: 'avro' } + + testCompile project(path: ':iceberg-core', configuration: 'testArtifacts') } } diff --git a/core/src/test/java/org/apache/iceberg/TestMetrics.java b/core/src/test/java/org/apache/iceberg/TestMetrics.java new file mode 100644 index 000000000000..32d86d36cf7b --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/TestMetrics.java @@ -0,0 +1,278 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import java.io.File; +import java.io.IOException; +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.StandardCharsets; +import java.util.Map; +import java.util.UUID; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericFixed; +import org.apache.iceberg.avro.AvroSchemaUtil; +import org.apache.iceberg.io.InputFile; +import org.apache.iceberg.types.Conversions; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; +import org.junit.Assert; +import org.junit.Test; + +import static org.apache.iceberg.types.Types.NestedField.optional; +import static org.apache.iceberg.types.Types.NestedField.required; + +/** + * Tests for Metrics. + */ +public abstract class TestMetrics { + + private final UUID uuid = UUID.randomUUID(); + private final GenericFixed fixed = new GenericData.Fixed( + org.apache.avro.Schema.createFixed("fixedCol", null, null, 4), + "abcd".getBytes(StandardCharsets.UTF_8)); + + public abstract Metrics getMetrics(InputFile file); + + public abstract File writeRecords(Schema schema, GenericData.Record... records) throws IOException; + + @Test + public void testMetricsForTopLevelFields() throws IOException { + Schema schema = new Schema( + optional(1, "booleanCol", Types.BooleanType.get()), + required(2, "intCol", Types.IntegerType.get()), + optional(3, "longCol", Types.LongType.get()), + required(4, "floatCol", Types.FloatType.get()), + optional(5, "doubleCol", Types.DoubleType.get()), + optional(6, "decimalCol", Types.DecimalType.of(10, 2)), + required(7, "stringCol", Types.StringType.get()), + optional(8, "dateCol", Types.DateType.get()), + required(9, "timeCol", Types.TimeType.get()), + required(10, "timestampCol", Types.TimestampType.withoutZone()), + optional(11, "uuidCol", Types.UUIDType.get()), + required(12, "fixedCol", Types.FixedType.ofLength(4)), + required(13, "binaryCol", Types.BinaryType.get()) + ); + + GenericData.Record firstRecord = new GenericData.Record(AvroSchemaUtil.convert(schema.asStruct())); + firstRecord.put("booleanCol", true); + firstRecord.put("intCol", 3); + firstRecord.put("longCol", 5L); + firstRecord.put("floatCol", 2.0F); + firstRecord.put("doubleCol", 2.0D); + firstRecord.put("decimalCol", new BigDecimal("3.50")); + firstRecord.put("stringCol", "AAA"); + firstRecord.put("dateCol", 1500); + firstRecord.put("timeCol", 2000L); + firstRecord.put("timestampCol", 0L); + firstRecord.put("uuidCol", uuid); + firstRecord.put("fixedCol", fixed); + firstRecord.put("binaryCol", "S".getBytes()); + + GenericData.Record secondRecord = new GenericData.Record(AvroSchemaUtil.convert(schema.asStruct())); + secondRecord.put("booleanCol", false); + secondRecord.put("intCol", Integer.MIN_VALUE); + secondRecord.put("longCol", null); + secondRecord.put("floatCol", 1.0F); + secondRecord.put("doubleCol", null); + secondRecord.put("decimalCol", null); + secondRecord.put("stringCol", "ZZZ"); + secondRecord.put("dateCol", null); + secondRecord.put("timeCol", 3000L); + secondRecord.put("timestampCol", 1000L); + secondRecord.put("uuidCol", null); + secondRecord.put("fixedCol", fixed); + secondRecord.put("binaryCol", "W".getBytes()); + + File recordsFile = writeRecords(schema, firstRecord, secondRecord); + + Metrics metrics = getMetrics(Files.localInput(recordsFile)); + Assert.assertEquals(2L, (long) metrics.recordCount()); + assertCounts(1, 2L, 0L, metrics); + assertBounds(1, Types.BooleanType.get(), false, true, metrics); + assertCounts(2, 2L, 0L, metrics); + assertBounds(2, Types.IntegerType.get(), Integer.MIN_VALUE, 3, metrics); + assertCounts(3, 2L, 1L, metrics); + assertBounds(3, Types.LongType.get(), 5L, 5L, metrics); + assertCounts(4, 2L, 0L, metrics); + assertBounds(4, Types.FloatType.get(), 1.0F, 2.0F, metrics); + assertCounts(5, 2L, 1L, metrics); + assertBounds(5, Types.DoubleType.get(), 2.0D, 2.0D, metrics); + assertCounts(6, 2L, 1L, metrics); + assertBounds(6, Types.DecimalType.of(10, 2), new BigDecimal("3.50"), new BigDecimal("3.50"), metrics); + assertCounts(7, 2L, 0L, metrics); + assertBounds(7, Types.StringType.get(), CharBuffer.wrap("AAA"), CharBuffer.wrap("ZZZ"), metrics); + assertCounts(8, 2L, 1L, metrics); + assertBounds(8, Types.DateType.get(), 1500, 1500, metrics); + assertCounts(9, 2L, 0L, metrics); + assertBounds(9, Types.TimeType.get(), 2000L, 3000L, metrics); + assertCounts(10, 2L, 0L, metrics); + assertBounds(10, Types.TimestampType.withoutZone(), 0L, 1000L, metrics); + assertCounts(11, 2L, 1L, metrics); + assertBounds(11, Types.UUIDType.get(), uuid, uuid, metrics); + assertCounts(12, 2L, 0L, metrics); + assertBounds(12, Types.FixedType.ofLength(4), + ByteBuffer.wrap(fixed.bytes()), ByteBuffer.wrap(fixed.bytes()), metrics); + assertCounts(13, 2L, 0L, metrics); + assertBounds(13, Types.BinaryType.get(), + ByteBuffer.wrap("S".getBytes()), ByteBuffer.wrap("W".getBytes()), metrics); + } + + @Test + public void testMetricsForDecimals() throws IOException { + Schema schema = new Schema( + required(1, "decimalAsInt32", Types.DecimalType.of(4, 2)), + required(2, "decimalAsInt64", Types.DecimalType.of(14, 2)), + required(3, "decimalAsFixed", Types.DecimalType.of(22, 2)) + ); + + GenericData.Record record = new GenericData.Record(AvroSchemaUtil.convert(schema.asStruct())); + record.put("decimalAsInt32", new BigDecimal("2.55")); + record.put("decimalAsInt64", new BigDecimal("4.75")); + record.put("decimalAsFixed", new BigDecimal("5.80")); + + File recordsFile = writeRecords(schema, record); + + Metrics metrics = getMetrics(Files.localInput(recordsFile)); + Assert.assertEquals(1L, (long) metrics.recordCount()); + assertCounts(1, 1L, 0L, metrics); + assertBounds(1, Types.DecimalType.of(4, 2), new BigDecimal("2.55"), new BigDecimal("2.55"), metrics); + assertCounts(2, 1L, 0L, metrics); + assertBounds(2, Types.DecimalType.of(14, 2), new BigDecimal("4.75"), new BigDecimal("4.75"), metrics); + assertCounts(3, 1L, 0L, metrics); + assertBounds(3, Types.DecimalType.of(22, 2), new BigDecimal("5.80"), new BigDecimal("5.80"), metrics); + } + + @Test + public void testMetricsForNestedStructFields() throws IOException { + Types.StructType leafStructType = Types.StructType.of( + optional(5, "leafLongCol", Types.LongType.get()), + optional(6, "leafBinaryCol", Types.BinaryType.get()) + ); + Types.StructType nestedStructType = Types.StructType.of( + required(3, "longCol", Types.LongType.get()), + required(4, "leafStructCol", leafStructType) + ); + Schema schema = new Schema( + required(1, "intCol", Types.IntegerType.get()), + required(2, "nestedStructCol", nestedStructType) + ); + + GenericData.Record leafStruct = new GenericData.Record(AvroSchemaUtil.convert(leafStructType)); + leafStruct.put("leafLongCol", 20L); + leafStruct.put("leafBinaryCol", "A".getBytes()); + GenericData.Record nestedStruct = new GenericData.Record(AvroSchemaUtil.convert(nestedStructType)); + nestedStruct.put("longCol", 100L); + nestedStruct.put("leafStructCol", leafStruct); + GenericData.Record record = new GenericData.Record(AvroSchemaUtil.convert(schema.asStruct())); + record.put("intCol", Integer.MAX_VALUE); + record.put("nestedStructCol", nestedStruct); + + File recordsFile = writeRecords(schema, record); + + Metrics metrics = getMetrics(Files.localInput(recordsFile)); + Assert.assertEquals(1L, (long) metrics.recordCount()); + assertCounts(1, 1L, 0L, metrics); + assertBounds(1, Types.IntegerType.get(), Integer.MAX_VALUE, Integer.MAX_VALUE, metrics); + assertCounts(3, 1L, 0L, metrics); + assertBounds(3, Types.LongType.get(), 100L, 100L, metrics); + assertCounts(5, 1L, 0L, metrics); + assertBounds(5, Types.LongType.get(), 20L, 20L, metrics); + assertCounts(6, 1L, 0L, metrics); + assertBounds(6, Types.BinaryType.get(), + ByteBuffer.wrap("A".getBytes()), ByteBuffer.wrap("A".getBytes()), metrics); + } + + @Test + public void testMetricsForListAndMapElements() throws IOException { + Types.StructType structType = Types.StructType.of( + required(1, "leafIntCol", Types.IntegerType.get()), + optional(2, "leafStringCol", Types.StringType.get()) + ); + Schema schema = new Schema( + optional(3, "intListCol", Types.ListType.ofRequired(4, Types.IntegerType.get())), + optional(5, "mapCol", Types.MapType.ofRequired(6, 7, Types.StringType.get(), structType)) + ); + + GenericData.Record record = new GenericData.Record(AvroSchemaUtil.convert(schema.asStruct())); + record.put("intListCol", Lists.newArrayList(10, 11, 12)); + GenericData.Record struct = new GenericData.Record(AvroSchemaUtil.convert(structType)); + struct.put("leafIntCol", 1); + struct.put("leafStringCol", "BBB"); + Map map = Maps.newHashMap(); + map.put("4", struct); + record.put(1, map); + + File recordsFile = writeRecords(schema, record); + + Metrics metrics = getMetrics(Files.localInput(recordsFile)); + Assert.assertEquals(1L, (long) metrics.recordCount()); + assertCounts(1, 1, 0, metrics); + assertBounds(1, Types.IntegerType.get(), null, null, metrics); + assertCounts(2, 1, 0, metrics); + assertBounds(2, Types.StringType.get(), null, null, metrics); + assertCounts(4, 3, 0, metrics); + assertBounds(4, Types.IntegerType.get(), null, null, metrics); + assertCounts(6, 1, 0, metrics); + assertBounds(6, Types.StringType.get(), null, null, metrics); + } + + @Test + public void testMetricsForNullColumns() throws IOException { + Schema schema = new Schema( + optional(1, "intCol", Types.IntegerType.get()) + ); + GenericData.Record firstRecord = new GenericData.Record(AvroSchemaUtil.convert(schema.asStruct())); + firstRecord.put("intCol", null); + GenericData.Record secondRecord = new GenericData.Record(AvroSchemaUtil.convert(schema.asStruct())); + secondRecord.put("intCol", null); + + File recordsFile = writeRecords(schema, firstRecord, secondRecord); + + Metrics metrics = getMetrics(Files.localInput(recordsFile)); + Assert.assertEquals(2L, (long) metrics.recordCount()); + assertCounts(1, 2, 2, metrics); + assertBounds(1, Types.IntegerType.get(), null, null, metrics); + } + + private void assertCounts(int fieldId, long valueCount, long nullValueCount, Metrics metrics) { + Map valueCounts = metrics.valueCounts(); + Map nullValueCounts = metrics.nullValueCounts(); + Assert.assertEquals(valueCount, (long) valueCounts.get(fieldId)); + Assert.assertEquals(nullValueCount, (long) nullValueCounts.get(fieldId)); + } + + private void assertBounds(int fieldId, Type type, T lowerBound, T upperBound, Metrics metrics) { + Map lowerBounds = metrics.lowerBounds(); + Map upperBounds = metrics.upperBounds(); + + Assert.assertEquals( + lowerBound, + lowerBounds.containsKey(fieldId) ? + Conversions.fromByteBuffer(type, lowerBounds.get(fieldId)) : null); + Assert.assertEquals( + upperBound, + upperBounds.containsKey(fieldId) ? + Conversions.fromByteBuffer(type, upperBounds.get(fieldId)) : null); + } +} diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/BaseParquetWritingTest.java b/parquet/src/test/java/org/apache/iceberg/parquet/BaseParquetWritingTest.java index d587b6442706..50f533113e97 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/BaseParquetWritingTest.java +++ b/parquet/src/test/java/org/apache/iceberg/parquet/BaseParquetWritingTest.java @@ -31,7 +31,6 @@ import org.apache.iceberg.Schema; import org.apache.iceberg.io.FileAppender; import org.apache.parquet.schema.MessageType; -import org.junit.Rule; import org.junit.rules.TemporaryFolder; import static org.apache.iceberg.Files.localOutput; @@ -39,16 +38,16 @@ /** * Base utility test class for tests that need to write Parquet files */ -public abstract class BaseParquetWritingTest { +class BaseParquetWritingTest { - @Rule - public TemporaryFolder temp = new TemporaryFolder(); + private BaseParquetWritingTest() {} - File writeRecords(Schema schema, GenericData.Record... records) throws IOException { - return writeRecords(schema, Collections.emptyMap(), null, records); + static File writeRecords(TemporaryFolder temp, Schema schema, GenericData.Record... records) throws IOException { + return writeRecords(temp, schema, Collections.emptyMap(), null, records); } - File writeRecords( + static File writeRecords( + TemporaryFolder temp, Schema schema, Map properties, Function> createWriterFunc, GenericData.Record... records) throws IOException { diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquet.java b/parquet/src/test/java/org/apache/iceberg/parquet/TestParquet.java index 9429e45cdd30..b8bb2e572992 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquet.java +++ b/parquet/src/test/java/org/apache/iceberg/parquet/TestParquet.java @@ -32,13 +32,19 @@ import org.apache.parquet.hadoop.ParquetFileReader; import org.apache.parquet.schema.MessageType; import org.junit.Assert; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TemporaryFolder; import static org.apache.iceberg.Files.localInput; import static org.apache.iceberg.TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES; +import static org.apache.iceberg.parquet.BaseParquetWritingTest.writeRecords; import static org.apache.iceberg.types.Types.NestedField.optional; -public class TestParquet extends BaseParquetWritingTest { +public class TestParquet { + + @Rule + public TemporaryFolder temp = new TemporaryFolder(); @Test public void testRowGroupSizeConfigurable() throws IOException { @@ -79,7 +85,7 @@ private File generateFileWithTwoRowGroups(Function Date: Tue, 30 Jul 2019 15:05:08 -0700 Subject: [PATCH 02/10] Remove TestParquetUtil replaced by TestParquetMetrics --- .../iceberg/parquet/TestParquetUtil.java | 293 ------------------ 1 file changed, 293 deletions(-) delete mode 100644 parquet/src/test/java/org/apache/iceberg/parquet/TestParquetUtil.java diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetUtil.java b/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetUtil.java deleted file mode 100644 index 13c0bf1047c2..000000000000 --- a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetUtil.java +++ /dev/null @@ -1,293 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.iceberg.parquet; - -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import java.io.File; -import java.io.IOException; -import java.math.BigDecimal; -import java.nio.ByteBuffer; -import java.nio.CharBuffer; -import java.nio.charset.StandardCharsets; -import java.util.Map; -import java.util.UUID; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericData.Record; -import org.apache.avro.generic.GenericFixed; -import org.apache.iceberg.Metrics; -import org.apache.iceberg.Schema; -import org.apache.iceberg.avro.AvroSchemaUtil; -import org.apache.iceberg.types.Type; -import org.apache.iceberg.types.Types.BinaryType; -import org.apache.iceberg.types.Types.BooleanType; -import org.apache.iceberg.types.Types.DateType; -import org.apache.iceberg.types.Types.DecimalType; -import org.apache.iceberg.types.Types.DoubleType; -import org.apache.iceberg.types.Types.FixedType; -import org.apache.iceberg.types.Types.FloatType; -import org.apache.iceberg.types.Types.IntegerType; -import org.apache.iceberg.types.Types.ListType; -import org.apache.iceberg.types.Types.LongType; -import org.apache.iceberg.types.Types.MapType; -import org.apache.iceberg.types.Types.StringType; -import org.apache.iceberg.types.Types.StructType; -import org.apache.iceberg.types.Types.TimeType; -import org.apache.iceberg.types.Types.TimestampType; -import org.apache.iceberg.types.Types.UUIDType; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import static org.apache.iceberg.Files.localInput; -import static org.apache.iceberg.parquet.BaseParquetWritingTest.writeRecords; -import static org.apache.iceberg.types.Conversions.fromByteBuffer; -import static org.apache.iceberg.types.Types.NestedField.optional; -import static org.apache.iceberg.types.Types.NestedField.required; - -public class TestParquetUtil { - - @Rule - public TemporaryFolder temp = new TemporaryFolder(); - - private final UUID uuid = UUID.randomUUID(); - private final GenericFixed fixed = new GenericData.Fixed( - org.apache.avro.Schema.createFixed("fixedCol", null, null, 4), - "abcd".getBytes(StandardCharsets.UTF_8)); - - @Test - public void testMetricsForTopLevelFields() throws IOException { - Schema schema = new Schema( - optional(1, "booleanCol", BooleanType.get()), - required(2, "intCol", IntegerType.get()), - optional(3, "longCol", LongType.get()), - required(4, "floatCol", FloatType.get()), - optional(5, "doubleCol", DoubleType.get()), - optional(6, "decimalCol", DecimalType.of(10, 2)), - required(7, "stringCol", StringType.get()), - optional(8, "dateCol", DateType.get()), - required(9, "timeCol", TimeType.get()), - required(10, "timestampCol", TimestampType.withoutZone()), - optional(11, "uuidCol", UUIDType.get()), - required(12, "fixedCol", FixedType.ofLength(4)), - required(13, "binaryCol", BinaryType.get()) - ); - - Record firstRecord = new Record(AvroSchemaUtil.convert(schema.asStruct())); - firstRecord.put("booleanCol", true); - firstRecord.put("intCol", 3); - firstRecord.put("longCol", 5L); - firstRecord.put("floatCol", 2.0F); - firstRecord.put("doubleCol", 2.0D); - firstRecord.put("decimalCol", new BigDecimal("3.50")); - firstRecord.put("stringCol", "AAA"); - firstRecord.put("dateCol", 1500); - firstRecord.put("timeCol", 2000L); - firstRecord.put("timestampCol", 0L); - firstRecord.put("uuidCol", uuid); - firstRecord.put("fixedCol", fixed); - firstRecord.put("binaryCol", "S".getBytes()); - Record secondRecord = new Record(AvroSchemaUtil.convert(schema.asStruct())); - secondRecord.put("booleanCol", false); - secondRecord.put("intCol", Integer.MIN_VALUE); - secondRecord.put("longCol", null); - secondRecord.put("floatCol", 1.0F); - secondRecord.put("doubleCol", null); - secondRecord.put("decimalCol", null); - secondRecord.put("stringCol", "ZZZ"); - secondRecord.put("dateCol", null); - secondRecord.put("timeCol", 3000L); - secondRecord.put("timestampCol", 1000L); - secondRecord.put("uuidCol", null); - secondRecord.put("fixedCol", fixed); - secondRecord.put("binaryCol", "W".getBytes()); - - File parquetFile = writeRecords(temp, schema, firstRecord, secondRecord); - - Metrics metrics = ParquetUtil.fileMetrics(localInput(parquetFile)); - Assert.assertEquals(2L, (long) metrics.recordCount()); - assertCounts(1, 2L, 0L, metrics); - assertBounds(1, BooleanType.get(), false, true, metrics); - assertCounts(2, 2L, 0L, metrics); - assertBounds(2, IntegerType.get(), Integer.MIN_VALUE, 3, metrics); - assertCounts(3, 2L, 1L, metrics); - assertBounds(3, LongType.get(), 5L, 5L, metrics); - assertCounts(4, 2L, 0L, metrics); - assertBounds(4, FloatType.get(), 1.0F, 2.0F, metrics); - assertCounts(5, 2L, 1L, metrics); - assertBounds(5, DoubleType.get(), 2.0D, 2.0D, metrics); - assertCounts(6, 2L, 1L, metrics); - assertBounds(6, DecimalType.of(10, 2), new BigDecimal("3.50"), new BigDecimal("3.50"), metrics); - assertCounts(7, 2L, 0L, metrics); - assertBounds(7, StringType.get(), CharBuffer.wrap("AAA"), CharBuffer.wrap("ZZZ"), metrics); - assertCounts(8, 2L, 1L, metrics); - assertBounds(8, DateType.get(), 1500, 1500, metrics); - assertCounts(9, 2L, 0L, metrics); - assertBounds(9, TimeType.get(), 2000L, 3000L, metrics); - assertCounts(10, 2L, 0L, metrics); - assertBounds(10, TimestampType.withoutZone(), 0L, 1000L, metrics); - assertCounts(11, 2L, 1L, metrics); - assertBounds(11, UUIDType.get(), uuid, uuid, metrics); - assertCounts(12, 2L, 0L, metrics); - assertBounds(12, FixedType.ofLength(4), - ByteBuffer.wrap(fixed.bytes()), ByteBuffer.wrap(fixed.bytes()), metrics); - assertCounts(13, 2L, 0L, metrics); - assertBounds(13, BinaryType.get(), - ByteBuffer.wrap("S".getBytes()), ByteBuffer.wrap("W".getBytes()), metrics); - } - - @Test - public void testMetricsForDecimals() throws IOException { - Schema schema = new Schema( - required(1, "decimalAsInt32", DecimalType.of(4, 2)), - required(2, "decimalAsInt64", DecimalType.of(14, 2)), - required(3, "decimalAsFixed", DecimalType.of(22, 2)) - ); - - Record record = new Record(AvroSchemaUtil.convert(schema.asStruct())); - record.put("decimalAsInt32", new BigDecimal("2.55")); - record.put("decimalAsInt64", new BigDecimal("4.75")); - record.put("decimalAsFixed", new BigDecimal("5.80")); - - File parquetFile = writeRecords(temp, schema, record); - - Metrics metrics = ParquetUtil.fileMetrics(localInput(parquetFile)); - Assert.assertEquals(1L, (long) metrics.recordCount()); - assertCounts(1, 1L, 0L, metrics); - assertBounds(1, DecimalType.of(4, 2), new BigDecimal("2.55"), new BigDecimal("2.55"), metrics); - assertCounts(2, 1L, 0L, metrics); - assertBounds(2, DecimalType.of(14, 2), new BigDecimal("4.75"), new BigDecimal("4.75"), metrics); - assertCounts(3, 1L, 0L, metrics); - assertBounds(3, DecimalType.of(22, 2), new BigDecimal("5.80"), new BigDecimal("5.80"), metrics); - } - - @Test - public void testMetricsForNestedStructFields() throws IOException { - StructType leafStructType = StructType.of( - optional(5, "leafLongCol", LongType.get()), - optional(6, "leafBinaryCol", BinaryType.get()) - ); - StructType nestedStructType = StructType.of( - required(3, "longCol", LongType.get()), - required(4, "leafStructCol", leafStructType) - ); - Schema schema = new Schema( - required(1, "intCol", IntegerType.get()), - required(2, "nestedStructCol", nestedStructType) - ); - - Record leafStruct = new Record(AvroSchemaUtil.convert(leafStructType)); - leafStruct.put("leafLongCol", 20L); - leafStruct.put("leafBinaryCol", "A".getBytes()); - Record nestedStruct = new Record(AvroSchemaUtil.convert(nestedStructType)); - nestedStruct.put("longCol", 100L); - nestedStruct.put("leafStructCol", leafStruct); - Record record = new Record(AvroSchemaUtil.convert(schema.asStruct())); - record.put("intCol", Integer.MAX_VALUE); - record.put("nestedStructCol", nestedStruct); - - File parquetFile = writeRecords(temp, schema, record); - - Metrics metrics = ParquetUtil.fileMetrics(localInput(parquetFile)); - Assert.assertEquals(1L, (long) metrics.recordCount()); - assertCounts(1, 1L, 0L, metrics); - assertBounds(1, IntegerType.get(), Integer.MAX_VALUE, Integer.MAX_VALUE, metrics); - assertCounts(3, 1L, 0L, metrics); - assertBounds(3, LongType.get(), 100L, 100L, metrics); - assertCounts(5, 1L, 0L, metrics); - assertBounds(5, LongType.get(), 20L, 20L, metrics); - assertCounts(6, 1L, 0L, metrics); - assertBounds(6, BinaryType.get(), - ByteBuffer.wrap("A".getBytes()), ByteBuffer.wrap("A".getBytes()), metrics); - } - - @Test - public void testMetricsForListAndMapElements() throws IOException { - StructType structType = StructType.of( - required(1, "leafIntCol", IntegerType.get()), - optional(2, "leafStringCol", StringType.get()) - ); - Schema schema = new Schema( - optional(3, "intListCol", ListType.ofRequired(4, IntegerType.get())), - optional(5, "mapCol", MapType.ofRequired(6, 7, StringType.get(), structType)) - ); - - Record record = new Record(AvroSchemaUtil.convert(schema.asStruct())); - record.put("intListCol", Lists.newArrayList(10, 11, 12)); - Record struct = new Record(AvroSchemaUtil.convert(structType)); - struct.put("leafIntCol", 1); - struct.put("leafStringCol", "BBB"); - Map map = Maps.newHashMap(); - map.put("4", struct); - record.put(1, map); - - File parquetFile = writeRecords(temp, schema, record); - - Metrics metrics = ParquetUtil.fileMetrics(localInput(parquetFile)); - Assert.assertEquals(1L, (long) metrics.recordCount()); - assertCounts(1, 1, 0, metrics); - assertBounds(1, IntegerType.get(), null, null, metrics); - assertCounts(2, 1, 0, metrics); - assertBounds(2, StringType.get(), null, null, metrics); - assertCounts(4, 3, 0, metrics); - assertBounds(4, IntegerType.get(), null, null, metrics); - assertCounts(6, 1, 0, metrics); - assertBounds(6, StringType.get(), null, null, metrics); - } - - @Test - public void testMetricsForNullColumns() throws IOException { - Schema schema = new Schema( - optional(1, "intCol", IntegerType.get()) - ); - Record firstRecord = new Record(AvroSchemaUtil.convert(schema.asStruct())); - firstRecord.put("intCol", null); - Record secondRecord = new Record(AvroSchemaUtil.convert(schema.asStruct())); - secondRecord.put("intCol", null); - - File parquetFile = writeRecords(temp, schema, firstRecord, secondRecord); - - Metrics metrics = ParquetUtil.fileMetrics(localInput(parquetFile)); - Assert.assertEquals(2L, (long) metrics.recordCount()); - assertCounts(1, 2, 2, metrics); - assertBounds(1, IntegerType.get(), null, null, metrics); - } - - private void assertCounts(int fieldId, long valueCount, long nullValueCount, Metrics metrics) { - Map valueCounts = metrics.valueCounts(); - Map nullValueCounts = metrics.nullValueCounts(); - Assert.assertEquals(valueCount, (long) valueCounts.get(fieldId)); - Assert.assertEquals(nullValueCount, (long) nullValueCounts.get(fieldId)); - } - - private void assertBounds(int fieldId, Type type, T lowerBound, T upperBound, Metrics metrics) { - Map lowerBounds = metrics.lowerBounds(); - Map upperBounds = metrics.upperBounds(); - - Assert.assertEquals( - lowerBound, - lowerBounds.containsKey(fieldId) ? fromByteBuffer(type, lowerBounds.get(fieldId)) : null); - Assert.assertEquals( - upperBound, - upperBounds.containsKey(fieldId) ? fromByteBuffer(type, upperBounds.get(fieldId)) : null); - } - -} From ada55fbb431a1fc0c6fa0455a9aa38ecf3bfafd5 Mon Sep 17 00:00:00 2001 From: Edgar Rodriguez Date: Tue, 30 Jul 2019 15:35:18 -0700 Subject: [PATCH 03/10] Relocate TestMetricsTruncation to core --- .../test/java/org/apache/iceberg/TestMetricsTruncation.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename parquet/src/test/java/org/apache/iceberg/parquet/TestParquetMetricsTruncation.java => core/src/test/java/org/apache/iceberg/TestMetricsTruncation.java (99%) diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetMetricsTruncation.java b/core/src/test/java/org/apache/iceberg/TestMetricsTruncation.java similarity index 99% rename from parquet/src/test/java/org/apache/iceberg/parquet/TestParquetMetricsTruncation.java rename to core/src/test/java/org/apache/iceberg/TestMetricsTruncation.java index a887e19cff53..1f282c9816d4 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetMetricsTruncation.java +++ b/core/src/test/java/org/apache/iceberg/TestMetricsTruncation.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.iceberg.parquet; +package org.apache.iceberg; import org.apache.iceberg.expressions.Literal; import org.junit.Assert; @@ -31,7 +31,7 @@ import static org.apache.iceberg.util.UnicodeUtil.truncateStringMax; import static org.apache.iceberg.util.UnicodeUtil.truncateStringMin; -public class TestParquetMetricsTruncation { +public class TestMetricsTruncation { @Test public void testTruncateBinaryMin() throws IOException { ByteBuffer test1 = ByteBuffer.wrap(new byte[] {1, 1, (byte) 0xFF, 2}); From bdebffd20a0a1d7279ff3701c3cc33617e469999 Mon Sep 17 00:00:00 2001 From: Edgar Rodriguez Date: Tue, 30 Jul 2019 15:44:10 -0700 Subject: [PATCH 04/10] Restore imports --- .../java/org/apache/iceberg/TestMetrics.java | 123 ++++++++++-------- 1 file changed, 69 insertions(+), 54 deletions(-) diff --git a/core/src/test/java/org/apache/iceberg/TestMetrics.java b/core/src/test/java/org/apache/iceberg/TestMetrics.java index 32d86d36cf7b..a6658314ce25 100644 --- a/core/src/test/java/org/apache/iceberg/TestMetrics.java +++ b/core/src/test/java/org/apache/iceberg/TestMetrics.java @@ -35,7 +35,22 @@ import org.apache.iceberg.io.InputFile; import org.apache.iceberg.types.Conversions; import org.apache.iceberg.types.Type; -import org.apache.iceberg.types.Types; +import org.apache.iceberg.types.Types.BinaryType; +import org.apache.iceberg.types.Types.BooleanType; +import org.apache.iceberg.types.Types.DateType; +import org.apache.iceberg.types.Types.DecimalType; +import org.apache.iceberg.types.Types.DoubleType; +import org.apache.iceberg.types.Types.FixedType; +import org.apache.iceberg.types.Types.FloatType; +import org.apache.iceberg.types.Types.IntegerType; +import org.apache.iceberg.types.Types.ListType; +import org.apache.iceberg.types.Types.LongType; +import org.apache.iceberg.types.Types.MapType; +import org.apache.iceberg.types.Types.StringType; +import org.apache.iceberg.types.Types.StructType; +import org.apache.iceberg.types.Types.TimeType; +import org.apache.iceberg.types.Types.TimestampType; +import org.apache.iceberg.types.Types.UUIDType; import org.junit.Assert; import org.junit.Test; @@ -59,19 +74,19 @@ public abstract class TestMetrics { @Test public void testMetricsForTopLevelFields() throws IOException { Schema schema = new Schema( - optional(1, "booleanCol", Types.BooleanType.get()), - required(2, "intCol", Types.IntegerType.get()), - optional(3, "longCol", Types.LongType.get()), - required(4, "floatCol", Types.FloatType.get()), - optional(5, "doubleCol", Types.DoubleType.get()), - optional(6, "decimalCol", Types.DecimalType.of(10, 2)), - required(7, "stringCol", Types.StringType.get()), - optional(8, "dateCol", Types.DateType.get()), - required(9, "timeCol", Types.TimeType.get()), - required(10, "timestampCol", Types.TimestampType.withoutZone()), - optional(11, "uuidCol", Types.UUIDType.get()), - required(12, "fixedCol", Types.FixedType.ofLength(4)), - required(13, "binaryCol", Types.BinaryType.get()) + optional(1, "booleanCol", BooleanType.get()), + required(2, "intCol", IntegerType.get()), + optional(3, "longCol", LongType.get()), + required(4, "floatCol", FloatType.get()), + optional(5, "doubleCol", DoubleType.get()), + optional(6, "decimalCol", DecimalType.of(10, 2)), + required(7, "stringCol", StringType.get()), + optional(8, "dateCol", DateType.get()), + required(9, "timeCol", TimeType.get()), + required(10, "timestampCol", TimestampType.withoutZone()), + optional(11, "uuidCol", UUIDType.get()), + required(12, "fixedCol", FixedType.ofLength(4)), + required(13, "binaryCol", BinaryType.get()) ); GenericData.Record firstRecord = new GenericData.Record(AvroSchemaUtil.convert(schema.asStruct())); @@ -109,41 +124,41 @@ public void testMetricsForTopLevelFields() throws IOException { Metrics metrics = getMetrics(Files.localInput(recordsFile)); Assert.assertEquals(2L, (long) metrics.recordCount()); assertCounts(1, 2L, 0L, metrics); - assertBounds(1, Types.BooleanType.get(), false, true, metrics); + assertBounds(1, BooleanType.get(), false, true, metrics); assertCounts(2, 2L, 0L, metrics); - assertBounds(2, Types.IntegerType.get(), Integer.MIN_VALUE, 3, metrics); + assertBounds(2, IntegerType.get(), Integer.MIN_VALUE, 3, metrics); assertCounts(3, 2L, 1L, metrics); - assertBounds(3, Types.LongType.get(), 5L, 5L, metrics); + assertBounds(3, LongType.get(), 5L, 5L, metrics); assertCounts(4, 2L, 0L, metrics); - assertBounds(4, Types.FloatType.get(), 1.0F, 2.0F, metrics); + assertBounds(4, FloatType.get(), 1.0F, 2.0F, metrics); assertCounts(5, 2L, 1L, metrics); - assertBounds(5, Types.DoubleType.get(), 2.0D, 2.0D, metrics); + assertBounds(5, DoubleType.get(), 2.0D, 2.0D, metrics); assertCounts(6, 2L, 1L, metrics); - assertBounds(6, Types.DecimalType.of(10, 2), new BigDecimal("3.50"), new BigDecimal("3.50"), metrics); + assertBounds(6, DecimalType.of(10, 2), new BigDecimal("3.50"), new BigDecimal("3.50"), metrics); assertCounts(7, 2L, 0L, metrics); - assertBounds(7, Types.StringType.get(), CharBuffer.wrap("AAA"), CharBuffer.wrap("ZZZ"), metrics); + assertBounds(7, StringType.get(), CharBuffer.wrap("AAA"), CharBuffer.wrap("ZZZ"), metrics); assertCounts(8, 2L, 1L, metrics); - assertBounds(8, Types.DateType.get(), 1500, 1500, metrics); + assertBounds(8, DateType.get(), 1500, 1500, metrics); assertCounts(9, 2L, 0L, metrics); - assertBounds(9, Types.TimeType.get(), 2000L, 3000L, metrics); + assertBounds(9, TimeType.get(), 2000L, 3000L, metrics); assertCounts(10, 2L, 0L, metrics); - assertBounds(10, Types.TimestampType.withoutZone(), 0L, 1000L, metrics); + assertBounds(10, TimestampType.withoutZone(), 0L, 1000L, metrics); assertCounts(11, 2L, 1L, metrics); - assertBounds(11, Types.UUIDType.get(), uuid, uuid, metrics); + assertBounds(11, UUIDType.get(), uuid, uuid, metrics); assertCounts(12, 2L, 0L, metrics); - assertBounds(12, Types.FixedType.ofLength(4), + assertBounds(12, FixedType.ofLength(4), ByteBuffer.wrap(fixed.bytes()), ByteBuffer.wrap(fixed.bytes()), metrics); assertCounts(13, 2L, 0L, metrics); - assertBounds(13, Types.BinaryType.get(), + assertBounds(13, BinaryType.get(), ByteBuffer.wrap("S".getBytes()), ByteBuffer.wrap("W".getBytes()), metrics); } @Test public void testMetricsForDecimals() throws IOException { Schema schema = new Schema( - required(1, "decimalAsInt32", Types.DecimalType.of(4, 2)), - required(2, "decimalAsInt64", Types.DecimalType.of(14, 2)), - required(3, "decimalAsFixed", Types.DecimalType.of(22, 2)) + required(1, "decimalAsInt32", DecimalType.of(4, 2)), + required(2, "decimalAsInt64", DecimalType.of(14, 2)), + required(3, "decimalAsFixed", DecimalType.of(22, 2)) ); GenericData.Record record = new GenericData.Record(AvroSchemaUtil.convert(schema.asStruct())); @@ -156,25 +171,25 @@ public void testMetricsForDecimals() throws IOException { Metrics metrics = getMetrics(Files.localInput(recordsFile)); Assert.assertEquals(1L, (long) metrics.recordCount()); assertCounts(1, 1L, 0L, metrics); - assertBounds(1, Types.DecimalType.of(4, 2), new BigDecimal("2.55"), new BigDecimal("2.55"), metrics); + assertBounds(1, DecimalType.of(4, 2), new BigDecimal("2.55"), new BigDecimal("2.55"), metrics); assertCounts(2, 1L, 0L, metrics); - assertBounds(2, Types.DecimalType.of(14, 2), new BigDecimal("4.75"), new BigDecimal("4.75"), metrics); + assertBounds(2, DecimalType.of(14, 2), new BigDecimal("4.75"), new BigDecimal("4.75"), metrics); assertCounts(3, 1L, 0L, metrics); - assertBounds(3, Types.DecimalType.of(22, 2), new BigDecimal("5.80"), new BigDecimal("5.80"), metrics); + assertBounds(3, DecimalType.of(22, 2), new BigDecimal("5.80"), new BigDecimal("5.80"), metrics); } @Test public void testMetricsForNestedStructFields() throws IOException { - Types.StructType leafStructType = Types.StructType.of( - optional(5, "leafLongCol", Types.LongType.get()), - optional(6, "leafBinaryCol", Types.BinaryType.get()) + StructType leafStructType = StructType.of( + optional(5, "leafLongCol", LongType.get()), + optional(6, "leafBinaryCol", BinaryType.get()) ); - Types.StructType nestedStructType = Types.StructType.of( - required(3, "longCol", Types.LongType.get()), + StructType nestedStructType = StructType.of( + required(3, "longCol", LongType.get()), required(4, "leafStructCol", leafStructType) ); Schema schema = new Schema( - required(1, "intCol", Types.IntegerType.get()), + required(1, "intCol", IntegerType.get()), required(2, "nestedStructCol", nestedStructType) ); @@ -193,25 +208,25 @@ public void testMetricsForNestedStructFields() throws IOException { Metrics metrics = getMetrics(Files.localInput(recordsFile)); Assert.assertEquals(1L, (long) metrics.recordCount()); assertCounts(1, 1L, 0L, metrics); - assertBounds(1, Types.IntegerType.get(), Integer.MAX_VALUE, Integer.MAX_VALUE, metrics); + assertBounds(1, IntegerType.get(), Integer.MAX_VALUE, Integer.MAX_VALUE, metrics); assertCounts(3, 1L, 0L, metrics); - assertBounds(3, Types.LongType.get(), 100L, 100L, metrics); + assertBounds(3, LongType.get(), 100L, 100L, metrics); assertCounts(5, 1L, 0L, metrics); - assertBounds(5, Types.LongType.get(), 20L, 20L, metrics); + assertBounds(5, LongType.get(), 20L, 20L, metrics); assertCounts(6, 1L, 0L, metrics); - assertBounds(6, Types.BinaryType.get(), + assertBounds(6, BinaryType.get(), ByteBuffer.wrap("A".getBytes()), ByteBuffer.wrap("A".getBytes()), metrics); } @Test public void testMetricsForListAndMapElements() throws IOException { - Types.StructType structType = Types.StructType.of( - required(1, "leafIntCol", Types.IntegerType.get()), - optional(2, "leafStringCol", Types.StringType.get()) + StructType structType = StructType.of( + required(1, "leafIntCol", IntegerType.get()), + optional(2, "leafStringCol", StringType.get()) ); Schema schema = new Schema( - optional(3, "intListCol", Types.ListType.ofRequired(4, Types.IntegerType.get())), - optional(5, "mapCol", Types.MapType.ofRequired(6, 7, Types.StringType.get(), structType)) + optional(3, "intListCol", ListType.ofRequired(4, IntegerType.get())), + optional(5, "mapCol", MapType.ofRequired(6, 7, StringType.get(), structType)) ); GenericData.Record record = new GenericData.Record(AvroSchemaUtil.convert(schema.asStruct())); @@ -228,19 +243,19 @@ public void testMetricsForListAndMapElements() throws IOException { Metrics metrics = getMetrics(Files.localInput(recordsFile)); Assert.assertEquals(1L, (long) metrics.recordCount()); assertCounts(1, 1, 0, metrics); - assertBounds(1, Types.IntegerType.get(), null, null, metrics); + assertBounds(1, IntegerType.get(), null, null, metrics); assertCounts(2, 1, 0, metrics); - assertBounds(2, Types.StringType.get(), null, null, metrics); + assertBounds(2, StringType.get(), null, null, metrics); assertCounts(4, 3, 0, metrics); - assertBounds(4, Types.IntegerType.get(), null, null, metrics); + assertBounds(4, IntegerType.get(), null, null, metrics); assertCounts(6, 1, 0, metrics); - assertBounds(6, Types.StringType.get(), null, null, metrics); + assertBounds(6, StringType.get(), null, null, metrics); } @Test public void testMetricsForNullColumns() throws IOException { Schema schema = new Schema( - optional(1, "intCol", Types.IntegerType.get()) + optional(1, "intCol", IntegerType.get()) ); GenericData.Record firstRecord = new GenericData.Record(AvroSchemaUtil.convert(schema.asStruct())); firstRecord.put("intCol", null); @@ -252,7 +267,7 @@ public void testMetricsForNullColumns() throws IOException { Metrics metrics = getMetrics(Files.localInput(recordsFile)); Assert.assertEquals(2L, (long) metrics.recordCount()); assertCounts(1, 2, 2, metrics); - assertBounds(1, Types.IntegerType.get(), null, null, metrics); + assertBounds(1, IntegerType.get(), null, null, metrics); } private void assertCounts(int fieldId, long valueCount, long nullValueCount, Metrics metrics) { From 689b79c892438942fcb2a73ee08f97c32726537d Mon Sep 17 00:00:00 2001 From: Edgar Rodriguez Date: Tue, 30 Jul 2019 15:47:07 -0700 Subject: [PATCH 05/10] Rename parquet writing utils --- ...seParquetWritingTest.java => ParquetWritingTestUtils.java} | 4 ++-- .../src/test/java/org/apache/iceberg/parquet/TestParquet.java | 2 +- .../java/org/apache/iceberg/parquet/TestParquetMetrics.java | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) rename parquet/src/test/java/org/apache/iceberg/parquet/{BaseParquetWritingTest.java => ParquetWritingTestUtils.java} (97%) diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/BaseParquetWritingTest.java b/parquet/src/test/java/org/apache/iceberg/parquet/ParquetWritingTestUtils.java similarity index 97% rename from parquet/src/test/java/org/apache/iceberg/parquet/BaseParquetWritingTest.java rename to parquet/src/test/java/org/apache/iceberg/parquet/ParquetWritingTestUtils.java index 50f533113e97..c3036cab8791 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/BaseParquetWritingTest.java +++ b/parquet/src/test/java/org/apache/iceberg/parquet/ParquetWritingTestUtils.java @@ -38,9 +38,9 @@ /** * Base utility test class for tests that need to write Parquet files */ -class BaseParquetWritingTest { +class ParquetWritingTestUtils { - private BaseParquetWritingTest() {} + private ParquetWritingTestUtils() {} static File writeRecords(TemporaryFolder temp, Schema schema, GenericData.Record... records) throws IOException { return writeRecords(temp, schema, Collections.emptyMap(), null, records); diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquet.java b/parquet/src/test/java/org/apache/iceberg/parquet/TestParquet.java index b8bb2e572992..f5433eaa160c 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquet.java +++ b/parquet/src/test/java/org/apache/iceberg/parquet/TestParquet.java @@ -38,7 +38,7 @@ import static org.apache.iceberg.Files.localInput; import static org.apache.iceberg.TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES; -import static org.apache.iceberg.parquet.BaseParquetWritingTest.writeRecords; +import static org.apache.iceberg.parquet.ParquetWritingTestUtils.writeRecords; import static org.apache.iceberg.types.Types.NestedField.optional; public class TestParquet { diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetMetrics.java b/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetMetrics.java index 0e5d3246f154..a838c92e5fc0 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetMetrics.java +++ b/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetMetrics.java @@ -44,6 +44,6 @@ public Metrics getMetrics(InputFile file) { @Override public File writeRecords(Schema schema, GenericData.Record... records) throws IOException { - return BaseParquetWritingTest.writeRecords(temp, schema, records); + return ParquetWritingTestUtils.writeRecords(temp, schema, records); } } From 7a61e88940a664b638cf19abb1e840c78b7a604c Mon Sep 17 00:00:00 2001 From: Edgar Rodriguez Date: Tue, 30 Jul 2019 15:59:10 -0700 Subject: [PATCH 06/10] Restore more imports --- .../java/org/apache/iceberg/TestMetrics.java | 35 +++++++++---------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/core/src/test/java/org/apache/iceberg/TestMetrics.java b/core/src/test/java/org/apache/iceberg/TestMetrics.java index a6658314ce25..379d297c3bdc 100644 --- a/core/src/test/java/org/apache/iceberg/TestMetrics.java +++ b/core/src/test/java/org/apache/iceberg/TestMetrics.java @@ -30,10 +30,10 @@ import java.util.Map; import java.util.UUID; import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericData.Record; import org.apache.avro.generic.GenericFixed; import org.apache.iceberg.avro.AvroSchemaUtil; import org.apache.iceberg.io.InputFile; -import org.apache.iceberg.types.Conversions; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types.BinaryType; import org.apache.iceberg.types.Types.BooleanType; @@ -54,6 +54,7 @@ import org.junit.Assert; import org.junit.Test; +import static org.apache.iceberg.types.Conversions.fromByteBuffer; import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; @@ -69,7 +70,7 @@ public abstract class TestMetrics { public abstract Metrics getMetrics(InputFile file); - public abstract File writeRecords(Schema schema, GenericData.Record... records) throws IOException; + public abstract File writeRecords(Schema schema, Record... records) throws IOException; @Test public void testMetricsForTopLevelFields() throws IOException { @@ -89,7 +90,7 @@ public void testMetricsForTopLevelFields() throws IOException { required(13, "binaryCol", BinaryType.get()) ); - GenericData.Record firstRecord = new GenericData.Record(AvroSchemaUtil.convert(schema.asStruct())); + Record firstRecord = new Record(AvroSchemaUtil.convert(schema.asStruct())); firstRecord.put("booleanCol", true); firstRecord.put("intCol", 3); firstRecord.put("longCol", 5L); @@ -103,8 +104,7 @@ public void testMetricsForTopLevelFields() throws IOException { firstRecord.put("uuidCol", uuid); firstRecord.put("fixedCol", fixed); firstRecord.put("binaryCol", "S".getBytes()); - - GenericData.Record secondRecord = new GenericData.Record(AvroSchemaUtil.convert(schema.asStruct())); + Record secondRecord = new Record(AvroSchemaUtil.convert(schema.asStruct())); secondRecord.put("booleanCol", false); secondRecord.put("intCol", Integer.MIN_VALUE); secondRecord.put("longCol", null); @@ -161,7 +161,7 @@ public void testMetricsForDecimals() throws IOException { required(3, "decimalAsFixed", DecimalType.of(22, 2)) ); - GenericData.Record record = new GenericData.Record(AvroSchemaUtil.convert(schema.asStruct())); + Record record = new Record(AvroSchemaUtil.convert(schema.asStruct())); record.put("decimalAsInt32", new BigDecimal("2.55")); record.put("decimalAsInt64", new BigDecimal("4.75")); record.put("decimalAsFixed", new BigDecimal("5.80")); @@ -193,13 +193,13 @@ public void testMetricsForNestedStructFields() throws IOException { required(2, "nestedStructCol", nestedStructType) ); - GenericData.Record leafStruct = new GenericData.Record(AvroSchemaUtil.convert(leafStructType)); + Record leafStruct = new Record(AvroSchemaUtil.convert(leafStructType)); leafStruct.put("leafLongCol", 20L); leafStruct.put("leafBinaryCol", "A".getBytes()); - GenericData.Record nestedStruct = new GenericData.Record(AvroSchemaUtil.convert(nestedStructType)); + Record nestedStruct = new Record(AvroSchemaUtil.convert(nestedStructType)); nestedStruct.put("longCol", 100L); nestedStruct.put("leafStructCol", leafStruct); - GenericData.Record record = new GenericData.Record(AvroSchemaUtil.convert(schema.asStruct())); + Record record = new Record(AvroSchemaUtil.convert(schema.asStruct())); record.put("intCol", Integer.MAX_VALUE); record.put("nestedStructCol", nestedStruct); @@ -229,12 +229,12 @@ public void testMetricsForListAndMapElements() throws IOException { optional(5, "mapCol", MapType.ofRequired(6, 7, StringType.get(), structType)) ); - GenericData.Record record = new GenericData.Record(AvroSchemaUtil.convert(schema.asStruct())); + Record record = new Record(AvroSchemaUtil.convert(schema.asStruct())); record.put("intListCol", Lists.newArrayList(10, 11, 12)); - GenericData.Record struct = new GenericData.Record(AvroSchemaUtil.convert(structType)); + Record struct = new Record(AvroSchemaUtil.convert(structType)); struct.put("leafIntCol", 1); struct.put("leafStringCol", "BBB"); - Map map = Maps.newHashMap(); + Map map = Maps.newHashMap(); map.put("4", struct); record.put(1, map); @@ -257,9 +257,9 @@ public void testMetricsForNullColumns() throws IOException { Schema schema = new Schema( optional(1, "intCol", IntegerType.get()) ); - GenericData.Record firstRecord = new GenericData.Record(AvroSchemaUtil.convert(schema.asStruct())); + Record firstRecord = new Record(AvroSchemaUtil.convert(schema.asStruct())); firstRecord.put("intCol", null); - GenericData.Record secondRecord = new GenericData.Record(AvroSchemaUtil.convert(schema.asStruct())); + Record secondRecord = new Record(AvroSchemaUtil.convert(schema.asStruct())); secondRecord.put("intCol", null); File recordsFile = writeRecords(schema, firstRecord, secondRecord); @@ -283,11 +283,10 @@ private void assertBounds(int fieldId, Type type, T lowerBound, T upperBound Assert.assertEquals( lowerBound, - lowerBounds.containsKey(fieldId) ? - Conversions.fromByteBuffer(type, lowerBounds.get(fieldId)) : null); + lowerBounds.containsKey(fieldId) ? fromByteBuffer(type, lowerBounds.get(fieldId)) : null); Assert.assertEquals( upperBound, - upperBounds.containsKey(fieldId) ? - Conversions.fromByteBuffer(type, upperBounds.get(fieldId)) : null); + upperBounds.containsKey(fieldId) ? fromByteBuffer(type, upperBounds.get(fieldId)) : null); } + } From be1ac33730e9187bff17f4a4f40de3c52b7771e4 Mon Sep 17 00:00:00 2001 From: Edgar Rodriguez Date: Tue, 30 Jul 2019 16:33:33 -0700 Subject: [PATCH 07/10] Fix style check errors Suppress checkstyle for escaping non-printable characters. --- .../apache/iceberg/TestMetricsTruncation.java | 66 ++++++++++--------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/core/src/test/java/org/apache/iceberg/TestMetricsTruncation.java b/core/src/test/java/org/apache/iceberg/TestMetricsTruncation.java index 1f282c9816d4..ce86065519db 100644 --- a/core/src/test/java/org/apache/iceberg/TestMetricsTruncation.java +++ b/core/src/test/java/org/apache/iceberg/TestMetricsTruncation.java @@ -19,12 +19,11 @@ package org.apache.iceberg; +import java.nio.ByteBuffer; +import java.util.Comparator; import org.apache.iceberg.expressions.Literal; import org.junit.Assert; import org.junit.Test; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.Comparator; import static org.apache.iceberg.util.BinaryUtil.truncateBinaryMax; import static org.apache.iceberg.util.BinaryUtil.truncateBinaryMin; @@ -33,29 +32,29 @@ public class TestMetricsTruncation { @Test - public void testTruncateBinaryMin() throws IOException { + public void testTruncateBinaryMin() { ByteBuffer test1 = ByteBuffer.wrap(new byte[] {1, 1, (byte) 0xFF, 2}); // Output of test1 when truncated to 2 bytes - ByteBuffer test1_2_expected = ByteBuffer.wrap(new byte[] {1, 1}); + ByteBuffer test12Expected = ByteBuffer.wrap(new byte[] {1, 1}); ByteBuffer test2 = ByteBuffer.wrap(new byte[] {(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, 2}); - ByteBuffer test2_2 = ByteBuffer.wrap(new byte[] {(byte) 0xFF, (byte) 0xFF}); + ByteBuffer test22 = ByteBuffer.wrap(new byte[] {(byte) 0xFF, (byte) 0xFF}); Comparator cmp = Literal.of(test1).comparator(); Assert.assertTrue("Truncated lower bound should be lower than or equal to the actual lower bound", cmp.compare(truncateBinaryMin(Literal.of(test1), 2).value(), test1) <= 0); Assert.assertTrue("Output must have the first two bytes of the input", - cmp.compare(truncateBinaryMin(Literal.of(test1), 2).value(), test1_2_expected) == 0); + cmp.compare(truncateBinaryMin(Literal.of(test1), 2).value(), test12Expected) == 0); Assert.assertTrue("No truncation required as truncate length is greater than the input size", cmp.compare(truncateBinaryMin(Literal.of(test1), 5).value(), test1) == 0); Assert.assertTrue("Truncated lower bound should be lower than or equal to the actual lower bound", cmp.compare(truncateBinaryMin(Literal.of(test2), 2).value(), test2) <= 0); Assert.assertTrue("Output must have the first two bytes of the input. A lower bound exists " + "even though the first two bytes are the max value", - cmp.compare(truncateBinaryMin(Literal.of(test2), 2).value(), test2_2) == 0); + cmp.compare(truncateBinaryMin(Literal.of(test2), 2).value(), test22) == 0); } @Test - public void testTruncateBinaryMax() throws IOException { + public void testTruncateBinaryMax() { ByteBuffer test1 = ByteBuffer.wrap(new byte[] {1, 1, 2}); ByteBuffer test2 = ByteBuffer.wrap(new byte[] {1, 1, (byte) 0xFF, 2}); ByteBuffer test3 = ByteBuffer.wrap(new byte[] {(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, 2}); @@ -84,32 +83,33 @@ public void testTruncateBinaryMax() throws IOException { } @Test - public void testTruncateStringMin() throws IOException { + public void testTruncateStringMin() { String test1 = "イロハニホヘト"; // Output of test1 when truncated to 2 unicode characters - String test1_2_expected = "イロ"; - String test1_3_expected = "イロハ"; + String test12Expected = "イロ"; + String test13Expected = "イロハ"; String test2 = "щщаεはчωいにπάほхεろへσκζ"; - String test2_7_expected = "щщаεはчω"; + String test27Expected = "щщаεはчω"; + // CHECKSTYLE:OFF // U+FFFF is max 3 byte UTF-8 character String test3 = "\uFFFF\uFFFF"; // test4 consists of 2 4 byte UTF-8 characters String test4 = "\uD800\uDC00\uD800\uDC00"; - String test4_1_expected = "\uD800\uDC00"; - + String test41Expected = "\uD800\uDC00"; + // CHECKSTYLE:ON Comparator cmp = Literal.of(test1).comparator(); Assert.assertTrue("Truncated lower bound should be lower than or equal to the actual lower bound", cmp.compare(truncateStringMin(Literal.of(test1), 3).value(), test1) <= 0); Assert.assertTrue("No truncation required as truncate length is greater than the input size", cmp.compare(truncateStringMin(Literal.of(test1), 8).value(), test1) == 0); Assert.assertTrue("Output must have the first two characters of the input", - cmp.compare(truncateStringMin(Literal.of(test1), 2).value(), test1_2_expected) == 0); + cmp.compare(truncateStringMin(Literal.of(test1), 2).value(), test12Expected) == 0); Assert.assertTrue("Output must have the first three characters of the input", - cmp.compare(truncateStringMin(Literal.of(test1), 3).value(), test1_3_expected) == 0); + cmp.compare(truncateStringMin(Literal.of(test1), 3).value(), test13Expected) == 0); Assert.assertTrue("Truncated lower bound should be lower than or equal to the actual lower bound", cmp.compare(truncateStringMin(Literal.of(test2), 16).value(), test2) <= 0); Assert.assertTrue("Output must have the first seven characters of the input", - cmp.compare(truncateStringMin(Literal.of(test2), 7).value(), test2_7_expected) == 0); + cmp.compare(truncateStringMin(Literal.of(test2), 7).value(), test27Expected) == 0); Assert.assertTrue("Truncated lower bound should be lower than or equal to the actual lower bound", cmp.compare(truncateStringMin(Literal.of(test3), 2).value(), test3) <= 0); Assert.assertTrue("No truncation required as truncate length is equal to the input size", @@ -117,27 +117,29 @@ public void testTruncateStringMin() throws IOException { Assert.assertTrue("Truncated lower bound should be lower than or equal to the actual lower bound", cmp.compare(truncateStringMin(Literal.of(test4), 1).value(), test4) <= 0); Assert.assertTrue("Output must have the first 4 byte UTF-8 character of the input", - cmp.compare(truncateStringMin(Literal.of(test4), 1).value(), test4_1_expected) == 0); + cmp.compare(truncateStringMin(Literal.of(test4), 1).value(), test41Expected) == 0); } @Test - public void testTruncateStringMax() throws IOException { + public void testTruncateStringMax() { String test1 = "イロハニホヘト"; // Output of test1 when truncated to 2 unicode characters - String test1_2_expected = "イヮ"; - String test1_3_expected = "イロバ"; + String test12Expected = "イヮ"; + String test13Expected = "イロバ"; String test2 = "щщаεはчωいにπάほхεろへσκζ"; - String test2_7_expected = "щщаεはчϊ"; + String test27Expected = "щщаεはчϊ"; String test3 = "aनि\uFFFF\uFFFF"; - String test3_3_expected = "aनी"; + String test33Expected = "aनी"; // U+FFFF is max 3 byte UTF-8 character + // CHECKSTYLE:OFF String test4 = "\uFFFF\uFFFF"; - String test4_1_expected = "\uD800\uDC00"; + String test41Expected = "\uD800\uDC00"; // test5 consists of 2 4 byte max UTF-8 characters String test5 = "\uDBFF\uDFFF\uDBFF\uDFFF"; String test6 = "\uD800\uDFFF\uD800\uDFFF"; // Increment the previous character - String test6_2_expected = "\uD801\uDC00"; + String test62Expected = "\uD801\uDC00"; + // CHECKSTYLE:ON Comparator cmp = Literal.of(test1).comparator(); Assert.assertTrue("Truncated upper bound should be greater than or equal to the actual upper bound", @@ -146,34 +148,34 @@ public void testTruncateStringMax() throws IOException { cmp.compare(truncateStringMax(Literal.of(test1), 7).value(), test1) == 0); Assert.assertTrue("Output must have two characters and the second character of the input must " + "be incremented", cmp.compare( - truncateStringMax(Literal.of(test1), 2).value(), test1_2_expected) == 0); + truncateStringMax(Literal.of(test1), 2).value(), test12Expected) == 0); Assert.assertTrue("Output must have three characters and the third character of the input must " + "be incremented", cmp.compare( - truncateStringMax(Literal.of(test1), 3).value(), test1_3_expected) == 0); + truncateStringMax(Literal.of(test1), 3).value(), test13Expected) == 0); Assert.assertTrue("No truncation required as truncate length is greater than the input size", cmp.compare(truncateStringMax(Literal.of(test1), 8).value(), test1) == 0); Assert.assertTrue("Truncated upper bound should be greater than or equal to the actual upper " + "bound", cmp.compare(truncateStringMax(Literal.of(test2), 8).value(), test2) >= 0); Assert.assertTrue("Output must have seven characters and the seventh character of the input " + "must be incremented", cmp.compare( - truncateStringMax(Literal.of(test2), 7).value(), test2_7_expected) == 0); + truncateStringMax(Literal.of(test2), 7).value(), test27Expected) == 0); Assert.assertTrue("Truncated upper bound should be greater than or equal to the actual upper " + "bound", cmp.compare(truncateStringMax(Literal.of(test3), 3).value(), test3) >= 0); Assert.assertTrue("Output must have three characters and the third character of the input must " + "be incremented. The second perceivable character in this string is actually a glyph. It consists of " + "two unicode characters", cmp.compare( - truncateStringMax(Literal.of(test3), 3).value(), test3_3_expected) == 0); + truncateStringMax(Literal.of(test3), 3).value(), test33Expected) == 0); Assert.assertTrue("Truncated upper bound should be greater than or equal to the actual upper bound", cmp.compare(truncateStringMax(Literal.of(test4), 1).value(), test4) >= 0); Assert.assertTrue("Output must have one character. Since the first character is the max 3 byte " + "UTF-8 character, it should be incremented to the lowest 4 byte UTF-8 character", - cmp.compare(truncateStringMax(Literal.of(test4), 1).value(), test4_1_expected) == 0); + cmp.compare(truncateStringMax(Literal.of(test4), 1).value(), test41Expected) == 0); Assert.assertNull("An upper bound doesn't exist since the first two characters are max UTF-8 " + "characters", truncateStringMax(Literal.of(test5), 1)); Assert.assertTrue("Truncated upper bound should be greater than or equal to the actual upper bound", cmp.compare(truncateStringMax(Literal.of(test6), 2).value(), test6) >= 0); Assert.assertTrue("Test 4 byte UTF-8 character increment. Output must have one character with " + "the first character incremented", cmp.compare( - truncateStringMax(Literal.of(test6), 1).value(), test6_2_expected) == 0); + truncateStringMax(Literal.of(test6), 1).value(), test62Expected) == 0); } } From 629be817ccf32a678da60a1a9a77408120249aab Mon Sep 17 00:00:00 2001 From: Edgar Rodriguez Date: Wed, 31 Jul 2019 14:25:21 -0700 Subject: [PATCH 08/10] Use testArtifacts in subprojects --- build.gradle | 37 +++++++++++-------------------------- 1 file changed, 11 insertions(+), 26 deletions(-) diff --git a/build.gradle b/build.gradle index f3bc29bec3ab..4f9c40387e0e 100644 --- a/build.gradle +++ b/build.gradle @@ -77,6 +77,8 @@ subprojects { all { exclude group: 'org.slf4j', module: 'slf4j-log4j12' } + + testArtifacts } ext { @@ -95,6 +97,15 @@ subprojects { testCompile 'org.mockito:mockito-core' } + task testJar(type: Jar){ + archiveClassifier = 'tests' + from sourceSets.test.output + } + + artifacts { + testArtifacts testJar + } + publishing { publications { nebula(MavenPublication) { @@ -231,19 +242,6 @@ project(':iceberg-core') { exclude group: 'org.slf4j', module: 'slf4j-log4j12' } } - - task testJar(type: Jar){ - archiveClassifier = 'tests' - from sourceSets.test.output - } - - configurations { - testArtifacts - } - - artifacts { - testArtifacts testJar - } } project(':iceberg-data') { @@ -297,19 +295,6 @@ project(':iceberg-hive') { exclude group: 'org.slf4j', module: 'slf4j-log4j12' } } - - task testJar(type: Jar){ - archiveClassifier = 'tests' - from sourceSets.test.output - } - - configurations { - testArtifacts - } - - artifacts { - testArtifacts testJar - } } project(':iceberg-orc') { From 7f0814221eb9fb0b181c2b45b60a52cfb4f15bc4 Mon Sep 17 00:00:00 2001 From: Edgar Rodriguez Date: Wed, 31 Jul 2019 16:03:45 -0700 Subject: [PATCH 09/10] Use SuppressWarnings on checkstyle --- .../apache/iceberg/TestMetricsTruncation.java | 55 +++++++++---------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/core/src/test/java/org/apache/iceberg/TestMetricsTruncation.java b/core/src/test/java/org/apache/iceberg/TestMetricsTruncation.java index ce86065519db..7a999048ba30 100644 --- a/core/src/test/java/org/apache/iceberg/TestMetricsTruncation.java +++ b/core/src/test/java/org/apache/iceberg/TestMetricsTruncation.java @@ -30,27 +30,28 @@ import static org.apache.iceberg.util.UnicodeUtil.truncateStringMax; import static org.apache.iceberg.util.UnicodeUtil.truncateStringMin; +@SuppressWarnings("checkstyle:LocalVariableName") public class TestMetricsTruncation { @Test public void testTruncateBinaryMin() { ByteBuffer test1 = ByteBuffer.wrap(new byte[] {1, 1, (byte) 0xFF, 2}); // Output of test1 when truncated to 2 bytes - ByteBuffer test12Expected = ByteBuffer.wrap(new byte[] {1, 1}); + ByteBuffer test1_2_expected = ByteBuffer.wrap(new byte[] {1, 1}); ByteBuffer test2 = ByteBuffer.wrap(new byte[] {(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, 2}); - ByteBuffer test22 = ByteBuffer.wrap(new byte[] {(byte) 0xFF, (byte) 0xFF}); + ByteBuffer test2_2 = ByteBuffer.wrap(new byte[] {(byte) 0xFF, (byte) 0xFF}); Comparator cmp = Literal.of(test1).comparator(); Assert.assertTrue("Truncated lower bound should be lower than or equal to the actual lower bound", cmp.compare(truncateBinaryMin(Literal.of(test1), 2).value(), test1) <= 0); Assert.assertTrue("Output must have the first two bytes of the input", - cmp.compare(truncateBinaryMin(Literal.of(test1), 2).value(), test12Expected) == 0); + cmp.compare(truncateBinaryMin(Literal.of(test1), 2).value(), test1_2_expected) == 0); Assert.assertTrue("No truncation required as truncate length is greater than the input size", cmp.compare(truncateBinaryMin(Literal.of(test1), 5).value(), test1) == 0); Assert.assertTrue("Truncated lower bound should be lower than or equal to the actual lower bound", cmp.compare(truncateBinaryMin(Literal.of(test2), 2).value(), test2) <= 0); Assert.assertTrue("Output must have the first two bytes of the input. A lower bound exists " + "even though the first two bytes are the max value", - cmp.compare(truncateBinaryMin(Literal.of(test2), 2).value(), test22) == 0); + cmp.compare(truncateBinaryMin(Literal.of(test2), 2).value(), test2_2) == 0); } @Test @@ -82,34 +83,33 @@ public void testTruncateBinaryMax() { cmp.compare(truncateBinaryMax(Literal.of(test4), 2).value(), expectedOutput) == 0); } + @SuppressWarnings("checkstyle:AvoidEscapedUnicodeCharacters") @Test public void testTruncateStringMin() { String test1 = "イロハニホヘト"; // Output of test1 when truncated to 2 unicode characters - String test12Expected = "イロ"; - String test13Expected = "イロハ"; + String test1_2_expected = "イロ"; + String test1_3_expected = "イロハ"; String test2 = "щщаεはчωいにπάほхεろへσκζ"; - String test27Expected = "щщаεはчω"; - // CHECKSTYLE:OFF + String test2_7_expected = "щщаεはчω"; // U+FFFF is max 3 byte UTF-8 character String test3 = "\uFFFF\uFFFF"; // test4 consists of 2 4 byte UTF-8 characters String test4 = "\uD800\uDC00\uD800\uDC00"; - String test41Expected = "\uD800\uDC00"; - // CHECKSTYLE:ON + String test4_1_expected = "\uD800\uDC00"; Comparator cmp = Literal.of(test1).comparator(); Assert.assertTrue("Truncated lower bound should be lower than or equal to the actual lower bound", cmp.compare(truncateStringMin(Literal.of(test1), 3).value(), test1) <= 0); Assert.assertTrue("No truncation required as truncate length is greater than the input size", cmp.compare(truncateStringMin(Literal.of(test1), 8).value(), test1) == 0); Assert.assertTrue("Output must have the first two characters of the input", - cmp.compare(truncateStringMin(Literal.of(test1), 2).value(), test12Expected) == 0); + cmp.compare(truncateStringMin(Literal.of(test1), 2).value(), test1_2_expected) == 0); Assert.assertTrue("Output must have the first three characters of the input", - cmp.compare(truncateStringMin(Literal.of(test1), 3).value(), test13Expected) == 0); + cmp.compare(truncateStringMin(Literal.of(test1), 3).value(), test1_3_expected) == 0); Assert.assertTrue("Truncated lower bound should be lower than or equal to the actual lower bound", cmp.compare(truncateStringMin(Literal.of(test2), 16).value(), test2) <= 0); Assert.assertTrue("Output must have the first seven characters of the input", - cmp.compare(truncateStringMin(Literal.of(test2), 7).value(), test27Expected) == 0); + cmp.compare(truncateStringMin(Literal.of(test2), 7).value(), test2_7_expected) == 0); Assert.assertTrue("Truncated lower bound should be lower than or equal to the actual lower bound", cmp.compare(truncateStringMin(Literal.of(test3), 2).value(), test3) <= 0); Assert.assertTrue("No truncation required as truncate length is equal to the input size", @@ -117,29 +117,28 @@ public void testTruncateStringMin() { Assert.assertTrue("Truncated lower bound should be lower than or equal to the actual lower bound", cmp.compare(truncateStringMin(Literal.of(test4), 1).value(), test4) <= 0); Assert.assertTrue("Output must have the first 4 byte UTF-8 character of the input", - cmp.compare(truncateStringMin(Literal.of(test4), 1).value(), test41Expected) == 0); + cmp.compare(truncateStringMin(Literal.of(test4), 1).value(), test4_1_expected) == 0); } + @SuppressWarnings("checkstyle:AvoidEscapedUnicodeCharacters") @Test public void testTruncateStringMax() { String test1 = "イロハニホヘト"; // Output of test1 when truncated to 2 unicode characters - String test12Expected = "イヮ"; - String test13Expected = "イロバ"; + String test1_2_expected = "イヮ"; + String test1_3_expected = "イロバ"; String test2 = "щщаεはчωいにπάほхεろへσκζ"; - String test27Expected = "щщаεはчϊ"; + String test2_7_expected = "щщаεはчϊ"; String test3 = "aनि\uFFFF\uFFFF"; - String test33Expected = "aनी"; + String test3_3_expected = "aनी"; // U+FFFF is max 3 byte UTF-8 character - // CHECKSTYLE:OFF String test4 = "\uFFFF\uFFFF"; - String test41Expected = "\uD800\uDC00"; + String test4_1_expected = "\uD800\uDC00"; // test5 consists of 2 4 byte max UTF-8 characters String test5 = "\uDBFF\uDFFF\uDBFF\uDFFF"; String test6 = "\uD800\uDFFF\uD800\uDFFF"; // Increment the previous character - String test62Expected = "\uD801\uDC00"; - // CHECKSTYLE:ON + String test6_2_expected = "\uD801\uDC00"; Comparator cmp = Literal.of(test1).comparator(); Assert.assertTrue("Truncated upper bound should be greater than or equal to the actual upper bound", @@ -148,34 +147,34 @@ public void testTruncateStringMax() { cmp.compare(truncateStringMax(Literal.of(test1), 7).value(), test1) == 0); Assert.assertTrue("Output must have two characters and the second character of the input must " + "be incremented", cmp.compare( - truncateStringMax(Literal.of(test1), 2).value(), test12Expected) == 0); + truncateStringMax(Literal.of(test1), 2).value(), test1_2_expected) == 0); Assert.assertTrue("Output must have three characters and the third character of the input must " + "be incremented", cmp.compare( - truncateStringMax(Literal.of(test1), 3).value(), test13Expected) == 0); + truncateStringMax(Literal.of(test1), 3).value(), test1_3_expected) == 0); Assert.assertTrue("No truncation required as truncate length is greater than the input size", cmp.compare(truncateStringMax(Literal.of(test1), 8).value(), test1) == 0); Assert.assertTrue("Truncated upper bound should be greater than or equal to the actual upper " + "bound", cmp.compare(truncateStringMax(Literal.of(test2), 8).value(), test2) >= 0); Assert.assertTrue("Output must have seven characters and the seventh character of the input " + "must be incremented", cmp.compare( - truncateStringMax(Literal.of(test2), 7).value(), test27Expected) == 0); + truncateStringMax(Literal.of(test2), 7).value(), test2_7_expected) == 0); Assert.assertTrue("Truncated upper bound should be greater than or equal to the actual upper " + "bound", cmp.compare(truncateStringMax(Literal.of(test3), 3).value(), test3) >= 0); Assert.assertTrue("Output must have three characters and the third character of the input must " + "be incremented. The second perceivable character in this string is actually a glyph. It consists of " + "two unicode characters", cmp.compare( - truncateStringMax(Literal.of(test3), 3).value(), test33Expected) == 0); + truncateStringMax(Literal.of(test3), 3).value(), test3_3_expected) == 0); Assert.assertTrue("Truncated upper bound should be greater than or equal to the actual upper bound", cmp.compare(truncateStringMax(Literal.of(test4), 1).value(), test4) >= 0); Assert.assertTrue("Output must have one character. Since the first character is the max 3 byte " + "UTF-8 character, it should be incremented to the lowest 4 byte UTF-8 character", - cmp.compare(truncateStringMax(Literal.of(test4), 1).value(), test41Expected) == 0); + cmp.compare(truncateStringMax(Literal.of(test4), 1).value(), test4_1_expected) == 0); Assert.assertNull("An upper bound doesn't exist since the first two characters are max UTF-8 " + "characters", truncateStringMax(Literal.of(test5), 1)); Assert.assertTrue("Truncated upper bound should be greater than or equal to the actual upper bound", cmp.compare(truncateStringMax(Literal.of(test6), 2).value(), test6) >= 0); Assert.assertTrue("Test 4 byte UTF-8 character increment. Output must have one character with " + "the first character incremented", cmp.compare( - truncateStringMax(Literal.of(test6), 1).value(), test62Expected) == 0); + truncateStringMax(Literal.of(test6), 1).value(), test6_2_expected) == 0); } } From a9ba3f2d6b0511591b4300d62639a447733c63fe Mon Sep 17 00:00:00 2001 From: Edgar Rodriguez Date: Wed, 31 Jul 2019 16:06:39 -0700 Subject: [PATCH 10/10] Update docs --- .../org/apache/iceberg/parquet/ParquetWritingTestUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/ParquetWritingTestUtils.java b/parquet/src/test/java/org/apache/iceberg/parquet/ParquetWritingTestUtils.java index c3036cab8791..f42bdbdc6406 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/ParquetWritingTestUtils.java +++ b/parquet/src/test/java/org/apache/iceberg/parquet/ParquetWritingTestUtils.java @@ -36,7 +36,7 @@ import static org.apache.iceberg.Files.localOutput; /** - * Base utility test class for tests that need to write Parquet files + * Utilities for tests that need to write Parquet files. */ class ParquetWritingTestUtils {