From ac5c8a5e9d9740a07d8d1a984e246c6fcebb3ceb Mon Sep 17 00:00:00 2001 From: huzheng Date: Wed, 22 Apr 2020 20:06:54 +0800 Subject: [PATCH 1/2] Move the currentPath & path methods to the parent class for removing the duplicated codes --- .../data/parquet/GenericParquetReaders.java | 27 ------------------- .../data/parquet/GenericParquetWriter.java | 27 ------------------- .../parquet/ParquetAvroValueReaders.java | 27 ------------------- .../iceberg/parquet/ParquetAvroWriter.java | 27 ------------------- .../iceberg/parquet/ParquetTypeVisitor.java | 8 ++++++ .../apache/iceberg/parquet/ParquetUtil.java | 25 +++++++++++++++++ .../parquet/TypeWithSchemaVisitor.java | 8 ++++++ .../apache/iceberg/pig/PigParquetReader.java | 27 ------------------- .../spark/data/SparkParquetReaders.java | 27 ------------------- .../spark/data/SparkParquetWriters.java | 26 ------------------ 10 files changed, 41 insertions(+), 188 deletions(-) diff --git a/data/src/main/java/org/apache/iceberg/data/parquet/GenericParquetReaders.java b/data/src/main/java/org/apache/iceberg/data/parquet/GenericParquetReaders.java index bc6767f7470f..acb35ed29ca6 100644 --- a/data/src/main/java/org/apache/iceberg/data/parquet/GenericParquetReaders.java +++ b/data/src/main/java/org/apache/iceberg/data/parquet/GenericParquetReaders.java @@ -30,7 +30,6 @@ import java.time.OffsetDateTime; import java.time.ZoneOffset; import java.time.temporal.ChronoUnit; -import java.util.Iterator; import java.util.List; import java.util.Map; import org.apache.iceberg.Schema; @@ -297,32 +296,6 @@ public ParquetValueReader primitive(org.apache.iceberg.types.Type.PrimitiveTy MessageType type() { return type; } - - private String[] currentPath() { - String[] path = new String[fieldNames.size()]; - if (!fieldNames.isEmpty()) { - Iterator iter = fieldNames.descendingIterator(); - for (int i = 0; iter.hasNext(); i += 1) { - path[i] = iter.next(); - } - } - - return path; - } - - protected String[] path(String name) { - String[] path = new String[fieldNames.size() + 1]; - path[fieldNames.size()] = name; - - if (!fieldNames.isEmpty()) { - Iterator iter = fieldNames.descendingIterator(); - for (int i = 0; iter.hasNext(); i += 1) { - path[i] = iter.next(); - } - } - - return path; - } } private static final OffsetDateTime EPOCH = Instant.ofEpochSecond(0).atOffset(ZoneOffset.UTC); diff --git a/data/src/main/java/org/apache/iceberg/data/parquet/GenericParquetWriter.java b/data/src/main/java/org/apache/iceberg/data/parquet/GenericParquetWriter.java index 922a9701fb95..d20db91af8c4 100644 --- a/data/src/main/java/org/apache/iceberg/data/parquet/GenericParquetWriter.java +++ b/data/src/main/java/org/apache/iceberg/data/parquet/GenericParquetWriter.java @@ -27,7 +27,6 @@ import java.time.OffsetDateTime; import java.time.ZoneOffset; import java.time.temporal.ChronoUnit; -import java.util.Iterator; import java.util.List; import org.apache.iceberg.data.Record; import org.apache.iceberg.parquet.ParquetTypeVisitor; @@ -172,32 +171,6 @@ public ParquetValueWriter primitive(PrimitiveType primitive) { throw new UnsupportedOperationException("Unsupported type: " + primitive); } } - - private String[] currentPath() { - String[] path = new String[fieldNames.size()]; - if (!fieldNames.isEmpty()) { - Iterator iter = fieldNames.descendingIterator(); - for (int i = 0; iter.hasNext(); i += 1) { - path[i] = iter.next(); - } - } - - return path; - } - - private String[] path(String name) { - String[] path = new String[fieldNames.size() + 1]; - path[fieldNames.size()] = name; - - if (!fieldNames.isEmpty()) { - Iterator iter = fieldNames.descendingIterator(); - for (int i = 0; iter.hasNext(); i += 1) { - path[i] = iter.next(); - } - } - - return path; - } } private static final OffsetDateTime EPOCH = Instant.ofEpochSecond(0).atOffset(ZoneOffset.UTC); diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvroValueReaders.java b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvroValueReaders.java index a26fe92a12b8..6a2727456609 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvroValueReaders.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvroValueReaders.java @@ -26,7 +26,6 @@ import java.math.BigInteger; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.UUID; @@ -231,32 +230,6 @@ public ParquetValueReader primitive(org.apache.iceberg.types.Type.PrimitiveTy throw new UnsupportedOperationException("Unsupported type: " + primitive); } } - - private String[] currentPath() { - String[] path = new String[fieldNames.size()]; - if (!fieldNames.isEmpty()) { - Iterator iter = fieldNames.descendingIterator(); - for (int i = 0; iter.hasNext(); i += 1) { - path[i] = iter.next(); - } - } - - return path; - } - - private String[] path(String name) { - String[] path = new String[fieldNames.size() + 1]; - path[fieldNames.size()] = name; - - if (!fieldNames.isEmpty()) { - Iterator iter = fieldNames.descendingIterator(); - for (int i = 0; iter.hasNext(); i += 1) { - path[i] = iter.next(); - } - } - - return path; - } } static class DecimalReader extends ParquetValueReaders.PrimitiveReader { diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvroWriter.java b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvroWriter.java index b76a88de9e12..a900669cd09a 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvroWriter.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvroWriter.java @@ -20,7 +20,6 @@ package org.apache.iceberg.parquet; import com.google.common.collect.Lists; -import java.util.Iterator; import java.util.List; import org.apache.avro.generic.GenericData.Fixed; import org.apache.avro.generic.IndexedRecord; @@ -163,32 +162,6 @@ public ParquetValueWriter primitive(PrimitiveType primitive) { throw new UnsupportedOperationException("Unsupported type: " + primitive); } } - - private String[] currentPath() { - String[] path = new String[fieldNames.size()]; - if (!fieldNames.isEmpty()) { - Iterator iter = fieldNames.descendingIterator(); - for (int i = 0; iter.hasNext(); i += 1) { - path[i] = iter.next(); - } - } - - return path; - } - - private String[] path(String name) { - String[] path = new String[fieldNames.size() + 1]; - path[fieldNames.size()] = name; - - if (!fieldNames.isEmpty()) { - Iterator iter = fieldNames.descendingIterator(); - for (int i = 0; iter.hasNext(); i += 1) { - path[i] = iter.next(); - } - } - - return path; - } } private static class FixedWriter extends PrimitiveWriter { diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetTypeVisitor.java b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetTypeVisitor.java index 9d8305314826..64c016bf433d 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetTypeVisitor.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetTypeVisitor.java @@ -160,4 +160,12 @@ public T map(GroupType map, T key, T value) { public T primitive(PrimitiveType primitive) { return null; } + + protected String[] currentPath() { + return ParquetUtil.currentPath(fieldNames); + } + + protected String[] path(String name) { + return ParquetUtil.path(fieldNames, name); + } } diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java index 09726a523f19..4fa77dbcb453 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java @@ -25,6 +25,7 @@ import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collections; +import java.util.Deque; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -287,4 +288,28 @@ public static boolean isIntType(PrimitiveType primitiveType) { } return primitiveType.getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.INT32; } + + public static String[] currentPath(Deque fieldNames) { + String[] path = new String[fieldNames.size()]; + if (!fieldNames.isEmpty()) { + Iterator iter = fieldNames.descendingIterator(); + for (int i = 0; iter.hasNext(); i += 1) { + path[i] = iter.next(); + } + } + return path; + } + + public static String[] path(Deque fieldNames, String name) { + String[] path = new String[fieldNames.size() + 1]; + path[fieldNames.size()] = name; + + if (!fieldNames.isEmpty()) { + Iterator iter = fieldNames.descendingIterator(); + for (int i = 0; iter.hasNext(); i += 1) { + path[i] = iter.next(); + } + } + return path; + } } diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/TypeWithSchemaVisitor.java b/parquet/src/main/java/org/apache/iceberg/parquet/TypeWithSchemaVisitor.java index 7b741c304d9e..a99afad2b22d 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/TypeWithSchemaVisitor.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/TypeWithSchemaVisitor.java @@ -192,4 +192,12 @@ public T primitive(org.apache.iceberg.types.Type.PrimitiveType iPrimitive, PrimitiveType primitive) { return null; } + + protected String[] currentPath() { + return ParquetUtil.currentPath(fieldNames); + } + + protected String[] path(String name) { + return ParquetUtil.path(fieldNames, name); + } } diff --git a/pig/src/main/java/org/apache/iceberg/pig/PigParquetReader.java b/pig/src/main/java/org/apache/iceberg/pig/PigParquetReader.java index aa3f1dc7d6a4..fd9e9ac2dfe4 100644 --- a/pig/src/main/java/org/apache/iceberg/pig/PigParquetReader.java +++ b/pig/src/main/java/org/apache/iceberg/pig/PigParquetReader.java @@ -26,7 +26,6 @@ import java.time.OffsetDateTime; import java.time.ZoneOffset; import java.time.temporal.ChronoUnit; -import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -273,32 +272,6 @@ public ParquetValueReader primitive( throw new UnsupportedOperationException("Unsupported type: " + primitive); } } - - private String[] currentPath() { - String[] path = new String[fieldNames.size()]; - if (!fieldNames.isEmpty()) { - Iterator iter = fieldNames.descendingIterator(); - for (int i = 0; iter.hasNext(); i += 1) { - path[i] = iter.next(); - } - } - - return path; - } - - protected String[] path(String name) { - String[] path = new String[fieldNames.size() + 1]; - path[fieldNames.size()] = name; - - if (!fieldNames.isEmpty()) { - Iterator iter = fieldNames.descendingIterator(); - for (int i = 0; iter.hasNext(); i += 1) { - path[i] = iter.next(); - } - } - - return path; - } } private static class DateReader extends PrimitiveReader { diff --git a/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java b/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java index 190b708db14d..60a41380e42a 100644 --- a/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java +++ b/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java @@ -27,7 +27,6 @@ import java.math.BigInteger; import java.nio.ByteBuffer; import java.util.Arrays; -import java.util.Iterator; import java.util.List; import java.util.Map; import org.apache.iceberg.Schema; @@ -281,35 +280,9 @@ public ParquetValueReader primitive(org.apache.iceberg.types.Type.PrimitiveTy } } - private String[] currentPath() { - String[] path = new String[fieldNames.size()]; - if (!fieldNames.isEmpty()) { - Iterator iter = fieldNames.descendingIterator(); - for (int i = 0; iter.hasNext(); i += 1) { - path[i] = iter.next(); - } - } - - return path; - } - protected MessageType type() { return type; } - - protected String[] path(String name) { - String[] path = new String[fieldNames.size() + 1]; - path[fieldNames.size()] = name; - - if (!fieldNames.isEmpty()) { - Iterator iter = fieldNames.descendingIterator(); - for (int i = 0; iter.hasNext(); i += 1) { - path[i] = iter.next(); - } - } - - return path; - } } private static class BinaryDecimalReader extends PrimitiveReader { diff --git a/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java b/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java index 3cd0d5ef2451..52ebd823335a 100644 --- a/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java +++ b/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java @@ -185,32 +185,6 @@ public ParquetValueWriter primitive(PrimitiveType primitive) { throw new UnsupportedOperationException("Unsupported type: " + primitive); } } - - private String[] currentPath() { - String[] path = new String[fieldNames.size()]; - if (!fieldNames.isEmpty()) { - Iterator iter = fieldNames.descendingIterator(); - for (int i = 0; iter.hasNext(); i += 1) { - path[i] = iter.next(); - } - } - - return path; - } - - private String[] path(String name) { - String[] path = new String[fieldNames.size() + 1]; - path[fieldNames.size()] = name; - - if (!fieldNames.isEmpty()) { - Iterator iter = fieldNames.descendingIterator(); - for (int i = 0; iter.hasNext(); i += 1) { - path[i] = iter.next(); - } - } - - return path; - } } private static PrimitiveWriter utf8Strings(ColumnDescriptor desc) { From dff8440220ef7e1942430529af681fd4165373e3 Mon Sep 17 00:00:00 2001 From: huzheng Date: Thu, 23 Apr 2020 10:11:29 +0800 Subject: [PATCH 2/2] Move the currentPath&path out of ParquetUtil.java --- .../iceberg/parquet/ParquetTypeVisitor.java | 6 +++-- .../apache/iceberg/parquet/ParquetUtil.java | 25 ------------------- .../parquet/TypeWithSchemaVisitor.java | 6 +++-- 3 files changed, 8 insertions(+), 29 deletions(-) diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetTypeVisitor.java b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetTypeVisitor.java index 64c016bf433d..6b68c84edb7e 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetTypeVisitor.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetTypeVisitor.java @@ -162,10 +162,12 @@ public T primitive(PrimitiveType primitive) { } protected String[] currentPath() { - return ParquetUtil.currentPath(fieldNames); + return Lists.newArrayList(fieldNames.descendingIterator()).toArray(new String[0]); } protected String[] path(String name) { - return ParquetUtil.path(fieldNames, name); + List list = Lists.newArrayList(fieldNames.descendingIterator()); + list.add(name); + return list.toArray(new String[0]); } } diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java index 4fa77dbcb453..09726a523f19 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java @@ -25,7 +25,6 @@ import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collections; -import java.util.Deque; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -288,28 +287,4 @@ public static boolean isIntType(PrimitiveType primitiveType) { } return primitiveType.getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.INT32; } - - public static String[] currentPath(Deque fieldNames) { - String[] path = new String[fieldNames.size()]; - if (!fieldNames.isEmpty()) { - Iterator iter = fieldNames.descendingIterator(); - for (int i = 0; iter.hasNext(); i += 1) { - path[i] = iter.next(); - } - } - return path; - } - - public static String[] path(Deque fieldNames, String name) { - String[] path = new String[fieldNames.size() + 1]; - path[fieldNames.size()] = name; - - if (!fieldNames.isEmpty()) { - Iterator iter = fieldNames.descendingIterator(); - for (int i = 0; iter.hasNext(); i += 1) { - path[i] = iter.next(); - } - } - return path; - } } diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/TypeWithSchemaVisitor.java b/parquet/src/main/java/org/apache/iceberg/parquet/TypeWithSchemaVisitor.java index a99afad2b22d..a13705db9a22 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/TypeWithSchemaVisitor.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/TypeWithSchemaVisitor.java @@ -194,10 +194,12 @@ public T primitive(org.apache.iceberg.types.Type.PrimitiveType iPrimitive, } protected String[] currentPath() { - return ParquetUtil.currentPath(fieldNames); + return Lists.newArrayList(fieldNames.descendingIterator()).toArray(new String[0]); } protected String[] path(String name) { - return ParquetUtil.path(fieldNames, name); + List list = Lists.newArrayList(fieldNames.descendingIterator()); + list.add(name); + return list.toArray(new String[0]); } }