From 244bb6adcab5f312bedf3f58292609fa2cc01460 Mon Sep 17 00:00:00 2001 From: Nandor Kollar Date: Thu, 29 Mar 2018 00:51:39 +0200 Subject: [PATCH 01/12] PARQUET-1253: Support for new logical type representation --- .../cascading/TestParquetTBaseScheme.java | 7 +- .../parquet/schema/OriginalLogicalType.java | 638 ++++++++++++++++++ .../apache/parquet/schema/PrimitiveType.java | 5 + .../java/org/apache/parquet/schema/Type.java | 22 +- .../java/org/apache/parquet/schema/Types.java | 25 +- .../converter/ParquetMetadataConverter.java | 137 +++- .../hadoop/metadata/ParquetMetadata.java | 13 +- .../TestParquetMetadataConverter.java | 18 +- 8 files changed, 799 insertions(+), 66 deletions(-) create mode 100644 parquet-column/src/main/java/org/apache/parquet/schema/OriginalLogicalType.java diff --git a/parquet-cascading3/src/test/java/org/apache/parquet/cascading/TestParquetTBaseScheme.java b/parquet-cascading3/src/test/java/org/apache/parquet/cascading/TestParquetTBaseScheme.java index 7b9f817e3c..97b2ccf998 100644 --- a/parquet-cascading3/src/test/java/org/apache/parquet/cascading/TestParquetTBaseScheme.java +++ b/parquet-cascading3/src/test/java/org/apache/parquet/cascading/TestParquetTBaseScheme.java @@ -40,14 +40,12 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskAttemptID; import org.apache.thrift.protocol.TCompactProtocol; import org.apache.thrift.protocol.TProtocol; import org.apache.thrift.protocol.TProtocolFactory; import org.apache.thrift.transport.TIOStreamTransport; import org.junit.Test; -import static org.junit.Assert.*; import org.apache.parquet.hadoop.thrift.ThriftToParquetFileWriter; import org.apache.parquet.hadoop.util.ContextUtil; @@ -55,8 +53,9 @@ import java.io.File; import java.io.ByteArrayOutputStream; -import java.util.HashMap; -import java.util.Map; + +import static 
org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; public class TestParquetTBaseScheme { final String txtInputPath = "target/test-classes/names.txt"; diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/OriginalLogicalType.java b/parquet-column/src/main/java/org/apache/parquet/schema/OriginalLogicalType.java new file mode 100644 index 0000000000..c635c5f1f3 --- /dev/null +++ b/parquet-column/src/main/java/org/apache/parquet/schema/OriginalLogicalType.java @@ -0,0 +1,638 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.parquet.schema; + +import org.apache.parquet.ShouldNeverHappenException; +import org.apache.parquet.format.BsonType; +import org.apache.parquet.format.DateType; +import org.apache.parquet.format.DecimalType; +import org.apache.parquet.format.EnumType; +import org.apache.parquet.format.IntType; +import org.apache.parquet.format.JsonType; +import org.apache.parquet.format.ListType; +import org.apache.parquet.format.LogicalType; +import org.apache.parquet.format.MapType; +import org.apache.parquet.format.MicroSeconds; +import org.apache.parquet.format.MilliSeconds; +import org.apache.parquet.format.NullType; +import org.apache.parquet.format.StringType; +import org.apache.parquet.format.TimeType; +import org.apache.parquet.format.TimestampType; + +import java.util.Objects; + +public interface OriginalLogicalType { + /** + * Convert this parquet-mr logical type to the parquet-format LogicalType. + * + * @return the parquet-format representation of this logical type implementation + */ + LogicalType toLogicalType(); + + /** + * Convert this logical type to the old logical type representation in parquet-mr (if there is one). + * Logical type implementations that don't have a corresponding mapping should return null. + * + * @return the OriginalType representation of the new logical type, or null if there's none + */ + OriginalType toOriginalType(); + + /** + * Helper method to convert the old representation of logical types (OriginalType) to the new logical type representation. 
+ */ + static OriginalLogicalType fromOriginalType(OriginalType originalType) { + if (originalType == null) { + return null; + } + switch (originalType) { + case UTF8: + return OriginalLogicalType.StringLogicalType.create(); + case MAP: + return OriginalLogicalType.MapLogicalType.create(); + case DECIMAL: + return OriginalLogicalType.DecimalLogicalType.create(); + case LIST: + return OriginalLogicalType.ListLogicalType.create(); + case DATE: + return OriginalLogicalType.DateLogicalType.create(); + case INTERVAL: + return OriginalLogicalType.IntervalLogicalType.create(); + case TIMESTAMP_MILLIS: + return OriginalLogicalType.TimestampLogicalType.create(true, OriginalLogicalType.TimeUnit.MILLIS); + case TIMESTAMP_MICROS: + return OriginalLogicalType.TimestampLogicalType.create(true, OriginalLogicalType.TimeUnit.MICROS); + case TIME_MILLIS: + return OriginalLogicalType.TimeLogicalType.create(true, OriginalLogicalType.TimeUnit.MILLIS); + case TIME_MICROS: + return OriginalLogicalType.TimeLogicalType.create(true, OriginalLogicalType.TimeUnit.MICROS); + case UINT_8: + return OriginalLogicalType.IntLogicalType.create((byte) 8, false); + case UINT_16: + return OriginalLogicalType.IntLogicalType.create((byte) 16, false); + case UINT_32: + return OriginalLogicalType.IntLogicalType.create((byte) 32, false); + case UINT_64: + return OriginalLogicalType.IntLogicalType.create((byte) 64, false); + case INT_8: + return OriginalLogicalType.IntLogicalType.create((byte) 8, true); + case INT_16: + return OriginalLogicalType.IntLogicalType.create((byte) 16, true); + case INT_32: + return OriginalLogicalType.IntLogicalType.create((byte) 32, true); + case INT_64: + return OriginalLogicalType.IntLogicalType.create((byte) 64, true); + case ENUM: + return OriginalLogicalType.EnumLogicalType.create(); + case JSON: + return OriginalLogicalType.JsonLogicalType.create(); + case BSON: + return OriginalLogicalType.BsonLogicalType.create(); + case MAP_KEY_VALUE: + return 
OriginalLogicalType.MapKeyValueType.create(); + default: + return OriginalLogicalType.NullLogicalType.create(); + } + } + + class StringLogicalType implements OriginalLogicalType { + private static final StringLogicalType INSTANCE = new StringLogicalType(); + + public static OriginalLogicalType create() { + return INSTANCE; + } + + private StringLogicalType() { + } + + @Override + public LogicalType toLogicalType() { + return LogicalType.STRING(new StringType()); + } + + @Override + public OriginalType toOriginalType() { + return OriginalType.UTF8; + } + + @Override + public boolean equals(Object obj) { + return obj instanceof StringLogicalType; + } + + @Override + public int hashCode() { + // This type doesn't have any parameters, thus use class hashcode + return getClass().hashCode(); + } + } + + class MapLogicalType implements OriginalLogicalType { + private static final MapLogicalType INSTANCE = new MapLogicalType(); + + public static OriginalLogicalType create() { + return INSTANCE; + } + + private MapLogicalType() { + } + + @Override + public LogicalType toLogicalType() { + return LogicalType.MAP(new MapType()); + } + + @Override + public OriginalType toOriginalType() { + return OriginalType.MAP; + } + + @Override + public boolean equals(Object obj) { + return obj instanceof MapLogicalType; + } + + @Override + public int hashCode() { + // This type doesn't have any parameters, thus use class hashcode + return getClass().hashCode(); + } + } + + class ListLogicalType implements OriginalLogicalType { + private static final ListLogicalType INSTANCE = new ListLogicalType(); + + public static OriginalLogicalType create() { + return INSTANCE; + } + + private ListLogicalType() { + } + + @Override + public LogicalType toLogicalType() { + return LogicalType.LIST(new ListType()); + } + + @Override + public OriginalType toOriginalType() { + return OriginalType.LIST; + } + + @Override + public boolean equals(Object obj) { + return obj instanceof ListLogicalType; + } + + 
@Override + public int hashCode() { + // This type doesn't have any parameters, thus use class hashcode + return getClass().hashCode(); + } + } + + class EnumLogicalType implements OriginalLogicalType { + private static final EnumLogicalType INSTANCE = new EnumLogicalType(); + + public static OriginalLogicalType create() { + return INSTANCE; + } + + private EnumLogicalType() { + } + + @Override + public LogicalType toLogicalType() { + return LogicalType.ENUM(new EnumType()); + } + + @Override + public OriginalType toOriginalType() { + return OriginalType.ENUM; + } + + @Override + public boolean equals(Object obj) { + return obj instanceof EnumLogicalType; + } + + @Override + public int hashCode() { + // This type doesn't have any parameters, thus use class hashcode + return getClass().hashCode(); + } + } + + class DecimalLogicalType implements OriginalLogicalType { + + private int scale; + private int precision; + + public static OriginalLogicalType create() { + return new DecimalLogicalType(0, 0); + } + + public static OriginalLogicalType create(int scale, int precision) { + return new DecimalLogicalType(scale, precision); + } + + private DecimalLogicalType(int scale, int precision) { + this.scale = scale; + this.precision = precision; + } + + public void setPrecision(int precision) { + this.precision = precision; + } + + public void setScale(int scale) { + this.scale = scale; + } + + @Override + public LogicalType toLogicalType() { + return LogicalType.DECIMAL(new DecimalType(scale, precision)); + } + + @Override + public OriginalType toOriginalType() { + return OriginalType.DECIMAL; + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof DecimalLogicalType)) { + return false; + } + DecimalLogicalType other = (DecimalLogicalType) obj; + return scale == other.scale && precision == other.precision; + } + + @Override + public int hashCode() { + return Objects.hash(scale, precision); + } + } + + class DateLogicalType implements 
OriginalLogicalType { + private static final DateLogicalType INSTANCE = new DateLogicalType(); + + public static OriginalLogicalType create() { + return INSTANCE; + } + + private DateLogicalType() { + } + + @Override + public LogicalType toLogicalType() { + return LogicalType.DATE(new DateType()); + } + + @Override + public OriginalType toOriginalType() { + return OriginalType.DATE; + } + + @Override + public boolean equals(Object obj) { + return obj instanceof DateLogicalType; + } + + @Override + public int hashCode() { + // This type doesn't have any parameters, thus use class hashcode + return getClass().hashCode(); + } + } + + enum TimeUnit { + MILLIS, + MICROS + } + + static org.apache.parquet.format.TimeUnit convertUnit(TimeUnit unit) { + switch (unit) { + case MICROS: + return org.apache.parquet.format.TimeUnit.MICROS(new MicroSeconds()); + case MILLIS: + return org.apache.parquet.format.TimeUnit.MILLIS(new MilliSeconds()); + default: + throw new ShouldNeverHappenException(); + } + } + + class TimeLogicalType implements OriginalLogicalType { + private final boolean isAdjustedToUTC; + private final TimeUnit unit; + + public static OriginalLogicalType create(boolean isAdjustedToUTC, TimeUnit unit) { + return new TimeLogicalType(isAdjustedToUTC, unit); + } + + private TimeLogicalType(boolean isAdjustedToUTC, TimeUnit unit) { + this.isAdjustedToUTC = isAdjustedToUTC; + this.unit = unit; + } + + @Override + public LogicalType toLogicalType() { + return LogicalType.TIME(new TimeType(isAdjustedToUTC, convertUnit(unit))); + } + + @Override + public OriginalType toOriginalType() { + switch (unit) { + case MILLIS: + return OriginalType.TIME_MILLIS; + case MICROS: + return OriginalType.TIME_MICROS; + } + + throw new ShouldNeverHappenException(); + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof TimeLogicalType)) { + return false; + } + TimeLogicalType other = (TimeLogicalType) obj; + return isAdjustedToUTC == other.isAdjustedToUTC && unit 
== other.unit; + } + + @Override + public int hashCode() { + return Objects.hash(isAdjustedToUTC, unit); + } + } + + class TimestampLogicalType implements OriginalLogicalType { + private final boolean isAdjustedToUTC; + private final TimeUnit unit; + + public static OriginalLogicalType create(boolean isAdjustedToUTC, TimeUnit unit) { + return new TimestampLogicalType(isAdjustedToUTC, unit); + } + + private TimestampLogicalType(boolean isAdjustedToUTC, TimeUnit unit) { + this.isAdjustedToUTC = isAdjustedToUTC; + this.unit = unit; + } + + @Override + public LogicalType toLogicalType() { + return LogicalType.TIMESTAMP(new TimestampType(isAdjustedToUTC, convertUnit(unit))); + } + + @Override + public OriginalType toOriginalType() { + switch (unit) { + case MILLIS: + return OriginalType.TIMESTAMP_MILLIS; + case MICROS: + return OriginalType.TIMESTAMP_MICROS; + default: + throw new ShouldNeverHappenException(); + } + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof TimestampLogicalType)) { + return false; + } + TimestampLogicalType other = (TimestampLogicalType) obj; + return (isAdjustedToUTC == other.isAdjustedToUTC) && (unit == other.unit); + } + + @Override + public int hashCode() { + return Objects.hash(isAdjustedToUTC, unit); + } + } + + class IntLogicalType implements OriginalLogicalType { + private final byte bitWidth; + private final boolean isSigned; + + public static OriginalLogicalType create(byte bitWidth, boolean isSigned) { + return new IntLogicalType(bitWidth, isSigned); + } + + private IntLogicalType(byte bitWidth, boolean isSigned) { + this.bitWidth = bitWidth; + this.isSigned = isSigned; + } + + @Override + public LogicalType toLogicalType() { + return LogicalType.INTEGER(new IntType(bitWidth, isSigned)); + } + + @Override + public OriginalType toOriginalType() { + switch (bitWidth) { + case 8: + return isSigned ? OriginalType.INT_8 : OriginalType.UINT_8; + case 16: + return isSigned ? 
OriginalType.INT_16 : OriginalType.UINT_16; + case 32: + return isSigned ? OriginalType.INT_32 : OriginalType.UINT_32; + case 64: + return isSigned ? OriginalType.INT_64 : OriginalType.UINT_64; + default: + throw new ShouldNeverHappenException(); + } + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof IntLogicalType)) { + return false; + } + IntLogicalType other = (IntLogicalType) obj; + return (bitWidth == other.bitWidth) && (isSigned == other.isSigned); + } + + @Override + public int hashCode() { + return Objects.hash(bitWidth, isSigned); + } + } + + class NullLogicalType implements OriginalLogicalType { + private static final NullLogicalType INSTANCE = new NullLogicalType(); + + public static OriginalLogicalType create() { + return INSTANCE; + } + + private NullLogicalType() { + } + + @Override + public LogicalType toLogicalType() { + return LogicalType.UNKNOWN(new NullType()); + } + + @Override + public OriginalType toOriginalType() { + return null; + } + + @Override + public boolean equals(Object obj) { + return obj instanceof NullLogicalType; + } + + @Override + public int hashCode() { + // This type doesn't have any parameters, thus use class hashcode + return getClass().hashCode(); + } + } + + class JsonLogicalType implements OriginalLogicalType { + private static final JsonLogicalType INSTANCE = new JsonLogicalType(); + + public static OriginalLogicalType create() { + return INSTANCE; + } + + private JsonLogicalType() { + } + + @Override + public LogicalType toLogicalType() { + return LogicalType.JSON(new JsonType()); + } + + @Override + public OriginalType toOriginalType() { + return OriginalType.JSON; + } + + @Override + public boolean equals(Object obj) { + return obj instanceof JsonLogicalType; + } + + @Override + public int hashCode() { + // This type doesn't have any parameters, thus use class hashcode + return getClass().hashCode(); + } + } + + class BsonLogicalType implements OriginalLogicalType { + private static final 
BsonLogicalType INSTANCE = new BsonLogicalType(); + + public static OriginalLogicalType create() { + return INSTANCE; + } + + private BsonLogicalType() { + } + + @Override + public LogicalType toLogicalType() { + return LogicalType.BSON(new BsonType()); + } + + @Override + public OriginalType toOriginalType() { + return OriginalType.BSON; + } + + @Override + public boolean equals(Object obj) { + return obj instanceof BsonLogicalType; + } + + @Override + public int hashCode() { + // This type doesn't have any parameters, thus use class hashcode + return getClass().hashCode(); + } + } + + class IntervalLogicalType implements OriginalLogicalType { + private static IntervalLogicalType INSTANCE = new IntervalLogicalType(); + + public static OriginalLogicalType create() { + return INSTANCE; + } + + private IntervalLogicalType() { + } + + @Override + public LogicalType toLogicalType() { + return LogicalType.UNKNOWN(new NullType()); + } + + @Override + public OriginalType toOriginalType() { + return OriginalType.INTERVAL; + } + + @Override + public boolean equals(Object obj) { + return obj instanceof IntervalLogicalType; + } + + @Override + public int hashCode() { + // This type doesn't have any parameters, thus use class hashcode + return getClass().hashCode(); + } + } + + class MapKeyValueType implements OriginalLogicalType { + private static MapKeyValueType INSTANCE = new MapKeyValueType(); + + public static OriginalLogicalType create() { + return INSTANCE; + } + + private MapKeyValueType() { + } + + @Override + public LogicalType toLogicalType() { + return LogicalType.UNKNOWN(new NullType()); + } + + @Override + public OriginalType toOriginalType() { + return OriginalType.MAP_KEY_VALUE; + } + + @Override + public boolean equals(Object obj) { + return obj instanceof MapKeyValueType; + } + + @Override + public int hashCode() { + // This type doesn't have any parameters, thus use class hashcode + return getClass().hashCode(); + } + } +} diff --git 
a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java index a4211738bf..d62ea26e9b 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java @@ -450,6 +450,11 @@ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, this.primitive = primitive; this.length = length; this.decimalMeta = decimalMeta; + if (originalType == OriginalType.DECIMAL) { + OriginalLogicalType.DecimalLogicalType originalLogicalType = (OriginalLogicalType.DecimalLogicalType) getOriginalLogicalType(); + originalLogicalType.setPrecision(decimalMeta.getPrecision()); + originalLogicalType.setScale(decimalMeta.getScale()); + } if (columnOrder == null) { columnOrder = primitive == PrimitiveTypeName.INT96 || originalType == OriginalType.INTERVAL diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java index dd2c38da32..d7d0a22387 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. 
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -119,7 +119,7 @@ public boolean isMoreRestrictiveThan(Repetition other) { private final String name; private final Repetition repetition; - private final OriginalType originalType; + private final OriginalLogicalType originalLogicalType; private final ID id; /** @@ -151,7 +151,7 @@ public Type(String name, Repetition repetition, OriginalType originalType) { super(); this.name = checkNotNull(name, "name"); this.repetition = checkNotNull(repetition, "repetition"); - this.originalType = originalType; + this.originalLogicalType = originalType == null ? null : OriginalLogicalType.fromOriginalType(originalType); this.id = id; } @@ -190,11 +190,15 @@ public ID getId() { return id; } + public OriginalLogicalType getOriginalLogicalType() { + return originalLogicalType; + } + /** * @return the original type (LIST, MAP, ...) */ public OriginalType getOriginalType() { - return originalType; + return originalLogicalType == null ? 
null : originalLogicalType.toOriginalType(); } /** @@ -247,8 +251,8 @@ public PrimitiveType asPrimitiveType() { public int hashCode() { int c = repetition.hashCode(); c = 31 * c + name.hashCode(); - if (originalType != null) { - c = 31 * c + originalType.hashCode(); + if (originalLogicalType != null) { + c = 31 * c + originalLogicalType.hashCode(); } if (id != null) { c = 31 * c + id.hashCode(); @@ -262,7 +266,7 @@ protected boolean equals(Type other) { && repetition == other.repetition && eqOrBothNull(repetition, other.repetition) && eqOrBothNull(id, other.id) - && eqOrBothNull(originalType, other.originalType); + && eqOrBothNull(originalLogicalType, other.originalLogicalType); }; @Override diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java index 0422a9d431..ed96cb2f88 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java @@ -198,7 +198,7 @@ public abstract static class Builder { protected final Class returnClass; protected Type.Repetition repetition = null; - protected OriginalType originalType = null; + protected OriginalLogicalType originalLogicalType = null; protected Type.ID id = null; private boolean repetitionAlreadySet = false; @@ -252,7 +252,12 @@ protected final THIS repetition(Type.Repetition repetition) { * @return this builder for method chaining */ public THIS as(OriginalType type) { - this.originalType = type; + this.originalLogicalType = OriginalLogicalType.fromOriginalType(type); + return self(); + } + + public THIS as(OriginalLogicalType type) { + this.originalLogicalType = type; return self(); } @@ -303,6 +308,9 @@ public P named(String name) { } } + protected OriginalType getOriginalType () { + return originalLogicalType == null ? 
null : originalLogicalType.toOriginalType(); + } } public abstract static class @@ -402,7 +410,8 @@ protected PrimitiveType build(String name) { DecimalMetadata meta = decimalMetadata(); // validate type annotations and required metadata - if (originalType != null) { + if (originalLogicalType != null) { + OriginalType originalType = originalLogicalType.toOriginalType(); switch (originalType) { case UTF8: case JSON: @@ -475,7 +484,7 @@ protected PrimitiveType build(String name) { } } - return new PrimitiveType(repetition, primitiveType, length, name, originalType, meta, id, columnOrder); + return new PrimitiveType(repetition, primitiveType, length, name, getOriginalType(), meta, id, columnOrder); } private static long maxPrecision(int numBytes) { @@ -488,7 +497,7 @@ private static long maxPrecision(int numBytes) { protected DecimalMetadata decimalMetadata() { DecimalMetadata meta = null; - if (OriginalType.DECIMAL == originalType) { + if (OriginalType.DECIMAL == getOriginalType()) { Preconditions.checkArgument(precision > 0, "Invalid DECIMAL precision: " + precision); Preconditions.checkArgument(scale >= 0, @@ -648,7 +657,7 @@ public THIS addFields(Type... 
types) { @Override protected GroupType build(String name) { - return new GroupType(repetition, name, originalType, fields, id); + return new GroupType(repetition, name, getOriginalType(), fields, id); } public MapBuilder map( @@ -1043,7 +1052,7 @@ public THIS value(Type type) { @Override protected Type build(String name) { - Preconditions.checkState(originalType == null, + Preconditions.checkState(originalLogicalType == null, "MAP is already a logical type and can't be changed."); if (keyType == null) { keyType = STRING_KEY; @@ -1191,7 +1200,7 @@ public LP named(String name) { @Override protected Type build(String name) { - Preconditions.checkState(originalType == null, + Preconditions.checkState(originalLogicalType == null, "LIST is already the logical type and can't be changed"); Preconditions.checkNotNull(elementType, "List element type"); diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java index c4e5da3da7..e29effd043 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. 
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -40,7 +40,13 @@ import org.apache.parquet.CorruptStatistics; import org.apache.parquet.ParquetReadOptions; import org.apache.parquet.format.CompressionCodec; +import org.apache.parquet.format.DecimalType; +import org.apache.parquet.format.IntType; +import org.apache.parquet.format.LogicalType; import org.apache.parquet.format.PageEncodingStats; +import org.apache.parquet.format.TimeType; +import org.apache.parquet.format.TimeUnit; +import org.apache.parquet.format.TimestampType; import org.apache.parquet.hadoop.metadata.ColumnPath; import org.apache.parquet.format.ColumnChunk; import org.apache.parquet.format.ColumnMetaData; @@ -75,6 +81,7 @@ import org.apache.parquet.schema.Type.Repetition; import org.apache.parquet.schema.TypeVisitor; import org.apache.parquet.schema.Types; +import org.apache.parquet.schema.OriginalLogicalType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -171,8 +178,9 @@ public void visit(PrimitiveType primitiveType) { SchemaElement element = new SchemaElement(primitiveType.getName()); element.setRepetition_type(toParquetRepetition(primitiveType.getRepetition())); element.setType(getType(primitiveType.getPrimitiveTypeName())); - if (primitiveType.getOriginalType() != null) { - element.setConverted_type(getConvertedType(primitiveType.getOriginalType())); + if (primitiveType.getOriginalLogicalType() != null) { + element.setConverted_type(getConvertedType(primitiveType.getOriginalLogicalType())); + element.setLogicalType(primitiveType.getOriginalLogicalType().toLogicalType()); } if (primitiveType.getDecimalMetadata() != null) { element.setPrecision(primitiveType.getDecimalMetadata().getPrecision()); @@ -200,8 +208,9 @@ public void visit(MessageType 
messageType) { public void visit(GroupType groupType) { SchemaElement element = new SchemaElement(groupType.getName()); element.setRepetition_type(toParquetRepetition(groupType.getRepetition())); - if (groupType.getOriginalType() != null) { - element.setConverted_type(getConvertedType(groupType.getOriginalType())); + if (groupType.getOriginalLogicalType() != null) { + element.setConverted_type(getConvertedType(groupType.getOriginalLogicalType())); + element.setLogicalType(groupType.getOriginalLogicalType().toLogicalType()); } if (groupType.getId() != null) { element.setField_id(groupType.getId().intValue()); @@ -586,60 +595,113 @@ Type getType(PrimitiveTypeName type) { } // Visible for testing - OriginalType getOriginalType(ConvertedType type) { + OriginalLogicalType getOriginalType(ConvertedType type, SchemaElement schemaElement) { switch (type) { case UTF8: - return OriginalType.UTF8; + return OriginalLogicalType.StringLogicalType.create(); case MAP: - return OriginalType.MAP; + return OriginalLogicalType.MapLogicalType.create(); case MAP_KEY_VALUE: - return OriginalType.MAP_KEY_VALUE; + return OriginalLogicalType.MapKeyValueType.create(); case LIST: - return OriginalType.LIST; + return OriginalLogicalType.ListLogicalType.create(); case ENUM: - return OriginalType.ENUM; + return OriginalLogicalType.EnumLogicalType.create(); case DECIMAL: - return OriginalType.DECIMAL; + if (schemaElement == null) { + return OriginalLogicalType.DecimalLogicalType.create(); + } + return OriginalLogicalType.DecimalLogicalType.create(schemaElement.scale, schemaElement.precision); case DATE: - return OriginalType.DATE; + return OriginalLogicalType.DateLogicalType.create(); case TIME_MILLIS: - return OriginalType.TIME_MILLIS; + return OriginalLogicalType.TimeLogicalType.create(true, OriginalLogicalType.TimeUnit.MILLIS); case TIME_MICROS: - return OriginalType.TIME_MICROS; + return OriginalLogicalType.TimeLogicalType.create(true, OriginalLogicalType.TimeUnit.MICROS); case 
TIMESTAMP_MILLIS: - return OriginalType.TIMESTAMP_MILLIS; + return OriginalLogicalType.TimestampLogicalType.create(true, OriginalLogicalType.TimeUnit.MILLIS); case TIMESTAMP_MICROS: - return OriginalType.TIMESTAMP_MICROS; + return OriginalLogicalType.TimestampLogicalType.create(true, OriginalLogicalType.TimeUnit.MICROS); case INTERVAL: - return OriginalType.INTERVAL; + return OriginalLogicalType.IntervalLogicalType.create(); case INT_8: - return OriginalType.INT_8; + return OriginalLogicalType.IntLogicalType.create((byte) 8, true); case INT_16: - return OriginalType.INT_16; + return OriginalLogicalType.IntLogicalType.create((byte) 16, true); case INT_32: - return OriginalType.INT_32; + return OriginalLogicalType.IntLogicalType.create((byte) 32, true); case INT_64: - return OriginalType.INT_64; + return OriginalLogicalType.IntLogicalType.create((byte) 64, true); case UINT_8: - return OriginalType.UINT_8; + return OriginalLogicalType.IntLogicalType.create((byte) 8, false); case UINT_16: - return OriginalType.UINT_16; + return OriginalLogicalType.IntLogicalType.create((byte) 16, false); case UINT_32: - return OriginalType.UINT_32; + return OriginalLogicalType.IntLogicalType.create((byte) 32, false); case UINT_64: - return OriginalType.UINT_64; + return OriginalLogicalType.IntLogicalType.create((byte) 64, false); case JSON: - return OriginalType.JSON; + return OriginalLogicalType.JsonLogicalType.create(); case BSON: - return OriginalType.BSON; + return OriginalLogicalType.BsonLogicalType.create(); default: - throw new RuntimeException("Unknown converted type " + type); + return OriginalLogicalType.NullLogicalType.create(); + } + } + + OriginalLogicalType getOriginalType(LogicalType type) { + switch (type.getSetField()) { + case MAP: + return OriginalLogicalType.MapLogicalType.create(); + case BSON: + return OriginalLogicalType.BsonLogicalType.create(); + case DATE: + return OriginalLogicalType.DateLogicalType.create(); + case ENUM: + return 
OriginalLogicalType.EnumLogicalType.create(); + case JSON: + return OriginalLogicalType.JsonLogicalType.create(); + case LIST: + return OriginalLogicalType.ListLogicalType.create(); + case TIME: + TimeType time = type.getTIME(); + return OriginalLogicalType.TimeLogicalType.create(time.isAdjustedToUTC, convertTimeUnit(time.unit)); + case STRING: + return OriginalLogicalType.StringLogicalType.create(); + case DECIMAL: + DecimalType decimal = type.getDECIMAL(); + return OriginalLogicalType.DecimalLogicalType.create(decimal.scale, decimal.precision); + case INTEGER: + IntType integer = type.getINTEGER(); + return OriginalLogicalType.IntLogicalType.create(integer.bitWidth, integer.isSigned); + case UNKNOWN: + return null; + case TIMESTAMP: + TimestampType timestamp = type.getTIMESTAMP(); + return OriginalLogicalType.TimestampLogicalType.create(timestamp.isAdjustedToUTC, convertTimeUnit(timestamp.unit)); + default: + throw new RuntimeException("Unknown logical type " + type); + } + } + + OriginalLogicalType.TimeUnit convertTimeUnit(TimeUnit unit) { + switch (unit.getSetField()) { + case MICROS: + return OriginalLogicalType.TimeUnit.MICROS; + case MILLIS: + return OriginalLogicalType.TimeUnit.MILLIS; + default: + throw new RuntimeException("Unknown time unit " + unit); } } // Visible for testing - ConvertedType getConvertedType(OriginalType type) { - switch (type) { + ConvertedType getConvertedType(OriginalLogicalType type) { + OriginalType originalType = type.toOriginalType(); + if (originalType == null) { + return null; + } + switch (originalType) { case UTF8: return ConvertedType.UTF8; case MAP: @@ -985,8 +1047,15 @@ private void buildChildren(Types.GroupBuilder builder, buildChildren((Types.GroupBuilder) childBuilder, schema, schemaElement.num_children, columnOrders, columnCount); } + if (schemaElement.isSetLogicalType()) { + childBuilder.as(getOriginalType(schemaElement.logicalType)); + } if (schemaElement.isSetConverted_type()) { - 
childBuilder.as(getOriginalType(schemaElement.converted_type)); + OriginalLogicalType originalType = getOriginalType(schemaElement.converted_type, schemaElement); + OriginalLogicalType newLogicalType = getOriginalType(schemaElement.logicalType); + if (!originalType.equals(newLogicalType)) { + childBuilder.as(getOriginalType(schemaElement.converted_type, schemaElement)); + } } if (schemaElement.isSetField_id()) { childBuilder.id(schemaElement.field_id); diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ParquetMetadata.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ParquetMetadata.java index cb6af54ebb..6e3f845328 100755 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ParquetMetadata.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ParquetMetadata.java @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. 
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -27,8 +27,7 @@ import org.codehaus.jackson.JsonParseException; import org.codehaus.jackson.map.JsonMappingException; import org.codehaus.jackson.map.ObjectMapper; -import org.codehaus.jackson.map.ObjectWriter; -import org.codehaus.jackson.map.SerializationConfig.Feature; +import org.codehaus.jackson.map.SerializationConfig; /** * Meta Data block stored in the footer of the file @@ -41,6 +40,10 @@ public class ParquetMetadata { private static final ObjectMapper objectMapper = new ObjectMapper(); + static { + objectMapper.configure(SerializationConfig.Feature.FAIL_ON_EMPTY_BEANS, false); + } + /** * * @param parquetMetaData diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java index b83da5dbd2..10bf5ecd5c 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. 
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -60,6 +60,8 @@ import org.apache.parquet.column.statistics.IntStatistics; import org.apache.parquet.column.statistics.LongStatistics; import org.apache.parquet.column.statistics.Statistics; +import org.apache.parquet.format.DecimalType; +import org.apache.parquet.format.LogicalType; import org.apache.parquet.hadoop.metadata.BlockMetaData; import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; import org.apache.parquet.hadoop.metadata.ColumnPath; @@ -67,6 +69,7 @@ import org.apache.parquet.hadoop.metadata.ParquetMetadata; import org.apache.parquet.io.api.Binary; import org.apache.parquet.schema.PrimitiveType; +import org.apache.parquet.schema.OriginalLogicalType; import org.junit.Assert; import org.junit.Test; import org.apache.parquet.example.Paper; @@ -130,12 +133,14 @@ public void testSchemaConverterDecimal() { .setRepetition_type(FieldRepetitionType.REQUIRED) .setType(Type.BYTE_ARRAY) .setConverted_type(ConvertedType.DECIMAL) + .setLogicalType(LogicalType.DECIMAL(new DecimalType(2, 9))) .setPrecision(9).setScale(2), new SchemaElement("aFixedDecimal") .setRepetition_type(FieldRepetitionType.OPTIONAL) .setType(Type.FIXED_LEN_BYTE_ARRAY) .setType_length(4) .setConverted_type(ConvertedType.DECIMAL) + .setLogicalType(LogicalType.DECIMAL(new DecimalType(2, 9))) .setPrecision(9).setScale(2) ); Assert.assertEquals(expected, schemaElements); @@ -163,10 +168,11 @@ public void testEnumEquivalence() { assertEquals(type, parquetMetadataConverter.getType(parquetMetadataConverter.getPrimitive(type))); } for (OriginalType original : OriginalType.values()) { - assertEquals(original, parquetMetadataConverter.getOriginalType(parquetMetadataConverter.getConvertedType(original))); + 
assertEquals(original, parquetMetadataConverter.getOriginalType( + parquetMetadataConverter.getConvertedType(OriginalLogicalType.fromOriginalType(original)), null).toOriginalType()); } for (ConvertedType converted : ConvertedType.values()) { - assertEquals(converted, parquetMetadataConverter.getConvertedType(parquetMetadataConverter.getOriginalType(converted))); + assertEquals(converted, parquetMetadataConverter.getConvertedType(parquetMetadataConverter.getOriginalType(converted, null))); } } @@ -336,7 +342,7 @@ private ColumnChunkMetaData createColumnChunkMetaData() { 0, 0, 0, 0, 0); return md; } - + @Test public void testEncodingsCache() { ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter(); From 0a66346688beb8410de594feb3e00492ba591081 Mon Sep 17 00:00:00 2001 From: Nandor Kollar Date: Thu, 29 Mar 2018 14:02:37 +0200 Subject: [PATCH 02/12] refactor: move conversion logic to logical type classes --- .../parquet/schema/OriginalLogicalType.java | 124 +++++++++++++++++- .../converter/ParquetMetadataConverter.java | 60 +-------- .../TestParquetMetadataConverter.java | 4 +- 3 files changed, 121 insertions(+), 67 deletions(-) diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/OriginalLogicalType.java b/parquet-column/src/main/java/org/apache/parquet/schema/OriginalLogicalType.java index c635c5f1f3..294bd6fa63 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/OriginalLogicalType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/OriginalLogicalType.java @@ -18,8 +18,8 @@ */ package org.apache.parquet.schema; -import org.apache.parquet.ShouldNeverHappenException; import org.apache.parquet.format.BsonType; +import org.apache.parquet.format.ConvertedType; import org.apache.parquet.format.DateType; import org.apache.parquet.format.DecimalType; import org.apache.parquet.format.EnumType; @@ -41,10 +41,17 @@ public interface OriginalLogicalType { /** * Convert this parquet-mr logical type to 
parquet-format LogicalType. * - * @return the parquet-format representation of this logical type implementation + * @return the parquet-format LogicalType representation of this logical type implementation */ LogicalType toLogicalType(); + /** + * Convert this parquet-mr logical type to parquet-format ConvertedType. + * + * @return the parquet-format ConvertedType representation of this logical type implementation + */ + ConvertedType toConvertedType(); + /** * Convert this logical type to old logical type representation in parquet-mr (if there's any). * Those logical type implementations, which don't have a corresponding mapping should return null. @@ -125,6 +132,11 @@ public LogicalType toLogicalType() { return LogicalType.STRING(new StringType()); } + @Override + public ConvertedType toConvertedType() { + return ConvertedType.UTF8; + } + @Override public OriginalType toOriginalType() { return OriginalType.UTF8; @@ -157,6 +169,11 @@ public LogicalType toLogicalType() { return LogicalType.MAP(new MapType()); } + @Override + public ConvertedType toConvertedType() { + return ConvertedType.MAP; + } + @Override public OriginalType toOriginalType() { return OriginalType.MAP; @@ -189,6 +206,11 @@ public LogicalType toLogicalType() { return LogicalType.LIST(new ListType()); } + @Override + public ConvertedType toConvertedType() { + return ConvertedType.LIST; + } + @Override public OriginalType toOriginalType() { return OriginalType.LIST; @@ -221,6 +243,11 @@ public LogicalType toLogicalType() { return LogicalType.ENUM(new EnumType()); } + @Override + public ConvertedType toConvertedType() { + return ConvertedType.ENUM; + } + @Override public OriginalType toOriginalType() { return OriginalType.ENUM; @@ -269,6 +296,11 @@ public LogicalType toLogicalType() { return LogicalType.DECIMAL(new DecimalType(scale, precision)); } + @Override + public ConvertedType toConvertedType() { + return ConvertedType.DECIMAL; + } + @Override public OriginalType toOriginalType() { return 
OriginalType.DECIMAL; @@ -304,6 +336,11 @@ public LogicalType toLogicalType() { return LogicalType.DATE(new DateType()); } + @Override + public ConvertedType toConvertedType() { + return ConvertedType.DATE; + } + @Override public OriginalType toOriginalType() { return OriginalType.DATE; @@ -333,7 +370,7 @@ static org.apache.parquet.format.TimeUnit convertUnit(TimeUnit unit) { case MILLIS: return org.apache.parquet.format.TimeUnit.MILLIS(new MilliSeconds()); default: - throw new ShouldNeverHappenException(); + throw new RuntimeException("Unknown time unit " + unit); } } @@ -355,6 +392,18 @@ public LogicalType toLogicalType() { return LogicalType.TIME(new TimeType(isAdjustedToUTC, convertUnit(unit))); } + @Override + public ConvertedType toConvertedType() { + switch (toOriginalType()) { + case TIME_MILLIS: + return ConvertedType.TIME_MILLIS; + case TIME_MICROS: + return ConvertedType.TIME_MICROS; + default: + throw new RuntimeException("Unknown converted type for " + toOriginalType()); + } + } + @Override public OriginalType toOriginalType() { switch (unit) { @@ -362,9 +411,9 @@ public OriginalType toOriginalType() { return OriginalType.TIME_MILLIS; case MICROS: return OriginalType.TIME_MICROS; + default: + throw new RuntimeException("Unknown original type for " + unit); } - - throw new ShouldNeverHappenException(); } @Override @@ -400,6 +449,18 @@ public LogicalType toLogicalType() { return LogicalType.TIMESTAMP(new TimestampType(isAdjustedToUTC, convertUnit(unit))); } + @Override + public ConvertedType toConvertedType() { + switch (toOriginalType()) { + case TIMESTAMP_MICROS: + return ConvertedType.TIMESTAMP_MICROS; + case TIMESTAMP_MILLIS: + return ConvertedType.TIMESTAMP_MILLIS; + default: + throw new RuntimeException("Unknown converted type for " + unit); + } + } + @Override public OriginalType toOriginalType() { switch (unit) { @@ -408,7 +469,7 @@ public OriginalType toOriginalType() { case MICROS: return OriginalType.TIMESTAMP_MICROS; default: - throw new 
ShouldNeverHappenException(); + throw new RuntimeException("Unknown original type for " + unit); } } @@ -445,6 +506,30 @@ public LogicalType toLogicalType() { return LogicalType.INTEGER(new IntType(bitWidth, isSigned)); } + @Override + public ConvertedType toConvertedType() { + switch (toOriginalType()) { + case INT_8: + return ConvertedType.INT_8; + case INT_16: + return ConvertedType.INT_16; + case INT_32: + return ConvertedType.INT_32; + case INT_64: + return ConvertedType.INT_64; + case UINT_8: + return ConvertedType.UINT_8; + case UINT_16: + return ConvertedType.UINT_16; + case UINT_32: + return ConvertedType.UINT_32; + case UINT_64: + return ConvertedType.UINT_64; + default: + throw new RuntimeException("Unknown original type " + toOriginalType()); + } + } + @Override public OriginalType toOriginalType() { switch (bitWidth) { @@ -457,7 +542,7 @@ public OriginalType toOriginalType() { case 64: return isSigned ? OriginalType.INT_64 : OriginalType.UINT_64; default: - throw new ShouldNeverHappenException(); + throw new RuntimeException("Unknown original type " + toOriginalType()); } } @@ -491,6 +576,11 @@ public LogicalType toLogicalType() { return LogicalType.UNKNOWN(new NullType()); } + @Override + public ConvertedType toConvertedType() { + return null; + } + @Override public OriginalType toOriginalType() { return null; @@ -523,6 +613,11 @@ public LogicalType toLogicalType() { return LogicalType.JSON(new JsonType()); } + @Override + public ConvertedType toConvertedType() { + return ConvertedType.JSON; + } + @Override public OriginalType toOriginalType() { return OriginalType.JSON; @@ -555,6 +650,11 @@ public LogicalType toLogicalType() { return LogicalType.BSON(new BsonType()); } + @Override + public ConvertedType toConvertedType() { + return ConvertedType.BSON; + } + @Override public OriginalType toOriginalType() { return OriginalType.BSON; @@ -587,6 +687,11 @@ public LogicalType toLogicalType() { return LogicalType.UNKNOWN(new NullType()); } + @Override + 
public ConvertedType toConvertedType() { + return ConvertedType.INTERVAL; + } + @Override public OriginalType toOriginalType() { return OriginalType.INTERVAL; @@ -619,6 +724,11 @@ public LogicalType toLogicalType() { return LogicalType.UNKNOWN(new NullType()); } + @Override + public ConvertedType toConvertedType() { + return ConvertedType.MAP_KEY_VALUE; + } + @Override public OriginalType toOriginalType() { return OriginalType.MAP_KEY_VALUE; diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java index e29effd043..df5068260b 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java @@ -179,7 +179,7 @@ public void visit(PrimitiveType primitiveType) { element.setRepetition_type(toParquetRepetition(primitiveType.getRepetition())); element.setType(getType(primitiveType.getPrimitiveTypeName())); if (primitiveType.getOriginalLogicalType() != null) { - element.setConverted_type(getConvertedType(primitiveType.getOriginalLogicalType())); + element.setConverted_type(primitiveType.getOriginalLogicalType().toConvertedType()); element.setLogicalType(primitiveType.getOriginalLogicalType().toLogicalType()); } if (primitiveType.getDecimalMetadata() != null) { @@ -209,7 +209,7 @@ public void visit(GroupType groupType) { SchemaElement element = new SchemaElement(groupType.getName()); element.setRepetition_type(toParquetRepetition(groupType.getRepetition())); if (groupType.getOriginalLogicalType() != null) { - element.setConverted_type(getConvertedType(groupType.getOriginalLogicalType())); + element.setConverted_type(groupType.getOriginalLogicalType().toConvertedType()); element.setLogicalType(groupType.getOriginalLogicalType().toLogicalType()); } if (groupType.getId() != null) { @@ 
-695,62 +695,6 @@ OriginalLogicalType.TimeUnit convertTimeUnit(TimeUnit unit) { } } - // Visible for testing - ConvertedType getConvertedType(OriginalLogicalType type) { - OriginalType originalType = type.toOriginalType(); - if (originalType == null) { - return null; - } - switch (originalType) { - case UTF8: - return ConvertedType.UTF8; - case MAP: - return ConvertedType.MAP; - case MAP_KEY_VALUE: - return ConvertedType.MAP_KEY_VALUE; - case LIST: - return ConvertedType.LIST; - case ENUM: - return ConvertedType.ENUM; - case DECIMAL: - return ConvertedType.DECIMAL; - case DATE: - return ConvertedType.DATE; - case TIME_MILLIS: - return ConvertedType.TIME_MILLIS; - case TIME_MICROS: - return ConvertedType.TIME_MICROS; - case TIMESTAMP_MILLIS: - return ConvertedType.TIMESTAMP_MILLIS; - case TIMESTAMP_MICROS: - return ConvertedType.TIMESTAMP_MICROS; - case INTERVAL: - return ConvertedType.INTERVAL; - case INT_8: - return ConvertedType.INT_8; - case INT_16: - return ConvertedType.INT_16; - case INT_32: - return ConvertedType.INT_32; - case INT_64: - return ConvertedType.INT_64; - case UINT_8: - return ConvertedType.UINT_8; - case UINT_16: - return ConvertedType.UINT_16; - case UINT_32: - return ConvertedType.UINT_32; - case UINT_64: - return ConvertedType.UINT_64; - case JSON: - return ConvertedType.JSON; - case BSON: - return ConvertedType.BSON; - default: - throw new RuntimeException("Unknown original type " + type); - } - } - private static void addKeyValue(FileMetaData fileMetaData, String key, String value) { KeyValue keyValue = new KeyValue(key); keyValue.value = value; diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java index 10bf5ecd5c..18aaf6ed66 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java +++ 
b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java @@ -169,10 +169,10 @@ public void testEnumEquivalence() { } for (OriginalType original : OriginalType.values()) { assertEquals(original, parquetMetadataConverter.getOriginalType( - parquetMetadataConverter.getConvertedType(OriginalLogicalType.fromOriginalType(original)), null).toOriginalType()); + OriginalLogicalType.fromOriginalType(original).toConvertedType(), null).toOriginalType()); } for (ConvertedType converted : ConvertedType.values()) { - assertEquals(converted, parquetMetadataConverter.getConvertedType(parquetMetadataConverter.getOriginalType(converted, null))); + assertEquals(converted, parquetMetadataConverter.getOriginalType(converted, null).toConvertedType()); } } From f8e22366a9811cb7603bde1543644a58f09a5259 Mon Sep 17 00:00:00 2001 From: Nandor Kollar Date: Thu, 29 Mar 2018 14:07:34 +0200 Subject: [PATCH 03/12] better (hopefully) name for new type --- ...alType.java => LogicalTypeAnnotation.java} | 202 +++++++++--------- .../apache/parquet/schema/PrimitiveType.java | 2 +- .../java/org/apache/parquet/schema/Type.java | 16 +- .../java/org/apache/parquet/schema/Types.java | 18 +- .../converter/ParquetMetadataConverter.java | 98 ++++----- .../TestParquetMetadataConverter.java | 4 +- 6 files changed, 170 insertions(+), 170 deletions(-) rename parquet-column/src/main/java/org/apache/parquet/schema/{OriginalLogicalType.java => LogicalTypeAnnotation.java} (68%) diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/OriginalLogicalType.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java similarity index 68% rename from parquet-column/src/main/java/org/apache/parquet/schema/OriginalLogicalType.java rename to parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java index 294bd6fa63..af20dc166e 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/OriginalLogicalType.java +++ 
b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java @@ -37,7 +37,7 @@ import java.util.Objects; -public interface OriginalLogicalType { +public interface LogicalTypeAnnotation { /** * Convert this parquet-mr logical type to parquet-format LogicalType. * @@ -63,68 +63,68 @@ public interface OriginalLogicalType { /** * Helper method to convert the old representation of logical types (OriginalType) to new logical type. */ - static OriginalLogicalType fromOriginalType(OriginalType originalType) { + static LogicalTypeAnnotation fromOriginalType(OriginalType originalType) { if (originalType == null) { return null; } switch (originalType) { case UTF8: - return OriginalLogicalType.StringLogicalType.create(); + return StringLogicalTypeAnnotation.create(); case MAP: - return OriginalLogicalType.MapLogicalType.create(); + return MapLogicalTypeAnnotation.create(); case DECIMAL: - return OriginalLogicalType.DecimalLogicalType.create(); + return DecimalLogicalTypeAnnotation.create(); case LIST: - return OriginalLogicalType.ListLogicalType.create(); + return ListLogicalTypeAnnotation.create(); case DATE: - return OriginalLogicalType.DateLogicalType.create(); + return DateLogicalTypeAnnotation.create(); case INTERVAL: - return OriginalLogicalType.IntervalLogicalType.create(); + return IntervalLogicalTypeAnnotation.create(); case TIMESTAMP_MILLIS: - return OriginalLogicalType.TimestampLogicalType.create(true, OriginalLogicalType.TimeUnit.MILLIS); + return TimestampLogicalTypeAnnotation.create(true, LogicalTypeAnnotation.TimeUnit.MILLIS); case TIMESTAMP_MICROS: - return OriginalLogicalType.TimestampLogicalType.create(true, OriginalLogicalType.TimeUnit.MICROS); + return TimestampLogicalTypeAnnotation.create(true, LogicalTypeAnnotation.TimeUnit.MICROS); case TIME_MILLIS: - return OriginalLogicalType.TimeLogicalType.create(true, OriginalLogicalType.TimeUnit.MILLIS); + return TimeLogicalTypeAnnotation.create(true, LogicalTypeAnnotation.TimeUnit.MILLIS); 
case TIME_MICROS: - return OriginalLogicalType.TimeLogicalType.create(true, OriginalLogicalType.TimeUnit.MICROS); + return TimeLogicalTypeAnnotation.create(true, LogicalTypeAnnotation.TimeUnit.MICROS); case UINT_8: - return OriginalLogicalType.IntLogicalType.create((byte) 8, false); + return IntLogicalTypeAnnotation.create((byte) 8, false); case UINT_16: - return OriginalLogicalType.IntLogicalType.create((byte) 16, false); + return IntLogicalTypeAnnotation.create((byte) 16, false); case UINT_32: - return OriginalLogicalType.IntLogicalType.create((byte) 32, false); + return IntLogicalTypeAnnotation.create((byte) 32, false); case UINT_64: - return OriginalLogicalType.IntLogicalType.create((byte) 64, false); + return IntLogicalTypeAnnotation.create((byte) 64, false); case INT_8: - return OriginalLogicalType.IntLogicalType.create((byte) 8, true); + return IntLogicalTypeAnnotation.create((byte) 8, true); case INT_16: - return OriginalLogicalType.IntLogicalType.create((byte) 16, true); + return IntLogicalTypeAnnotation.create((byte) 16, true); case INT_32: - return OriginalLogicalType.IntLogicalType.create((byte) 32, true); + return IntLogicalTypeAnnotation.create((byte) 32, true); case INT_64: - return OriginalLogicalType.IntLogicalType.create((byte) 64, true); + return IntLogicalTypeAnnotation.create((byte) 64, true); case ENUM: - return OriginalLogicalType.EnumLogicalType.create(); + return EnumLogicalTypeAnnotation.create(); case JSON: - return OriginalLogicalType.JsonLogicalType.create(); + return JsonLogicalTypeAnnotation.create(); case BSON: - return OriginalLogicalType.BsonLogicalType.create(); + return BsonLogicalTypeAnnotation.create(); case MAP_KEY_VALUE: - return OriginalLogicalType.MapKeyValueType.create(); + return MapKeyValueTypeAnnotation.create(); default: - return OriginalLogicalType.NullLogicalType.create(); + return NullLogicalTypeAnnotation.create(); } } - class StringLogicalType implements OriginalLogicalType { - private static final 
StringLogicalType INSTANCE = new StringLogicalType(); + class StringLogicalTypeAnnotation implements LogicalTypeAnnotation { + private static final StringLogicalTypeAnnotation INSTANCE = new StringLogicalTypeAnnotation(); - public static OriginalLogicalType create() { + public static LogicalTypeAnnotation create() { return INSTANCE; } - private StringLogicalType() { + private StringLogicalTypeAnnotation() { } @Override @@ -144,7 +144,7 @@ public OriginalType toOriginalType() { @Override public boolean equals(Object obj) { - return obj instanceof StringLogicalType; + return obj instanceof StringLogicalTypeAnnotation; } @Override @@ -154,14 +154,14 @@ public int hashCode() { } } - class MapLogicalType implements OriginalLogicalType { - private static final MapLogicalType INSTANCE = new MapLogicalType(); + class MapLogicalTypeAnnotation implements LogicalTypeAnnotation { + private static final MapLogicalTypeAnnotation INSTANCE = new MapLogicalTypeAnnotation(); - public static OriginalLogicalType create() { + public static LogicalTypeAnnotation create() { return INSTANCE; } - private MapLogicalType() { + private MapLogicalTypeAnnotation() { } @Override @@ -181,7 +181,7 @@ public OriginalType toOriginalType() { @Override public boolean equals(Object obj) { - return obj instanceof MapLogicalType; + return obj instanceof MapLogicalTypeAnnotation; } @Override @@ -191,14 +191,14 @@ public int hashCode() { } } - class ListLogicalType implements OriginalLogicalType { - private static final ListLogicalType INSTANCE = new ListLogicalType(); + class ListLogicalTypeAnnotation implements LogicalTypeAnnotation { + private static final ListLogicalTypeAnnotation INSTANCE = new ListLogicalTypeAnnotation(); - public static OriginalLogicalType create() { + public static LogicalTypeAnnotation create() { return INSTANCE; } - private ListLogicalType() { + private ListLogicalTypeAnnotation() { } @Override @@ -218,7 +218,7 @@ public OriginalType toOriginalType() { @Override public boolean 
equals(Object obj) { - return obj instanceof ListLogicalType; + return obj instanceof ListLogicalTypeAnnotation; } @Override @@ -228,14 +228,14 @@ public int hashCode() { } } - class EnumLogicalType implements OriginalLogicalType { - private static final EnumLogicalType INSTANCE = new EnumLogicalType(); + class EnumLogicalTypeAnnotation implements LogicalTypeAnnotation { + private static final EnumLogicalTypeAnnotation INSTANCE = new EnumLogicalTypeAnnotation(); - public static OriginalLogicalType create() { + public static LogicalTypeAnnotation create() { return INSTANCE; } - private EnumLogicalType() { + private EnumLogicalTypeAnnotation() { } @Override @@ -255,7 +255,7 @@ public OriginalType toOriginalType() { @Override public boolean equals(Object obj) { - return obj instanceof EnumLogicalType; + return obj instanceof EnumLogicalTypeAnnotation; } @Override @@ -265,20 +265,20 @@ public int hashCode() { } } - class DecimalLogicalType implements OriginalLogicalType { + class DecimalLogicalTypeAnnotation implements LogicalTypeAnnotation { private int scale; private int precision; - public static OriginalLogicalType create() { - return new DecimalLogicalType(0, 0); + public static LogicalTypeAnnotation create() { + return new DecimalLogicalTypeAnnotation(0, 0); } - public static OriginalLogicalType create(int scale, int precision) { - return new DecimalLogicalType(scale, precision); + public static LogicalTypeAnnotation create(int scale, int precision) { + return new DecimalLogicalTypeAnnotation(scale, precision); } - private DecimalLogicalType(int scale, int precision) { + private DecimalLogicalTypeAnnotation(int scale, int precision) { this.scale = scale; this.precision = precision; } @@ -308,10 +308,10 @@ public OriginalType toOriginalType() { @Override public boolean equals(Object obj) { - if (!(obj instanceof DecimalLogicalType)) { + if (!(obj instanceof DecimalLogicalTypeAnnotation)) { return false; } - DecimalLogicalType other = (DecimalLogicalType) obj; + 
DecimalLogicalTypeAnnotation other = (DecimalLogicalTypeAnnotation) obj; return scale == other.scale && precision == other.precision; } @@ -321,14 +321,14 @@ public int hashCode() { } } - class DateLogicalType implements OriginalLogicalType { - private static final DateLogicalType INSTANCE = new DateLogicalType(); + class DateLogicalTypeAnnotation implements LogicalTypeAnnotation { + private static final DateLogicalTypeAnnotation INSTANCE = new DateLogicalTypeAnnotation(); - public static OriginalLogicalType create() { + public static LogicalTypeAnnotation create() { return INSTANCE; } - private DateLogicalType() { + private DateLogicalTypeAnnotation() { } @Override @@ -348,7 +348,7 @@ public OriginalType toOriginalType() { @Override public boolean equals(Object obj) { - return obj instanceof DateLogicalType; + return obj instanceof DateLogicalTypeAnnotation; } @Override @@ -374,15 +374,15 @@ static org.apache.parquet.format.TimeUnit convertUnit(TimeUnit unit) { } } - class TimeLogicalType implements OriginalLogicalType { + class TimeLogicalTypeAnnotation implements LogicalTypeAnnotation { private final boolean isAdjustedToUTC; private final TimeUnit unit; - public static OriginalLogicalType create(boolean isAdjustedToUTC, TimeUnit unit) { - return new TimeLogicalType(isAdjustedToUTC, unit); + public static LogicalTypeAnnotation create(boolean isAdjustedToUTC, TimeUnit unit) { + return new TimeLogicalTypeAnnotation(isAdjustedToUTC, unit); } - private TimeLogicalType(boolean isAdjustedToUTC, TimeUnit unit) { + private TimeLogicalTypeAnnotation(boolean isAdjustedToUTC, TimeUnit unit) { this.isAdjustedToUTC = isAdjustedToUTC; this.unit = unit; } @@ -418,10 +418,10 @@ public OriginalType toOriginalType() { @Override public boolean equals(Object obj) { - if (!(obj instanceof TimeLogicalType)) { + if (!(obj instanceof TimeLogicalTypeAnnotation)) { return false; } - TimeLogicalType other = (TimeLogicalType) obj; + TimeLogicalTypeAnnotation other = 
(TimeLogicalTypeAnnotation) obj; return isAdjustedToUTC == other.isAdjustedToUTC && unit == other.unit; } @@ -431,15 +431,15 @@ public int hashCode() { } } - class TimestampLogicalType implements OriginalLogicalType { + class TimestampLogicalTypeAnnotation implements LogicalTypeAnnotation { private final boolean isAdjustedToUTC; private final TimeUnit unit; - public static OriginalLogicalType create(boolean isAdjustedToUTC, TimeUnit unit) { - return new TimestampLogicalType(isAdjustedToUTC, unit); + public static LogicalTypeAnnotation create(boolean isAdjustedToUTC, TimeUnit unit) { + return new TimestampLogicalTypeAnnotation(isAdjustedToUTC, unit); } - private TimestampLogicalType(boolean isAdjustedToUTC, TimeUnit unit) { + private TimestampLogicalTypeAnnotation(boolean isAdjustedToUTC, TimeUnit unit) { this.isAdjustedToUTC = isAdjustedToUTC; this.unit = unit; } @@ -475,10 +475,10 @@ public OriginalType toOriginalType() { @Override public boolean equals(Object obj) { - if (!(obj instanceof TimestampLogicalType)) { + if (!(obj instanceof TimestampLogicalTypeAnnotation)) { return false; } - TimestampLogicalType other = (TimestampLogicalType) obj; + TimestampLogicalTypeAnnotation other = (TimestampLogicalTypeAnnotation) obj; return (isAdjustedToUTC == other.isAdjustedToUTC) && (unit == other.unit); } @@ -488,15 +488,15 @@ public int hashCode() { } } - class IntLogicalType implements OriginalLogicalType { + class IntLogicalTypeAnnotation implements LogicalTypeAnnotation { private final byte bitWidth; private final boolean isSigned; - public static OriginalLogicalType create(byte bitWidth, boolean isSigned) { - return new IntLogicalType(bitWidth, isSigned); + public static LogicalTypeAnnotation create(byte bitWidth, boolean isSigned) { + return new IntLogicalTypeAnnotation(bitWidth, isSigned); } - private IntLogicalType(byte bitWidth, boolean isSigned) { + private IntLogicalTypeAnnotation(byte bitWidth, boolean isSigned) { this.bitWidth = bitWidth; this.isSigned = 
isSigned; } @@ -548,10 +548,10 @@ public OriginalType toOriginalType() { @Override public boolean equals(Object obj) { - if (!(obj instanceof IntLogicalType)) { + if (!(obj instanceof IntLogicalTypeAnnotation)) { return false; } - IntLogicalType other = (IntLogicalType) obj; + IntLogicalTypeAnnotation other = (IntLogicalTypeAnnotation) obj; return (bitWidth == other.bitWidth) && (isSigned == other.isSigned); } @@ -561,14 +561,14 @@ public int hashCode() { } } - class NullLogicalType implements OriginalLogicalType { - private static final NullLogicalType INSTANCE = new NullLogicalType(); + class NullLogicalTypeAnnotation implements LogicalTypeAnnotation { + private static final NullLogicalTypeAnnotation INSTANCE = new NullLogicalTypeAnnotation(); - public static OriginalLogicalType create() { + public static LogicalTypeAnnotation create() { return INSTANCE; } - private NullLogicalType() { + private NullLogicalTypeAnnotation() { } @Override @@ -588,7 +588,7 @@ public OriginalType toOriginalType() { @Override public boolean equals(Object obj) { - return obj instanceof NullLogicalType; + return obj instanceof NullLogicalTypeAnnotation; } @Override @@ -598,14 +598,14 @@ public int hashCode() { } } - class JsonLogicalType implements OriginalLogicalType { - private static final JsonLogicalType INSTANCE = new JsonLogicalType(); + class JsonLogicalTypeAnnotation implements LogicalTypeAnnotation { + private static final JsonLogicalTypeAnnotation INSTANCE = new JsonLogicalTypeAnnotation(); - public static OriginalLogicalType create() { + public static LogicalTypeAnnotation create() { return INSTANCE; } - private JsonLogicalType() { + private JsonLogicalTypeAnnotation() { } @Override @@ -625,7 +625,7 @@ public OriginalType toOriginalType() { @Override public boolean equals(Object obj) { - return obj instanceof JsonLogicalType; + return obj instanceof JsonLogicalTypeAnnotation; } @Override @@ -635,14 +635,14 @@ public int hashCode() { } } - class BsonLogicalType implements 
OriginalLogicalType { - private static final BsonLogicalType INSTANCE = new BsonLogicalType(); + class BsonLogicalTypeAnnotation implements LogicalTypeAnnotation { + private static final BsonLogicalTypeAnnotation INSTANCE = new BsonLogicalTypeAnnotation(); - public static OriginalLogicalType create() { + public static LogicalTypeAnnotation create() { return INSTANCE; } - private BsonLogicalType() { + private BsonLogicalTypeAnnotation() { } @Override @@ -662,7 +662,7 @@ public OriginalType toOriginalType() { @Override public boolean equals(Object obj) { - return obj instanceof BsonLogicalType; + return obj instanceof BsonLogicalTypeAnnotation; } @Override @@ -672,14 +672,14 @@ public int hashCode() { } } - class IntervalLogicalType implements OriginalLogicalType { - private static IntervalLogicalType INSTANCE = new IntervalLogicalType(); + class IntervalLogicalTypeAnnotation implements LogicalTypeAnnotation { + private static IntervalLogicalTypeAnnotation INSTANCE = new IntervalLogicalTypeAnnotation(); - public static OriginalLogicalType create() { + public static LogicalTypeAnnotation create() { return INSTANCE; } - private IntervalLogicalType() { + private IntervalLogicalTypeAnnotation() { } @Override @@ -699,7 +699,7 @@ public OriginalType toOriginalType() { @Override public boolean equals(Object obj) { - return obj instanceof IntervalLogicalType; + return obj instanceof IntervalLogicalTypeAnnotation; } @Override @@ -709,14 +709,14 @@ public int hashCode() { } } - class MapKeyValueType implements OriginalLogicalType { - private static MapKeyValueType INSTANCE = new MapKeyValueType(); + class MapKeyValueTypeAnnotation implements LogicalTypeAnnotation { + private static MapKeyValueTypeAnnotation INSTANCE = new MapKeyValueTypeAnnotation(); - public static OriginalLogicalType create() { + public static LogicalTypeAnnotation create() { return INSTANCE; } - private MapKeyValueType() { + private MapKeyValueTypeAnnotation() { } @Override @@ -736,7 +736,7 @@ public 
OriginalType toOriginalType() { @Override public boolean equals(Object obj) { - return obj instanceof MapKeyValueType; + return obj instanceof MapKeyValueTypeAnnotation; } @Override diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java index d62ea26e9b..05f1ced1ad 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java @@ -451,7 +451,7 @@ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, this.length = length; this.decimalMeta = decimalMeta; if (originalType == OriginalType.DECIMAL) { - OriginalLogicalType.DecimalLogicalType originalLogicalType = (OriginalLogicalType.DecimalLogicalType) getOriginalLogicalType(); + LogicalTypeAnnotation.DecimalLogicalTypeAnnotation originalLogicalType = (LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) getLogicalTypeAnnotation(); originalLogicalType.setPrecision(decimalMeta.getPrecision()); originalLogicalType.setScale(decimalMeta.getScale()); } diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java index d7d0a22387..fa56104600 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java @@ -119,7 +119,7 @@ public boolean isMoreRestrictiveThan(Repetition other) { private final String name; private final Repetition repetition; - private final OriginalLogicalType originalLogicalType; + private final LogicalTypeAnnotation logicalTypeAnnotation; private final ID id; /** @@ -151,7 +151,7 @@ public Type(String name, Repetition repetition, OriginalType originalType) { super(); this.name = checkNotNull(name, "name"); this.repetition = checkNotNull(repetition, "repetition"); - this.originalLogicalType = originalType == null ? 
null : OriginalLogicalType.fromOriginalType(originalType); + this.logicalTypeAnnotation = originalType == null ? null : LogicalTypeAnnotation.fromOriginalType(originalType); this.id = id; } @@ -190,15 +190,15 @@ public ID getId() { return id; } - public OriginalLogicalType getOriginalLogicalType() { - return originalLogicalType; + public LogicalTypeAnnotation getLogicalTypeAnnotation() { + return logicalTypeAnnotation; } /** * @return the original type (LIST, MAP, ...) */ public OriginalType getOriginalType() { - return originalLogicalType == null ? null : originalLogicalType.toOriginalType(); + return logicalTypeAnnotation == null ? null : logicalTypeAnnotation.toOriginalType(); } /** @@ -251,8 +251,8 @@ public PrimitiveType asPrimitiveType() { public int hashCode() { int c = repetition.hashCode(); c = 31 * c + name.hashCode(); - if (originalLogicalType != null) { - c = 31 * c + originalLogicalType.hashCode(); + if (logicalTypeAnnotation != null) { + c = 31 * c + logicalTypeAnnotation.hashCode(); } if (id != null) { c = 31 * c + id.hashCode(); @@ -266,7 +266,7 @@ protected boolean equals(Type other) { && repetition == other.repetition && eqOrBothNull(repetition, other.repetition) && eqOrBothNull(id, other.id) - && eqOrBothNull(originalLogicalType, other.originalLogicalType); + && eqOrBothNull(logicalTypeAnnotation, other.logicalTypeAnnotation); }; @Override diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java index ed96cb2f88..413064156b 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java @@ -198,7 +198,7 @@ public abstract static class Builder { protected final Class returnClass; protected Type.Repetition repetition = null; - protected OriginalLogicalType originalLogicalType = null; + protected LogicalTypeAnnotation logicalTypeAnnotation = null; protected Type.ID id = null; 
private boolean repetitionAlreadySet = false; @@ -252,12 +252,12 @@ protected final THIS repetition(Type.Repetition repetition) { * @return this builder for method chaining */ public THIS as(OriginalType type) { - this.originalLogicalType = OriginalLogicalType.fromOriginalType(type); + this.logicalTypeAnnotation = LogicalTypeAnnotation.fromOriginalType(type); return self(); } - public THIS as(OriginalLogicalType type) { - this.originalLogicalType = type; + public THIS as(LogicalTypeAnnotation type) { + this.logicalTypeAnnotation = type; return self(); } @@ -309,7 +309,7 @@ public P named(String name) { } protected OriginalType getOriginalType () { - return originalLogicalType == null ? null : originalLogicalType.toOriginalType(); + return logicalTypeAnnotation == null ? null : logicalTypeAnnotation.toOriginalType(); } } @@ -410,8 +410,8 @@ protected PrimitiveType build(String name) { DecimalMetadata meta = decimalMetadata(); // validate type annotations and required metadata - if (originalLogicalType != null) { - OriginalType originalType = originalLogicalType.toOriginalType(); + if (logicalTypeAnnotation != null) { + OriginalType originalType = logicalTypeAnnotation.toOriginalType(); switch (originalType) { case UTF8: case JSON: @@ -1052,7 +1052,7 @@ public THIS value(Type type) { @Override protected Type build(String name) { - Preconditions.checkState(originalLogicalType == null, + Preconditions.checkState(logicalTypeAnnotation == null, "MAP is already a logical type and can't be changed."); if (keyType == null) { keyType = STRING_KEY; @@ -1200,7 +1200,7 @@ public LP named(String name) { @Override protected Type build(String name) { - Preconditions.checkState(originalLogicalType == null, + Preconditions.checkState(logicalTypeAnnotation == null, "LIST is already the logical type and can't be changed"); Preconditions.checkNotNull(elementType, "List element type"); diff --git 
a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java index df5068260b..f4bc5e583d 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java @@ -81,7 +81,7 @@ import org.apache.parquet.schema.Type.Repetition; import org.apache.parquet.schema.TypeVisitor; import org.apache.parquet.schema.Types; -import org.apache.parquet.schema.OriginalLogicalType; +import org.apache.parquet.schema.LogicalTypeAnnotation; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -178,9 +178,9 @@ public void visit(PrimitiveType primitiveType) { SchemaElement element = new SchemaElement(primitiveType.getName()); element.setRepetition_type(toParquetRepetition(primitiveType.getRepetition())); element.setType(getType(primitiveType.getPrimitiveTypeName())); - if (primitiveType.getOriginalLogicalType() != null) { - element.setConverted_type(primitiveType.getOriginalLogicalType().toConvertedType()); - element.setLogicalType(primitiveType.getOriginalLogicalType().toLogicalType()); + if (primitiveType.getLogicalTypeAnnotation() != null) { + element.setConverted_type(primitiveType.getLogicalTypeAnnotation().toConvertedType()); + element.setLogicalType(primitiveType.getLogicalTypeAnnotation().toLogicalType()); } if (primitiveType.getDecimalMetadata() != null) { element.setPrecision(primitiveType.getDecimalMetadata().getPrecision()); @@ -208,9 +208,9 @@ public void visit(MessageType messageType) { public void visit(GroupType groupType) { SchemaElement element = new SchemaElement(groupType.getName()); element.setRepetition_type(toParquetRepetition(groupType.getRepetition())); - if (groupType.getOriginalLogicalType() != null) { - element.setConverted_type(groupType.getOriginalLogicalType().toConvertedType()); - 
element.setLogicalType(groupType.getOriginalLogicalType().toLogicalType()); + if (groupType.getLogicalTypeAnnotation() != null) { + element.setConverted_type(groupType.getLogicalTypeAnnotation().toConvertedType()); + element.setLogicalType(groupType.getLogicalTypeAnnotation().toLogicalType()); } if (groupType.getId() != null) { element.setField_id(groupType.getId().intValue()); @@ -595,101 +595,101 @@ Type getType(PrimitiveTypeName type) { } // Visible for testing - OriginalLogicalType getOriginalType(ConvertedType type, SchemaElement schemaElement) { + LogicalTypeAnnotation getOriginalType(ConvertedType type, SchemaElement schemaElement) { switch (type) { case UTF8: - return OriginalLogicalType.StringLogicalType.create(); + return LogicalTypeAnnotation.StringLogicalTypeAnnotation.create(); case MAP: - return OriginalLogicalType.MapLogicalType.create(); + return LogicalTypeAnnotation.MapLogicalTypeAnnotation.create(); case MAP_KEY_VALUE: - return OriginalLogicalType.MapKeyValueType.create(); + return LogicalTypeAnnotation.MapKeyValueTypeAnnotation.create(); case LIST: - return OriginalLogicalType.ListLogicalType.create(); + return LogicalTypeAnnotation.ListLogicalTypeAnnotation.create(); case ENUM: - return OriginalLogicalType.EnumLogicalType.create(); + return LogicalTypeAnnotation.EnumLogicalTypeAnnotation.create(); case DECIMAL: if (schemaElement == null) { - return OriginalLogicalType.DecimalLogicalType.create(); + return LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(); } - return OriginalLogicalType.DecimalLogicalType.create(schemaElement.scale, schemaElement.precision); + return LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(schemaElement.scale, schemaElement.precision); case DATE: - return OriginalLogicalType.DateLogicalType.create(); + return LogicalTypeAnnotation.DateLogicalTypeAnnotation.create(); case TIME_MILLIS: - return OriginalLogicalType.TimeLogicalType.create(true, OriginalLogicalType.TimeUnit.MILLIS); + return 
LogicalTypeAnnotation.TimeLogicalTypeAnnotation.create(true, LogicalTypeAnnotation.TimeUnit.MILLIS); case TIME_MICROS: - return OriginalLogicalType.TimeLogicalType.create(true, OriginalLogicalType.TimeUnit.MICROS); + return LogicalTypeAnnotation.TimeLogicalTypeAnnotation.create(true, LogicalTypeAnnotation.TimeUnit.MICROS); case TIMESTAMP_MILLIS: - return OriginalLogicalType.TimestampLogicalType.create(true, OriginalLogicalType.TimeUnit.MILLIS); + return LogicalTypeAnnotation.TimestampLogicalTypeAnnotation.create(true, LogicalTypeAnnotation.TimeUnit.MILLIS); case TIMESTAMP_MICROS: - return OriginalLogicalType.TimestampLogicalType.create(true, OriginalLogicalType.TimeUnit.MICROS); + return LogicalTypeAnnotation.TimestampLogicalTypeAnnotation.create(true, LogicalTypeAnnotation.TimeUnit.MICROS); case INTERVAL: - return OriginalLogicalType.IntervalLogicalType.create(); + return LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.create(); case INT_8: - return OriginalLogicalType.IntLogicalType.create((byte) 8, true); + return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 8, true); case INT_16: - return OriginalLogicalType.IntLogicalType.create((byte) 16, true); + return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 16, true); case INT_32: - return OriginalLogicalType.IntLogicalType.create((byte) 32, true); + return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 32, true); case INT_64: - return OriginalLogicalType.IntLogicalType.create((byte) 64, true); + return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 64, true); case UINT_8: - return OriginalLogicalType.IntLogicalType.create((byte) 8, false); + return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 8, false); case UINT_16: - return OriginalLogicalType.IntLogicalType.create((byte) 16, false); + return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 16, false); case UINT_32: - return OriginalLogicalType.IntLogicalType.create((byte) 
32, false); + return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 32, false); case UINT_64: - return OriginalLogicalType.IntLogicalType.create((byte) 64, false); + return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 64, false); case JSON: - return OriginalLogicalType.JsonLogicalType.create(); + return LogicalTypeAnnotation.JsonLogicalTypeAnnotation.create(); case BSON: - return OriginalLogicalType.BsonLogicalType.create(); + return LogicalTypeAnnotation.BsonLogicalTypeAnnotation.create(); default: - return OriginalLogicalType.NullLogicalType.create(); + return LogicalTypeAnnotation.NullLogicalTypeAnnotation.create(); } } - OriginalLogicalType getOriginalType(LogicalType type) { + LogicalTypeAnnotation getOriginalType(LogicalType type) { switch (type.getSetField()) { case MAP: - return OriginalLogicalType.MapLogicalType.create(); + return LogicalTypeAnnotation.MapLogicalTypeAnnotation.create(); case BSON: - return OriginalLogicalType.BsonLogicalType.create(); + return LogicalTypeAnnotation.BsonLogicalTypeAnnotation.create(); case DATE: - return OriginalLogicalType.DateLogicalType.create(); + return LogicalTypeAnnotation.DateLogicalTypeAnnotation.create(); case ENUM: - return OriginalLogicalType.EnumLogicalType.create(); + return LogicalTypeAnnotation.EnumLogicalTypeAnnotation.create(); case JSON: - return OriginalLogicalType.JsonLogicalType.create(); + return LogicalTypeAnnotation.JsonLogicalTypeAnnotation.create(); case LIST: - return OriginalLogicalType.ListLogicalType.create(); + return LogicalTypeAnnotation.ListLogicalTypeAnnotation.create(); case TIME: TimeType time = type.getTIME(); - return OriginalLogicalType.TimeLogicalType.create(time.isAdjustedToUTC, convertTimeUnit(time.unit)); + return LogicalTypeAnnotation.TimeLogicalTypeAnnotation.create(time.isAdjustedToUTC, convertTimeUnit(time.unit)); case STRING: - return OriginalLogicalType.StringLogicalType.create(); + return 
LogicalTypeAnnotation.StringLogicalTypeAnnotation.create(); case DECIMAL: DecimalType decimal = type.getDECIMAL(); - return OriginalLogicalType.DecimalLogicalType.create(decimal.scale, decimal.precision); + return LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(decimal.scale, decimal.precision); case INTEGER: IntType integer = type.getINTEGER(); - return OriginalLogicalType.IntLogicalType.create(integer.bitWidth, integer.isSigned); + return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create(integer.bitWidth, integer.isSigned); case UNKNOWN: return null; case TIMESTAMP: TimestampType timestamp = type.getTIMESTAMP(); - return OriginalLogicalType.TimestampLogicalType.create(timestamp.isAdjustedToUTC, convertTimeUnit(timestamp.unit)); + return LogicalTypeAnnotation.TimestampLogicalTypeAnnotation.create(timestamp.isAdjustedToUTC, convertTimeUnit(timestamp.unit)); default: throw new RuntimeException("Unknown logical type " + type); } } - OriginalLogicalType.TimeUnit convertTimeUnit(TimeUnit unit) { + LogicalTypeAnnotation.TimeUnit convertTimeUnit(TimeUnit unit) { switch (unit.getSetField()) { case MICROS: - return OriginalLogicalType.TimeUnit.MICROS; + return LogicalTypeAnnotation.TimeUnit.MICROS; case MILLIS: - return OriginalLogicalType.TimeUnit.MILLIS; + return LogicalTypeAnnotation.TimeUnit.MILLIS; default: throw new RuntimeException("Unknown time unit " + unit); } @@ -995,8 +995,8 @@ private void buildChildren(Types.GroupBuilder builder, childBuilder.as(getOriginalType(schemaElement.logicalType)); } if (schemaElement.isSetConverted_type()) { - OriginalLogicalType originalType = getOriginalType(schemaElement.converted_type, schemaElement); - OriginalLogicalType newLogicalType = getOriginalType(schemaElement.logicalType); + LogicalTypeAnnotation originalType = getOriginalType(schemaElement.converted_type, schemaElement); + LogicalTypeAnnotation newLogicalType = getOriginalType(schemaElement.logicalType); if (!originalType.equals(newLogicalType)) { 
childBuilder.as(getOriginalType(schemaElement.converted_type, schemaElement)); } diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java index 18aaf6ed66..c741f400b0 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java @@ -69,7 +69,7 @@ import org.apache.parquet.hadoop.metadata.ParquetMetadata; import org.apache.parquet.io.api.Binary; import org.apache.parquet.schema.PrimitiveType; -import org.apache.parquet.schema.OriginalLogicalType; +import org.apache.parquet.schema.LogicalTypeAnnotation; import org.junit.Assert; import org.junit.Test; import org.apache.parquet.example.Paper; @@ -169,7 +169,7 @@ public void testEnumEquivalence() { } for (OriginalType original : OriginalType.values()) { assertEquals(original, parquetMetadataConverter.getOriginalType( - OriginalLogicalType.fromOriginalType(original).toConvertedType(), null).toOriginalType()); + LogicalTypeAnnotation.fromOriginalType(original).toConvertedType(), null).toOriginalType()); } for (ConvertedType converted : ConvertedType.values()) { assertEquals(converted, parquetMetadataConverter.getOriginalType(converted, null).toConvertedType()); From 6b4ff74b329c37ec2137109032173d76f1068df8 Mon Sep 17 00:00:00 2001 From: Nandor Kollar Date: Wed, 4 Apr 2018 11:46:00 +0200 Subject: [PATCH 04/12] Address code review comments --- .../parquet/schema/LogicalTypeAnnotation.java | 68 ++++--------------- .../apache/parquet/schema/PrimitiveType.java | 7 +- .../java/org/apache/parquet/schema/Type.java | 6 +- .../java/org/apache/parquet/schema/Types.java | 2 +- .../converter/ParquetMetadataConverter.java | 11 ++- .../hadoop/metadata/ParquetMetadata.java | 2 + .../TestParquetMetadataConverter.java | 2 +- 7 files 
changed, 28 insertions(+), 70 deletions(-) diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java index af20dc166e..3518ccba33 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java @@ -63,7 +63,7 @@ public interface LogicalTypeAnnotation { /** * Helper method to convert the old representation of logical types (OriginalType) to new logical type. */ - static LogicalTypeAnnotation fromOriginalType(OriginalType originalType) { + static LogicalTypeAnnotation fromOriginalType(OriginalType originalType, DecimalMetadata decimalMetadata) { if (originalType == null) { return null; } @@ -73,7 +73,9 @@ static LogicalTypeAnnotation fromOriginalType(OriginalType originalType) { case MAP: return MapLogicalTypeAnnotation.create(); case DECIMAL: - return DecimalLogicalTypeAnnotation.create(); + int scale = (decimalMetadata == null ? 0 : decimalMetadata.getScale()); + int precision = (decimalMetadata == null ? 
0 : decimalMetadata.getPrecision()); + return DecimalLogicalTypeAnnotation.create(scale, precision); case LIST: return ListLogicalTypeAnnotation.create(); case DATE: @@ -113,7 +115,7 @@ static LogicalTypeAnnotation fromOriginalType(OriginalType originalType) { case MAP_KEY_VALUE: return MapKeyValueTypeAnnotation.create(); default: - return NullLogicalTypeAnnotation.create(); + throw new RuntimeException("Can't convert original type to logical type, unknown original type " + originalType); } } @@ -266,13 +268,8 @@ public int hashCode() { } class DecimalLogicalTypeAnnotation implements LogicalTypeAnnotation { - - private int scale; - private int precision; - - public static LogicalTypeAnnotation create() { - return new DecimalLogicalTypeAnnotation(0, 0); - } + private final int scale; + private final int precision; public static LogicalTypeAnnotation create(int scale, int precision) { return new DecimalLogicalTypeAnnotation(scale, precision); @@ -283,14 +280,6 @@ private DecimalLogicalTypeAnnotation(int scale, int precision) { this.precision = precision; } - public void setPrecision(int precision) { - this.precision = precision; - } - - public void setScale(int scale) { - this.scale = scale; - } - @Override public LogicalType toLogicalType() { return LogicalType.DECIMAL(new DecimalType(scale, precision)); @@ -561,43 +550,6 @@ public int hashCode() { } } - class NullLogicalTypeAnnotation implements LogicalTypeAnnotation { - private static final NullLogicalTypeAnnotation INSTANCE = new NullLogicalTypeAnnotation(); - - public static LogicalTypeAnnotation create() { - return INSTANCE; - } - - private NullLogicalTypeAnnotation() { - } - - @Override - public LogicalType toLogicalType() { - return LogicalType.UNKNOWN(new NullType()); - } - - @Override - public ConvertedType toConvertedType() { - return null; - } - - @Override - public OriginalType toOriginalType() { - return null; - } - - @Override - public boolean equals(Object obj) { - return obj instanceof 
NullLogicalTypeAnnotation; - } - - @Override - public int hashCode() { - // This type doesn't have any parameters, thus use class hashcode - return getClass().hashCode(); - } - } - class JsonLogicalTypeAnnotation implements LogicalTypeAnnotation { private static final JsonLogicalTypeAnnotation INSTANCE = new JsonLogicalTypeAnnotation(); @@ -672,6 +624,9 @@ public int hashCode() { } } + // This logical type annotation is implemented to support backward compatibility with ConvertedType. + // The new logical type representation in parquet-format doesn't have any interval type, + // thus this annotation is mapped to UNKNOWN. class IntervalLogicalTypeAnnotation implements LogicalTypeAnnotation { private static IntervalLogicalTypeAnnotation INSTANCE = new IntervalLogicalTypeAnnotation(); @@ -709,6 +664,9 @@ public int hashCode() { } } + // This logical type annotation is implemented to support backward compatibility with ConvertedType. + // The new logical type representation in parquet-format doesn't have any key-value type, + // thus this annotation is mapped to UNKNOWN. This type shouldn't be used. 
class MapKeyValueTypeAnnotation implements LogicalTypeAnnotation { private static MapKeyValueTypeAnnotation INSTANCE = new MapKeyValueTypeAnnotation(); diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java index 05f1ced1ad..7053b10388 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java @@ -446,15 +446,10 @@ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, int length, String name, OriginalType originalType, DecimalMetadata decimalMeta, ID id, ColumnOrder columnOrder) { - super(name, repetition, originalType, id); + super(name, repetition, originalType, decimalMeta, id); this.primitive = primitive; this.length = length; this.decimalMeta = decimalMeta; - if (originalType == OriginalType.DECIMAL) { - LogicalTypeAnnotation.DecimalLogicalTypeAnnotation originalLogicalType = (LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) getLogicalTypeAnnotation(); - originalLogicalType.setPrecision(decimalMeta.getPrecision()); - originalLogicalType.setScale(decimalMeta.getScale()); - } if (columnOrder == null) { columnOrder = primitive == PrimitiveTypeName.INT96 || originalType == OriginalType.INTERVAL diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java index fa56104600..56ca9ce46d 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java @@ -148,10 +148,14 @@ public Type(String name, Repetition repetition, OriginalType originalType) { * @param id (optional) the id of the fields. 
*/ Type(String name, Repetition repetition, OriginalType originalType, ID id) { + this(name, repetition, originalType, null, id); + } + + Type(String name, Repetition repetition, OriginalType originalType, DecimalMetadata decimalMetadata, ID id) { super(); this.name = checkNotNull(name, "name"); this.repetition = checkNotNull(repetition, "repetition"); - this.logicalTypeAnnotation = originalType == null ? null : LogicalTypeAnnotation.fromOriginalType(originalType); + this.logicalTypeAnnotation = originalType == null ? null : LogicalTypeAnnotation.fromOriginalType(originalType, decimalMetadata); this.id = id; } diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java index 413064156b..7490032516 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java @@ -252,7 +252,7 @@ protected final THIS repetition(Type.Repetition repetition) { * @return this builder for method chaining */ public THIS as(OriginalType type) { - this.logicalTypeAnnotation = LogicalTypeAnnotation.fromOriginalType(type); + this.logicalTypeAnnotation = LogicalTypeAnnotation.fromOriginalType(type, null); return self(); } diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java index f4bc5e583d..3de07cdcb1 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java @@ -608,10 +608,9 @@ LogicalTypeAnnotation getOriginalType(ConvertedType type, SchemaElement schemaEl case ENUM: return LogicalTypeAnnotation.EnumLogicalTypeAnnotation.create(); case DECIMAL: - if (schemaElement == null) { - return 
LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(); - } - return LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(schemaElement.scale, schemaElement.precision); + int scale = (schemaElement == null ? 0 : schemaElement.scale); + int precision = (schemaElement == null ? 0 : schemaElement.precision); + return LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(scale, precision); case DATE: return LogicalTypeAnnotation.DateLogicalTypeAnnotation.create(); case TIME_MILLIS: @@ -645,7 +644,7 @@ LogicalTypeAnnotation getOriginalType(ConvertedType type, SchemaElement schemaEl case BSON: return LogicalTypeAnnotation.BsonLogicalTypeAnnotation.create(); default: - return LogicalTypeAnnotation.NullLogicalTypeAnnotation.create(); + throw new RuntimeException("Can't convert converted type to logical type, unknown converted type " + type); } } @@ -684,7 +683,7 @@ LogicalTypeAnnotation getOriginalType(LogicalType type) { } } - LogicalTypeAnnotation.TimeUnit convertTimeUnit(TimeUnit unit) { + private LogicalTypeAnnotation.TimeUnit convertTimeUnit(TimeUnit unit) { switch (unit.getSetField()) { case MICROS: return LogicalTypeAnnotation.TimeUnit.MICROS; diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ParquetMetadata.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ParquetMetadata.java index 6e3f845328..523b6b36fe 100755 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ParquetMetadata.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ParquetMetadata.java @@ -40,6 +40,8 @@ public class ParquetMetadata { private static final ObjectMapper objectMapper = new ObjectMapper(); + // Disable FAIL_ON_EMPTY_BEANS on the ObjectMapper. Without this setting parquet-cascading tests fail, + // because LogicalTypeAnnotation implementations are classes without any property. 
static { objectMapper.configure(SerializationConfig.Feature.FAIL_ON_EMPTY_BEANS, false); } diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java index c741f400b0..b713ed7f9b 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java @@ -169,7 +169,7 @@ public void testEnumEquivalence() { } for (OriginalType original : OriginalType.values()) { assertEquals(original, parquetMetadataConverter.getOriginalType( - LogicalTypeAnnotation.fromOriginalType(original).toConvertedType(), null).toOriginalType()); + LogicalTypeAnnotation.fromOriginalType(original, null).toConvertedType(), null).toOriginalType()); } for (ConvertedType converted : ConvertedType.values()) { assertEquals(converted, parquetMetadataConverter.getOriginalType(converted, null).toConvertedType()); From f80160508f53b3c954edacad191348c72b2146db Mon Sep 17 00:00:00 2001 From: Nandor Kollar Date: Wed, 18 Apr 2018 14:28:42 +0200 Subject: [PATCH 05/12] Deprecate old constructors, address comment related to builder --- .../org/apache/parquet/schema/GroupType.java | 18 +++++-- .../parquet/schema/LogicalTypeAnnotation.java | 8 +++ .../apache/parquet/schema/PrimitiveType.java | 48 ++++++++++++++++-- .../java/org/apache/parquet/schema/Type.java | 21 +++++++- .../java/org/apache/parquet/schema/Types.java | 49 +++++++++++++++++-- .../parquet/schema/TestTypeBuilders.java | 47 ++++++++++++++++-- .../parquet/pig/PigSchemaConverter.java | 9 ++-- 7 files changed, 181 insertions(+), 19 deletions(-) diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java b/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java index 68dba979b8..f9162e26b3 100644 --- 
a/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -47,7 +47,7 @@ public class GroupType extends Type { * @param fields the contained fields */ public GroupType(Repetition repetition, String name, List fields) { - this(repetition, name, null, fields, null); + this(repetition, name, (LogicalTypeAnnotation) null, fields, null); } /** @@ -88,6 +88,7 @@ public GroupType(Repetition repetition, String name, OriginalType originalType, * @param fields the contained fields * @param id the id of the field */ + @Deprecated GroupType(Repetition repetition, String name, OriginalType originalType, List fields, ID id) { super(name, repetition, originalType, id); this.fields = fields; @@ -97,6 +98,15 @@ public GroupType(Repetition repetition, String name, OriginalType originalType, } } + GroupType(Repetition repetition, String name, LogicalTypeAnnotation logicalTypeAnnotation, List fields, ID id) { + super(name, repetition, logicalTypeAnnotation, id); + this.fields = fields; + this.indexByName = new HashMap(); + for (int i = 0; i < fields.size(); i++) { + indexByName.put(fields.get(i).getName(), i); + } + } + /** * @param id the field id * @return a new GroupType with the same fields and a new id diff --git 
a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java index 3518ccba33..3fc54e97bf 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java @@ -280,6 +280,14 @@ private DecimalLogicalTypeAnnotation(int scale, int precision) { this.precision = precision; } + public int getPrecision() { + return precision; + } + + public int getScale() { + return scale; + } + @Override public LogicalType toLogicalType() { return LogicalType.DECIMAL(new DecimalType(scale, precision)); diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java index 7053b10388..dfda0b8e78 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java @@ -389,9 +389,8 @@ abstract public void addValueToPrimitiveConverter( * @param primitive STRING, INT64, ... 
* @param name the name of the type */ - public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, - String name) { - this(repetition, primitive, 0, name, null, null, null); + public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, String name) { + this(repetition, primitive, 0, name, (LogicalTypeAnnotation) null, null, null); } /** @@ -401,7 +400,7 @@ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, * @param name the name of the type */ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, int length, String name) { - this(repetition, primitive, length, name, null, null, null); + this(repetition, primitive, length, name, (LogicalTypeAnnotation) null, null, null); } /** @@ -409,7 +408,10 @@ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, int len * @param primitive STRING, INT64, ... * @param name the name of the type * @param originalType (optional) the original type to help with cross schema convertion (LIST, MAP, ...) + * + * @deprecated use {@link #PrimitiveType(Repetition, PrimitiveTypeName, String, LogicalTypeAnnotation)} instead */ + @Deprecated public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, String name, OriginalType originalType) { this(repetition, primitive, 0, name, originalType, null, null); @@ -436,13 +438,20 @@ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, * @param originalType (optional) the original type (MAP, DECIMAL, UTF8, ...) 
* @param decimalMeta (optional) metadata about the decimal type * @param id the id of the field + * + * @deprecated use {@link #PrimitiveType(Repetition, PrimitiveTypeName, int, String, LogicalTypeAnnotation, ID)} instead */ + @Deprecated public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, int length, String name, OriginalType originalType, DecimalMetadata decimalMeta, ID id) { this(repetition, primitive, length, name, originalType, decimalMeta, id, null); } + /** + * @deprecated use {@link #PrimitiveType(Repetition, PrimitiveTypeName, int, String, LogicalTypeAnnotation, ID, ColumnOrder)} instead + */ + @Deprecated PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, int length, String name, OriginalType originalType, DecimalMetadata decimalMeta, ID id, ColumnOrder columnOrder) { @@ -459,6 +468,37 @@ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, this.columnOrder = requireValidColumnOrder(columnOrder); } + public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, + String name, LogicalTypeAnnotation logicalTypeAnnotation) { + this(repetition, primitive, 0, name, logicalTypeAnnotation, null, null); + } + + public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, + int length, String name, LogicalTypeAnnotation logicalTypeAnnotation, ID id) { + this(repetition, primitive, length, name, logicalTypeAnnotation, id, null); + } + + PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, + int length, String name, LogicalTypeAnnotation logicalTypeAnnotation, + ID id, ColumnOrder columnOrder) { + super(name, repetition, logicalTypeAnnotation, id); + this.primitive = primitive; + this.length = length; + if (getOriginalType() == OriginalType.DECIMAL) { + LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimal = (LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) logicalTypeAnnotation; + this.decimalMeta = new DecimalMetadata(decimal.getPrecision(), decimal.getScale()); + } 
else { + this.decimalMeta = null; + } + + if (columnOrder == null) { + columnOrder = primitive == PrimitiveTypeName.INT96 || getOriginalType() == OriginalType.INTERVAL + ? ColumnOrder.undefined() + : ColumnOrder.typeDefined(); + } + this.columnOrder = requireValidColumnOrder(columnOrder); + } + private ColumnOrder requireValidColumnOrder(ColumnOrder columnOrder) { if (primitive == PrimitiveTypeName.INT96) { Preconditions.checkArgument(columnOrder.getColumnOrderName() == ColumnOrderName.UNDEFINED, diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java index 56ca9ce46d..dbe7fedd2d 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java @@ -128,7 +128,7 @@ public boolean isMoreRestrictiveThan(Repetition other) { */ @Deprecated public Type(String name, Repetition repetition) { - this(name, repetition, null, null); + this(name, repetition, (LogicalTypeAnnotation) null, null); } /** @@ -146,11 +146,18 @@ public Type(String name, Repetition repetition, OriginalType originalType) { * @param repetition OPTIONAL, REPEATED, REQUIRED * @param originalType (optional) the original type to help with cross schema conversion (LIST, MAP, ...) * @param id (optional) the id of the fields. 
+ * + * @deprecated use {@link #Type(String, Repetition, LogicalTypeAnnotation, ID)} instead */ + @Deprecated Type(String name, Repetition repetition, OriginalType originalType, ID id) { this(name, repetition, originalType, null, id); } + /** + * @deprecated use {@link #Type(String, Repetition, LogicalTypeAnnotation, ID)} instead + */ + @Deprecated Type(String name, Repetition repetition, OriginalType originalType, DecimalMetadata decimalMetadata, ID id) { super(); this.name = checkNotNull(name, "name"); @@ -159,6 +166,18 @@ public Type(String name, Repetition repetition, OriginalType originalType) { this.id = id; } + public Type(String name, Repetition repetition, LogicalTypeAnnotation logicalTypeAnnotation) { + this(name, repetition, logicalTypeAnnotation, null); + } + + Type(String name, Repetition repetition, LogicalTypeAnnotation logicalTypeAnnotation, ID id) { + super(); + this.name = checkNotNull(name, "name"); + this.repetition = checkNotNull(repetition, "repetition"); + this.logicalTypeAnnotation = logicalTypeAnnotation; + this.id = id; + } + /** * @param id * @return the same type with the id field set diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java index 7490032516..456ee25b0a 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java @@ -23,6 +23,7 @@ import java.util.List; import org.apache.parquet.Preconditions; +import org.apache.parquet.format.DecimalType; import org.apache.parquet.schema.ColumnOrder.ColumnOrderName; import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; import org.apache.parquet.schema.Type.ID; @@ -250,14 +251,32 @@ protected final THIS repetition(Type.Repetition repetition) { * * @param type an {@code OriginalType} * @return this builder for method chaining + * + * @deprecated use {@link #as(LogicalTypeAnnotation)} with the 
corresponding logical type instead */ + @Deprecated public THIS as(OriginalType type) { this.logicalTypeAnnotation = LogicalTypeAnnotation.fromOriginalType(type, null); return self(); } + protected boolean newLogicalTypeSet; + + /** + * Adds a type annotation ({@link LogicalTypeAnnotation}) to the type being built. + *

+ * Type annotations are used to extend the types that parquet can store, by + * specifying how the primitive types should be interpreted. This keeps the + * set of primitive types to a minimum and reuses parquet's efficient + * encodings. For example, strings are stored as byte arrays (binary) with + * a UTF8 annotation. + * + * @param type an {@code {@link LogicalTypeAnnotation}} + * @return this builder for method chaining + */ public THIS as(LogicalTypeAnnotation type) { this.logicalTypeAnnotation = type; + this.newLogicalTypeSet = true; return self(); } @@ -351,6 +370,9 @@ public THIS length(int length) { return self(); } + private boolean precisionAlreadySet; + private boolean scaleAlreadySet; + /** * Adds the precision for a DECIMAL. *

@@ -360,9 +382,13 @@ public THIS length(int length) { * * @param precision an int precision value for the DECIMAL * @return this builder for method chaining + * + * @deprecated use {@link #as(LogicalTypeAnnotation)} with the corresponding decimal type instead */ + @Deprecated public THIS precision(int precision) { this.precision = precision; + precisionAlreadySet = true; return self(); } @@ -378,9 +404,13 @@ public THIS precision(int precision) { * * @param scale an int scale value for the DECIMAL * @return this builder for method chaining + * + * @deprecated use {@link #as(LogicalTypeAnnotation)} with the corresponding decimal type instead */ + @Deprecated public THIS scale(int scale) { this.scale = scale; + scaleAlreadySet = true; return self(); } @@ -498,11 +528,24 @@ private static long maxPrecision(int numBytes) { protected DecimalMetadata decimalMetadata() { DecimalMetadata meta = null; if (OriginalType.DECIMAL == getOriginalType()) { + LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalType = (LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) logicalTypeAnnotation; + if (newLogicalTypeSet) { + if (scaleAlreadySet) { + Preconditions.checkArgument(this.scale == decimalType.getScale(), + "Decimal scale should match with the scale of the logical type"); + } + if (precisionAlreadySet) { + Preconditions.checkArgument(this.precision == decimalType.getPrecision(), + "Decimal precision should match with the precision of the logical type"); + } + scale = decimalType.getScale(); + precision = decimalType.getPrecision(); + } Preconditions.checkArgument(precision > 0, "Invalid DECIMAL precision: " + precision); - Preconditions.checkArgument(scale >= 0, - "Invalid DECIMAL scale: " + scale); - Preconditions.checkArgument(scale <= precision, + Preconditions.checkArgument(this.scale >= 0, + "Invalid DECIMAL scale: " + this.scale); + Preconditions.checkArgument(this.scale <= precision, "Invalid DECIMAL scale: cannot be greater than precision"); meta = new 
DecimalMetadata(precision, scale); } diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java index 0b1f41a59c..7a5a2f3a54 100644 --- a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java +++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -1396,6 +1396,47 @@ public PrimitiveType call() { }); } + @Test + public void testDecimalLogicalType() { + PrimitiveType expected = new PrimitiveType(REQUIRED, BINARY, "aDecimal", + LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(3, 4)); + PrimitiveType actual = Types.required(BINARY) + .as(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(3, 4)).named("aDecimal"); + Assert.assertEquals(expected, actual); + } + + @Test + public void testDecimalLogicalTypeWithDeprecatedScale() { + PrimitiveType expected = new PrimitiveType(REQUIRED, BINARY, "aDecimal", + LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(3, 4)); + PrimitiveType actual = Types.required(BINARY) + .as(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(3, 4)).scale(3).named("aDecimal"); + Assert.assertEquals(expected, actual); + } + + @Test + public void testDecimalLogicalTypeWithDeprecatedPrecision() { + PrimitiveType expected = new 
PrimitiveType(REQUIRED, BINARY, "aDecimal", + LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(3, 4)); + PrimitiveType actual = Types.required(BINARY) + .as(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(3, 4)).precision(4).named("aDecimal"); + Assert.assertEquals(expected, actual); + } + + @Test(expected = IllegalArgumentException.class) + public void testDecimalLogicalTypeWithDeprecatedScaleMismatch() { + Types.required(BINARY) + .as(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(3, 4)) + .scale(4).named("aDecimal"); + } + + @Test(expected = IllegalArgumentException.class) + public void testDecimalLogicalTypeWithDeprecatedPrecisionMismatch() { + Types.required(BINARY) + .as(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(3, 4)) + .precision(5).named("aDecimal"); + } + /** * A convenience method to avoid a large number of @Test(expected=...) tests * @param message A String message to describe this assertion diff --git a/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java b/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java index cf995346e7..88571e451f 100644 --- a/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java +++ b/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. 
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -24,6 +24,7 @@ import java.util.Arrays; import java.util.List; +import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.pig.LoadPushDown.RequiredField; import org.apache.pig.LoadPushDown.RequiredFieldList; import org.apache.pig.data.DataType; @@ -412,7 +413,7 @@ private Type primitive(String name, PrimitiveTypeName primitive, OriginalType or } private PrimitiveType primitive(String name, PrimitiveTypeName primitive) { - return new PrimitiveType(Repetition.OPTIONAL, primitive, name, null); + return new PrimitiveType(Repetition.OPTIONAL, primitive, name, (LogicalTypeAnnotation) null); } /** From db30adbb8f0fe4e18bb4561b2b2642cfbbc98824 Mon Sep 17 00:00:00 2001 From: Nandor Kollar Date: Thu, 19 Apr 2018 15:03:29 +0200 Subject: [PATCH 06/12] Address latest comments --- .../main/java/org/apache/parquet/schema/GroupType.java | 1 - .../java/org/apache/parquet/schema/PrimitiveType.java | 10 +++------- .../src/main/java/org/apache/parquet/schema/Type.java | 7 ------- 3 files changed, 3 insertions(+), 15 deletions(-) diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java b/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java index f9162e26b3..3ff25b6dba 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java @@ -88,7 +88,6 @@ public GroupType(Repetition repetition, String name, OriginalType originalType, * @param fields the contained fields * @param id the id of the field */ - @Deprecated GroupType(Repetition repetition, String name, OriginalType originalType, List fields, ID id) { super(name, repetition, originalType, id); 
this.fields = fields; diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java index dfda0b8e78..0b354a6d77 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java @@ -409,7 +409,7 @@ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, int len * @param name the name of the type * @param originalType (optional) the original type to help with cross schema convertion (LIST, MAP, ...) * - * @deprecated use {@link #PrimitiveType(Repetition, PrimitiveTypeName, String, LogicalTypeAnnotation)} instead + * @deprecated will be removed in 2.0.0; use {@link #PrimitiveType(Repetition, PrimitiveTypeName, String, LogicalTypeAnnotation)} instead */ @Deprecated public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, @@ -439,7 +439,7 @@ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, * @param decimalMeta (optional) metadata about the decimal type * @param id the id of the field * - * @deprecated use {@link #PrimitiveType(Repetition, PrimitiveTypeName, int, String, LogicalTypeAnnotation, ID)} instead + * @deprecated will be removed in 2.0.0; use {@link #PrimitiveType(Repetition, PrimitiveTypeName, int, String, LogicalTypeAnnotation, ID)} instead */ @Deprecated public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, @@ -448,10 +448,6 @@ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, this(repetition, primitive, length, name, originalType, decimalMeta, id, null); } - /** - * @deprecated use {@link #PrimitiveType(Repetition, PrimitiveTypeName, int, String, LogicalTypeAnnotation, ID, ColumnOrder)} instead - */ - @Deprecated PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, int length, String name, OriginalType originalType, DecimalMetadata decimalMeta, ID 
id, ColumnOrder columnOrder) { @@ -473,7 +469,7 @@ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, this(repetition, primitive, 0, name, logicalTypeAnnotation, null, null); } - public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, + PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, int length, String name, LogicalTypeAnnotation logicalTypeAnnotation, ID id) { this(repetition, primitive, length, name, logicalTypeAnnotation, id, null); } diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java index dbe7fedd2d..d54a60a9bb 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java @@ -146,18 +146,11 @@ public Type(String name, Repetition repetition, OriginalType originalType) { * @param repetition OPTIONAL, REPEATED, REQUIRED * @param originalType (optional) the original type to help with cross schema conversion (LIST, MAP, ...) * @param id (optional) the id of the fields. 
- * - * @deprecated use {@link #Type(String, Repetition, LogicalTypeAnnotation, ID)} instead */ - @Deprecated Type(String name, Repetition repetition, OriginalType originalType, ID id) { this(name, repetition, originalType, null, id); } - /** - * @deprecated use {@link #Type(String, Repetition, LogicalTypeAnnotation, ID)} instead - */ - @Deprecated Type(String name, Repetition repetition, OriginalType originalType, DecimalMetadata decimalMetadata, ID id) { super(); this.name = checkNotNull(name, "name"); From 047feb92f855e1854ab637a8c1e07340e4c604e2 Mon Sep 17 00:00:00 2001 From: Nandor Kollar Date: Tue, 24 Apr 2018 17:37:03 +0200 Subject: [PATCH 07/12] Introduce type visitors for logical types, and address code review comment related to byte-int casting --- .../parquet/schema/LogicalTypeAnnotation.java | 172 ++++++++++++++++-- .../apache/parquet/schema/PrimitiveType.java | 6 +- .../java/org/apache/parquet/schema/Type.java | 2 +- .../converter/ParquetMetadataConverter.java | 16 +- .../parquet/pig/PigSchemaConverter.java | 3 +- 5 files changed, 173 insertions(+), 26 deletions(-) diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java index 3fc54e97bf..0d3bfafff6 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java @@ -18,6 +18,7 @@ */ package org.apache.parquet.schema; +import org.apache.parquet.Preconditions; import org.apache.parquet.format.BsonType; import org.apache.parquet.format.ConvertedType; import org.apache.parquet.format.DateType; @@ -60,6 +61,13 @@ public interface LogicalTypeAnnotation { */ OriginalType toOriginalType(); + /** + * Visits this logical type with the given visitor + * + * @param logicalTypeAnnotationVisitor the visitor to visit this type + */ + void accept(LogicalTypeAnnotationVisitor 
logicalTypeAnnotationVisitor); + /** * Helper method to convert the old representation of logical types (OriginalType) to new logical type. */ @@ -91,21 +99,21 @@ static LogicalTypeAnnotation fromOriginalType(OriginalType originalType, Decimal case TIME_MICROS: return TimeLogicalTypeAnnotation.create(true, LogicalTypeAnnotation.TimeUnit.MICROS); case UINT_8: - return IntLogicalTypeAnnotation.create((byte) 8, false); + return IntLogicalTypeAnnotation.create(8, false); case UINT_16: - return IntLogicalTypeAnnotation.create((byte) 16, false); + return IntLogicalTypeAnnotation.create(16, false); case UINT_32: - return IntLogicalTypeAnnotation.create((byte) 32, false); + return IntLogicalTypeAnnotation.create(32, false); case UINT_64: - return IntLogicalTypeAnnotation.create((byte) 64, false); + return IntLogicalTypeAnnotation.create(64, false); case INT_8: - return IntLogicalTypeAnnotation.create((byte) 8, true); + return IntLogicalTypeAnnotation.create(8, true); case INT_16: - return IntLogicalTypeAnnotation.create((byte) 16, true); + return IntLogicalTypeAnnotation.create(16, true); case INT_32: - return IntLogicalTypeAnnotation.create((byte) 32, true); + return IntLogicalTypeAnnotation.create(32, true); case INT_64: - return IntLogicalTypeAnnotation.create((byte) 64, true); + return IntLogicalTypeAnnotation.create(64, true); case ENUM: return EnumLogicalTypeAnnotation.create(); case JSON: @@ -144,6 +152,11 @@ public OriginalType toOriginalType() { return OriginalType.UTF8; } + @Override + public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { + logicalTypeAnnotationVisitor.visit(this); + } + @Override public boolean equals(Object obj) { return obj instanceof StringLogicalTypeAnnotation; @@ -181,6 +194,11 @@ public OriginalType toOriginalType() { return OriginalType.MAP; } + @Override + public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { + logicalTypeAnnotationVisitor.visit(this); + } + @Override public boolean 
equals(Object obj) { return obj instanceof MapLogicalTypeAnnotation; @@ -218,6 +236,11 @@ public OriginalType toOriginalType() { return OriginalType.LIST; } + @Override + public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { + logicalTypeAnnotationVisitor.visit(this); + } + @Override public boolean equals(Object obj) { return obj instanceof ListLogicalTypeAnnotation; @@ -255,6 +278,11 @@ public OriginalType toOriginalType() { return OriginalType.ENUM; } + @Override + public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { + logicalTypeAnnotationVisitor.visit(this); + } + @Override public boolean equals(Object obj) { return obj instanceof EnumLogicalTypeAnnotation; @@ -303,6 +331,11 @@ public OriginalType toOriginalType() { return OriginalType.DECIMAL; } + @Override + public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { + logicalTypeAnnotationVisitor.visit(this); + } + @Override public boolean equals(Object obj) { if (!(obj instanceof DecimalLogicalTypeAnnotation)) { @@ -343,6 +376,11 @@ public OriginalType toOriginalType() { return OriginalType.DATE; } + @Override + public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { + logicalTypeAnnotationVisitor.visit(this); + } + @Override public boolean equals(Object obj) { return obj instanceof DateLogicalTypeAnnotation; @@ -413,6 +451,19 @@ public OriginalType toOriginalType() { } } + @Override + public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { + logicalTypeAnnotationVisitor.visit(this); + } + + public TimeUnit getUnit() { + return unit; + } + + public boolean isAdjustedToUTC() { + return isAdjustedToUTC; + } + @Override public boolean equals(Object obj) { if (!(obj instanceof TimeLogicalTypeAnnotation)) { @@ -470,6 +521,19 @@ public OriginalType toOriginalType() { } } + @Override + public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { + 
logicalTypeAnnotationVisitor.visit(this); + } + + public TimeUnit getUnit() { + return unit; + } + + public boolean isAdjustedToUTC() { + return isAdjustedToUTC; + } + @Override public boolean equals(Object obj) { if (!(obj instanceof TimestampLogicalTypeAnnotation)) { @@ -486,21 +550,25 @@ public int hashCode() { } class IntLogicalTypeAnnotation implements LogicalTypeAnnotation { - private final byte bitWidth; + private final int bitWidth; private final boolean isSigned; - public static LogicalTypeAnnotation create(byte bitWidth, boolean isSigned) { + public static LogicalTypeAnnotation create(int bitWidth, boolean isSigned) { + Preconditions.checkArgument( + bitWidth == 8 || bitWidth == 16 || bitWidth == 32 || bitWidth == 64, + "Invalid bit width for integer logical type, " + bitWidth + " is not allowed, " + + "valid bit width values: 8, 16, 32, 64"); return new IntLogicalTypeAnnotation(bitWidth, isSigned); } - private IntLogicalTypeAnnotation(byte bitWidth, boolean isSigned) { + private IntLogicalTypeAnnotation(int bitWidth, boolean isSigned) { this.bitWidth = bitWidth; this.isSigned = isSigned; } @Override public LogicalType toLogicalType() { - return LogicalType.INTEGER(new IntType(bitWidth, isSigned)); + return LogicalType.INTEGER(new IntType((byte) bitWidth, isSigned)); } @Override @@ -543,6 +611,19 @@ public OriginalType toOriginalType() { } } + @Override + public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { + logicalTypeAnnotationVisitor.visit(this); + } + + public int getBitWidth() { + return bitWidth; + } + + public boolean isSigned() { + return isSigned; + } + @Override public boolean equals(Object obj) { if (!(obj instanceof IntLogicalTypeAnnotation)) { @@ -583,6 +664,11 @@ public OriginalType toOriginalType() { return OriginalType.JSON; } + @Override + public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { + logicalTypeAnnotationVisitor.visit(this); + } + @Override public boolean equals(Object obj) 
{ return obj instanceof JsonLogicalTypeAnnotation; @@ -620,6 +706,11 @@ public OriginalType toOriginalType() { return OriginalType.BSON; } + @Override + public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { + logicalTypeAnnotationVisitor.visit(this); + } + @Override public boolean equals(Object obj) { return obj instanceof BsonLogicalTypeAnnotation; @@ -660,6 +751,11 @@ public OriginalType toOriginalType() { return OriginalType.INTERVAL; } + @Override + public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { + logicalTypeAnnotationVisitor.visit(this); + } + @Override public boolean equals(Object obj) { return obj instanceof IntervalLogicalTypeAnnotation; @@ -700,6 +796,11 @@ public OriginalType toOriginalType() { return OriginalType.MAP_KEY_VALUE; } + @Override + public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { + logicalTypeAnnotationVisitor.visit(this); + } + @Override public boolean equals(Object obj) { return obj instanceof MapKeyValueTypeAnnotation; @@ -711,4 +812,51 @@ public int hashCode() { return getClass().hashCode(); } } + + /** + * Implement this interface to visit a logical type annotation in the schema. + * The default implementation for each logical type specific visitor method is empty. + * + * Example usage: logicalTypeAnnotation.accept(new LogicalTypeAnnotationVisitor() { ... 
}); + */ + interface LogicalTypeAnnotationVisitor { + default void visit(StringLogicalTypeAnnotation logicalTypeAnnotation) { + } + + default void visit(MapLogicalTypeAnnotation logicalTypeAnnotation) { + } + + default void visit(ListLogicalTypeAnnotation logicalTypeAnnotation) { + } + + default void visit(EnumLogicalTypeAnnotation logicalTypeAnnotation) { + } + + default void visit(DecimalLogicalTypeAnnotation logicalTypeAnnotation) { + } + + default void visit(DateLogicalTypeAnnotation logicalTypeAnnotation) { + } + + default void visit(TimeLogicalTypeAnnotation logicalTypeAnnotation) { + } + + default void visit(TimestampLogicalTypeAnnotation logicalTypeAnnotation) { + } + + default void visit(IntLogicalTypeAnnotation logicalTypeAnnotation) { + } + + default void visit(JsonLogicalTypeAnnotation logicalTypeAnnotation) { + } + + default void visit(BsonLogicalTypeAnnotation logicalTypeAnnotation) { + } + + default void visit(IntervalLogicalTypeAnnotation logicalTypeAnnotation) { + } + + default void visit(MapKeyValueTypeAnnotation logicalTypeAnnotation) { + } + } } diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java index 0b354a6d77..1edd3f86b1 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java @@ -409,7 +409,7 @@ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, int len * @param name the name of the type * @param originalType (optional) the original type to help with cross schema convertion (LIST, MAP, ...) 
* - * @deprecated will be removed in 2.0.0; use {@link #PrimitiveType(Repetition, PrimitiveTypeName, String, LogicalTypeAnnotation)} instead + * @deprecated will be removed in 2.0.0; use builders in {@link Types} instead */ @Deprecated public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, @@ -439,7 +439,7 @@ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, * @param decimalMeta (optional) metadata about the decimal type * @param id the id of the field * - * @deprecated will be removed in 2.0.0; use {@link #PrimitiveType(Repetition, PrimitiveTypeName, int, String, LogicalTypeAnnotation, ID)} instead + * @deprecated will be removed in 2.0.0; use builders in {@link Types} instead */ @Deprecated public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, @@ -464,7 +464,7 @@ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, this.columnOrder = requireValidColumnOrder(columnOrder); } - public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, + PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, String name, LogicalTypeAnnotation logicalTypeAnnotation) { this(repetition, primitive, 0, name, logicalTypeAnnotation, null, null); } diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java index d54a60a9bb..69374a4653 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java @@ -159,7 +159,7 @@ public Type(String name, Repetition repetition, OriginalType originalType) { this.id = id; } - public Type(String name, Repetition repetition, LogicalTypeAnnotation logicalTypeAnnotation) { + Type(String name, Repetition repetition, LogicalTypeAnnotation logicalTypeAnnotation) { this(name, repetition, logicalTypeAnnotation, null); } diff --git 
a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java index 3de07cdcb1..8396e9e1fb 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java @@ -624,21 +624,21 @@ LogicalTypeAnnotation getOriginalType(ConvertedType type, SchemaElement schemaEl case INTERVAL: return LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.create(); case INT_8: - return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 8, true); + return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create(8, true); case INT_16: - return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 16, true); + return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create(16, true); case INT_32: - return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 32, true); + return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create(32, true); case INT_64: - return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 64, true); + return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create(64, true); case UINT_8: - return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 8, false); + return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create(8, false); case UINT_16: - return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 16, false); + return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create(16, false); case UINT_32: - return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 32, false); + return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create(32, false); case UINT_64: - return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create((byte) 64, false); + return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create(64, false); case JSON: return 
LogicalTypeAnnotation.JsonLogicalTypeAnnotation.create(); case BSON: diff --git a/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java b/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java index 88571e451f..d1d896f79a 100644 --- a/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java +++ b/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java @@ -24,7 +24,6 @@ import java.util.Arrays; import java.util.List; -import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.pig.LoadPushDown.RequiredField; import org.apache.pig.LoadPushDown.RequiredFieldList; import org.apache.pig.data.DataType; @@ -413,7 +412,7 @@ private Type primitive(String name, PrimitiveTypeName primitive, OriginalType or } private PrimitiveType primitive(String name, PrimitiveTypeName primitive) { - return new PrimitiveType(Repetition.OPTIONAL, primitive, name, (LogicalTypeAnnotation) null); + return new PrimitiveType(Repetition.OPTIONAL, primitive, name, null); } /** From d11a09cd733faae7cdf6452459cd949efcffb453 Mon Sep 17 00:00:00 2001 From: Nandor Kollar Date: Tue, 24 Apr 2018 18:00:47 +0200 Subject: [PATCH 08/12] Move factory methods to the interface for each logical type subclass --- .../parquet/schema/LogicalTypeAnnotation.java | 174 +++++++++--------- .../parquet/schema/TestTypeBuilders.java | 16 +- .../converter/ParquetMetadataConverter.java | 66 +++---- 3 files changed, 129 insertions(+), 127 deletions(-) diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java index 0d3bfafff6..b864689942 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java @@ -77,63 +77,116 @@ static LogicalTypeAnnotation fromOriginalType(OriginalType originalType, Decimal } switch 
(originalType) { case UTF8: - return StringLogicalTypeAnnotation.create(); + return stringType(); case MAP: - return MapLogicalTypeAnnotation.create(); + return mapType(); case DECIMAL: int scale = (decimalMetadata == null ? 0 : decimalMetadata.getScale()); int precision = (decimalMetadata == null ? 0 : decimalMetadata.getPrecision()); - return DecimalLogicalTypeAnnotation.create(scale, precision); + return decimalType(scale, precision); case LIST: - return ListLogicalTypeAnnotation.create(); + return listType(); case DATE: - return DateLogicalTypeAnnotation.create(); + return dateType(); case INTERVAL: - return IntervalLogicalTypeAnnotation.create(); + return intervalType(); case TIMESTAMP_MILLIS: - return TimestampLogicalTypeAnnotation.create(true, LogicalTypeAnnotation.TimeUnit.MILLIS); + return timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS); case TIMESTAMP_MICROS: - return TimestampLogicalTypeAnnotation.create(true, LogicalTypeAnnotation.TimeUnit.MICROS); + return timestampType(true, LogicalTypeAnnotation.TimeUnit.MICROS); case TIME_MILLIS: - return TimeLogicalTypeAnnotation.create(true, LogicalTypeAnnotation.TimeUnit.MILLIS); + return timeType(true, LogicalTypeAnnotation.TimeUnit.MILLIS); case TIME_MICROS: - return TimeLogicalTypeAnnotation.create(true, LogicalTypeAnnotation.TimeUnit.MICROS); + return timeType(true, LogicalTypeAnnotation.TimeUnit.MICROS); case UINT_8: - return IntLogicalTypeAnnotation.create(8, false); + return intType(8, false); case UINT_16: - return IntLogicalTypeAnnotation.create(16, false); + return intType(16, false); case UINT_32: - return IntLogicalTypeAnnotation.create(32, false); + return intType(32, false); case UINT_64: - return IntLogicalTypeAnnotation.create(64, false); + return intType(64, false); case INT_8: - return IntLogicalTypeAnnotation.create(8, true); + return intType(8, true); case INT_16: - return IntLogicalTypeAnnotation.create(16, true); + return intType(16, true); case INT_32: - return 
IntLogicalTypeAnnotation.create(32, true); + return intType(32, true); case INT_64: - return IntLogicalTypeAnnotation.create(64, true); + return intType(64, true); case ENUM: - return EnumLogicalTypeAnnotation.create(); + return enumType(); case JSON: - return JsonLogicalTypeAnnotation.create(); + return jsonType(); case BSON: - return BsonLogicalTypeAnnotation.create(); + return bsonType(); case MAP_KEY_VALUE: - return MapKeyValueTypeAnnotation.create(); + return mapKeyValueType(); default: throw new RuntimeException("Can't convert original type to logical type, unknown original type " + originalType); } } + + static StringLogicalTypeAnnotation stringType() { + return StringLogicalTypeAnnotation.INSTANCE; + } + + static MapLogicalTypeAnnotation mapType() { + return MapLogicalTypeAnnotation.INSTANCE; + } + + static ListLogicalTypeAnnotation listType() { + return ListLogicalTypeAnnotation.INSTANCE; + } + + static EnumLogicalTypeAnnotation enumType() { + return EnumLogicalTypeAnnotation.INSTANCE; + } + + static DecimalLogicalTypeAnnotation decimalType(final int scale, final int precision) { + return new DecimalLogicalTypeAnnotation(scale, precision); + } + + static DateLogicalTypeAnnotation dateType() { + return DateLogicalTypeAnnotation.INSTANCE; + } + + static TimeLogicalTypeAnnotation timeType(final boolean isAdjustedToUTC, final TimeUnit unit) { + return new TimeLogicalTypeAnnotation(isAdjustedToUTC, unit); + } + + static TimestampLogicalTypeAnnotation timestampType(final boolean isAdjustedToUTC, final TimeUnit unit) { + return new TimestampLogicalTypeAnnotation(isAdjustedToUTC, unit); + } + + static IntLogicalTypeAnnotation intType(final int bitWidth, final boolean isSigned) { + Preconditions.checkArgument( + bitWidth == 8 || bitWidth == 16 || bitWidth == 32 || bitWidth == 64, + "Invalid bit width for integer logical type, " + bitWidth + " is not allowed, " + + "valid bit width values: 8, 16, 32, 64"); + return new IntLogicalTypeAnnotation(bitWidth, isSigned); + 
} + + static JsonLogicalTypeAnnotation jsonType() { + return JsonLogicalTypeAnnotation.INSTANCE; + } + + static BsonLogicalTypeAnnotation bsonType() { + return BsonLogicalTypeAnnotation.INSTANCE; + } + + static IntervalLogicalTypeAnnotation intervalType() { + return IntervalLogicalTypeAnnotation.INSTANCE; + } + + static MapKeyValueTypeAnnotation mapKeyValueType() { + return MapKeyValueTypeAnnotation.INSTANCE; + } + class StringLogicalTypeAnnotation implements LogicalTypeAnnotation { private static final StringLogicalTypeAnnotation INSTANCE = new StringLogicalTypeAnnotation(); - public static LogicalTypeAnnotation create() { - return INSTANCE; - } - private StringLogicalTypeAnnotation() { } @@ -164,7 +217,7 @@ public boolean equals(Object obj) { @Override public int hashCode() { - // This type doesn't have any parameters, thus use class hashcode + // This type doesn't have any parameters, thus using class hashcode return getClass().hashCode(); } } @@ -172,10 +225,6 @@ public int hashCode() { class MapLogicalTypeAnnotation implements LogicalTypeAnnotation { private static final MapLogicalTypeAnnotation INSTANCE = new MapLogicalTypeAnnotation(); - public static LogicalTypeAnnotation create() { - return INSTANCE; - } - private MapLogicalTypeAnnotation() { } @@ -206,7 +255,7 @@ public boolean equals(Object obj) { @Override public int hashCode() { - // This type doesn't have any parameters, thus use class hashcode + // This type doesn't have any parameters, thus using class hashcode return getClass().hashCode(); } } @@ -214,10 +263,6 @@ public int hashCode() { class ListLogicalTypeAnnotation implements LogicalTypeAnnotation { private static final ListLogicalTypeAnnotation INSTANCE = new ListLogicalTypeAnnotation(); - public static LogicalTypeAnnotation create() { - return INSTANCE; - } - private ListLogicalTypeAnnotation() { } @@ -248,7 +293,7 @@ public boolean equals(Object obj) { @Override public int hashCode() { - // This type doesn't have any parameters, thus use 
class hashcode + // This type doesn't have any parameters, thus using class hashcode return getClass().hashCode(); } } @@ -256,10 +301,6 @@ public int hashCode() { class EnumLogicalTypeAnnotation implements LogicalTypeAnnotation { private static final EnumLogicalTypeAnnotation INSTANCE = new EnumLogicalTypeAnnotation(); - public static LogicalTypeAnnotation create() { - return INSTANCE; - } - private EnumLogicalTypeAnnotation() { } @@ -290,7 +331,7 @@ public boolean equals(Object obj) { @Override public int hashCode() { - // This type doesn't have any parameters, thus use class hashcode + // This type doesn't have any parameters, thus using class hashcode return getClass().hashCode(); } } @@ -299,10 +340,6 @@ class DecimalLogicalTypeAnnotation implements LogicalTypeAnnotation { private final int scale; private final int precision; - public static LogicalTypeAnnotation create(int scale, int precision) { - return new DecimalLogicalTypeAnnotation(scale, precision); - } - private DecimalLogicalTypeAnnotation(int scale, int precision) { this.scale = scale; this.precision = precision; @@ -354,10 +391,6 @@ public int hashCode() { class DateLogicalTypeAnnotation implements LogicalTypeAnnotation { private static final DateLogicalTypeAnnotation INSTANCE = new DateLogicalTypeAnnotation(); - public static LogicalTypeAnnotation create() { - return INSTANCE; - } - private DateLogicalTypeAnnotation() { } @@ -388,7 +421,7 @@ public boolean equals(Object obj) { @Override public int hashCode() { - // This type doesn't have any parameters, thus use class hashcode + // This type doesn't have any parameters, thus using class hashcode return getClass().hashCode(); } } @@ -413,10 +446,6 @@ class TimeLogicalTypeAnnotation implements LogicalTypeAnnotation { private final boolean isAdjustedToUTC; private final TimeUnit unit; - public static LogicalTypeAnnotation create(boolean isAdjustedToUTC, TimeUnit unit) { - return new TimeLogicalTypeAnnotation(isAdjustedToUTC, unit); - } - private 
TimeLogicalTypeAnnotation(boolean isAdjustedToUTC, TimeUnit unit) { this.isAdjustedToUTC = isAdjustedToUTC; this.unit = unit; @@ -483,10 +512,6 @@ class TimestampLogicalTypeAnnotation implements LogicalTypeAnnotation { private final boolean isAdjustedToUTC; private final TimeUnit unit; - public static LogicalTypeAnnotation create(boolean isAdjustedToUTC, TimeUnit unit) { - return new TimestampLogicalTypeAnnotation(isAdjustedToUTC, unit); - } - private TimestampLogicalTypeAnnotation(boolean isAdjustedToUTC, TimeUnit unit) { this.isAdjustedToUTC = isAdjustedToUTC; this.unit = unit; @@ -553,13 +578,6 @@ class IntLogicalTypeAnnotation implements LogicalTypeAnnotation { private final int bitWidth; private final boolean isSigned; - public static LogicalTypeAnnotation create(int bitWidth, boolean isSigned) { - Preconditions.checkArgument( - bitWidth == 8 || bitWidth == 16 || bitWidth == 32 || bitWidth == 64, - "Invalid bit width for integer logical type, " + bitWidth + " is not allowed, " + - "valid bit width values: 8, 16, 32, 64"); - return new IntLogicalTypeAnnotation(bitWidth, isSigned); - } private IntLogicalTypeAnnotation(int bitWidth, boolean isSigned) { this.bitWidth = bitWidth; @@ -642,10 +660,6 @@ public int hashCode() { class JsonLogicalTypeAnnotation implements LogicalTypeAnnotation { private static final JsonLogicalTypeAnnotation INSTANCE = new JsonLogicalTypeAnnotation(); - public static LogicalTypeAnnotation create() { - return INSTANCE; - } - private JsonLogicalTypeAnnotation() { } @@ -676,7 +690,7 @@ public boolean equals(Object obj) { @Override public int hashCode() { - // This type doesn't have any parameters, thus use class hashcode + // This type doesn't have any parameters, thus using class hashcode return getClass().hashCode(); } } @@ -684,10 +698,6 @@ public int hashCode() { class BsonLogicalTypeAnnotation implements LogicalTypeAnnotation { private static final BsonLogicalTypeAnnotation INSTANCE = new BsonLogicalTypeAnnotation(); - public static 
LogicalTypeAnnotation create() { - return INSTANCE; - } - private BsonLogicalTypeAnnotation() { } @@ -718,7 +728,7 @@ public boolean equals(Object obj) { @Override public int hashCode() { - // This type doesn't have any parameters, thus use class hashcode + // This type doesn't have any parameters, thus using class hashcode return getClass().hashCode(); } } @@ -729,10 +739,6 @@ public int hashCode() { class IntervalLogicalTypeAnnotation implements LogicalTypeAnnotation { private static IntervalLogicalTypeAnnotation INSTANCE = new IntervalLogicalTypeAnnotation(); - public static LogicalTypeAnnotation create() { - return INSTANCE; - } - private IntervalLogicalTypeAnnotation() { } @@ -763,7 +769,7 @@ public boolean equals(Object obj) { @Override public int hashCode() { - // This type doesn't have any parameters, thus use class hashcode + // This type doesn't have any parameters, thus using class hashcode return getClass().hashCode(); } } @@ -774,10 +780,6 @@ public int hashCode() { class MapKeyValueTypeAnnotation implements LogicalTypeAnnotation { private static MapKeyValueTypeAnnotation INSTANCE = new MapKeyValueTypeAnnotation(); - public static LogicalTypeAnnotation create() { - return INSTANCE; - } - private MapKeyValueTypeAnnotation() { } @@ -808,7 +810,7 @@ public boolean equals(Object obj) { @Override public int hashCode() { - // This type doesn't have any parameters, thus use class hashcode + // This type doesn't have any parameters, thus using class hashcode return getClass().hashCode(); } } diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java index 7a5a2f3a54..a42e9e33b7 100644 --- a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java +++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java @@ -1399,41 +1399,41 @@ public PrimitiveType call() { @Test public void testDecimalLogicalType() { 
PrimitiveType expected = new PrimitiveType(REQUIRED, BINARY, "aDecimal", - LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(3, 4)); + LogicalTypeAnnotation.decimalType(3, 4)); PrimitiveType actual = Types.required(BINARY) - .as(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(3, 4)).named("aDecimal"); + .as(LogicalTypeAnnotation.decimalType(3, 4)).named("aDecimal"); Assert.assertEquals(expected, actual); } @Test public void testDecimalLogicalTypeWithDeprecatedScale() { PrimitiveType expected = new PrimitiveType(REQUIRED, BINARY, "aDecimal", - LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(3, 4)); + LogicalTypeAnnotation.decimalType(3, 4)); PrimitiveType actual = Types.required(BINARY) - .as(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(3, 4)).scale(3).named("aDecimal"); + .as(LogicalTypeAnnotation.decimalType(3, 4)).scale(3).named("aDecimal"); Assert.assertEquals(expected, actual); } @Test public void testDecimalLogicalTypeWithDeprecatedPrecision() { PrimitiveType expected = new PrimitiveType(REQUIRED, BINARY, "aDecimal", - LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(3, 4)); + LogicalTypeAnnotation.decimalType(3, 4)); PrimitiveType actual = Types.required(BINARY) - .as(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(3, 4)).precision(4).named("aDecimal"); + .as(LogicalTypeAnnotation.decimalType(3, 4)).precision(4).named("aDecimal"); Assert.assertEquals(expected, actual); } @Test(expected = IllegalArgumentException.class) public void testDecimalLogicalTypeWithDeprecatedScaleMismatch() { Types.required(BINARY) - .as(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(3, 4)) + .as(LogicalTypeAnnotation.decimalType(3, 4)) .scale(4).named("aDecimal"); } @Test(expected = IllegalArgumentException.class) public void testDecimalLogicalTypeWithDeprecatedPrecisionMismatch() { Types.required(BINARY) - .as(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(3, 4)) + 
.as(LogicalTypeAnnotation.decimalType(3, 4)) .precision(5).named("aDecimal"); } diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java index 8396e9e1fb..0f535e1e2e 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java @@ -598,51 +598,51 @@ Type getType(PrimitiveTypeName type) { LogicalTypeAnnotation getOriginalType(ConvertedType type, SchemaElement schemaElement) { switch (type) { case UTF8: - return LogicalTypeAnnotation.StringLogicalTypeAnnotation.create(); + return LogicalTypeAnnotation.stringType(); case MAP: - return LogicalTypeAnnotation.MapLogicalTypeAnnotation.create(); + return LogicalTypeAnnotation.mapType(); case MAP_KEY_VALUE: - return LogicalTypeAnnotation.MapKeyValueTypeAnnotation.create(); + return LogicalTypeAnnotation.mapKeyValueType(); case LIST: - return LogicalTypeAnnotation.ListLogicalTypeAnnotation.create(); + return LogicalTypeAnnotation.listType(); case ENUM: - return LogicalTypeAnnotation.EnumLogicalTypeAnnotation.create(); + return LogicalTypeAnnotation.enumType(); case DECIMAL: int scale = (schemaElement == null ? 0 : schemaElement.scale); int precision = (schemaElement == null ? 
0 : schemaElement.precision); - return LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(scale, precision); + return LogicalTypeAnnotation.decimalType(scale, precision); case DATE: - return LogicalTypeAnnotation.DateLogicalTypeAnnotation.create(); + return LogicalTypeAnnotation.dateType(); case TIME_MILLIS: - return LogicalTypeAnnotation.TimeLogicalTypeAnnotation.create(true, LogicalTypeAnnotation.TimeUnit.MILLIS); + return LogicalTypeAnnotation.timeType(true, LogicalTypeAnnotation.TimeUnit.MILLIS); case TIME_MICROS: - return LogicalTypeAnnotation.TimeLogicalTypeAnnotation.create(true, LogicalTypeAnnotation.TimeUnit.MICROS); + return LogicalTypeAnnotation.timeType(true, LogicalTypeAnnotation.TimeUnit.MICROS); case TIMESTAMP_MILLIS: - return LogicalTypeAnnotation.TimestampLogicalTypeAnnotation.create(true, LogicalTypeAnnotation.TimeUnit.MILLIS); + return LogicalTypeAnnotation.timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS); case TIMESTAMP_MICROS: - return LogicalTypeAnnotation.TimestampLogicalTypeAnnotation.create(true, LogicalTypeAnnotation.TimeUnit.MICROS); + return LogicalTypeAnnotation.timestampType(true, LogicalTypeAnnotation.TimeUnit.MICROS); case INTERVAL: - return LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.create(); + return LogicalTypeAnnotation.intervalType(); case INT_8: - return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create(8, true); + return LogicalTypeAnnotation.intType(8, true); case INT_16: - return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create(16, true); + return LogicalTypeAnnotation.intType(16, true); case INT_32: - return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create(32, true); + return LogicalTypeAnnotation.intType(32, true); case INT_64: - return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create(64, true); + return LogicalTypeAnnotation.intType(64, true); case UINT_8: - return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create(8, false); + return LogicalTypeAnnotation.intType(8, false); 
case UINT_16: - return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create(16, false); + return LogicalTypeAnnotation.intType(16, false); case UINT_32: - return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create(32, false); + return LogicalTypeAnnotation.intType(32, false); case UINT_64: - return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create(64, false); + return LogicalTypeAnnotation.intType(64, false); case JSON: - return LogicalTypeAnnotation.JsonLogicalTypeAnnotation.create(); + return LogicalTypeAnnotation.jsonType(); case BSON: - return LogicalTypeAnnotation.BsonLogicalTypeAnnotation.create(); + return LogicalTypeAnnotation.bsonType(); default: throw new RuntimeException("Can't convert converted type to logical type, unknown converted type " + type); } @@ -651,33 +651,33 @@ LogicalTypeAnnotation getOriginalType(ConvertedType type, SchemaElement schemaEl LogicalTypeAnnotation getOriginalType(LogicalType type) { switch (type.getSetField()) { case MAP: - return LogicalTypeAnnotation.MapLogicalTypeAnnotation.create(); + return LogicalTypeAnnotation.mapType(); case BSON: - return LogicalTypeAnnotation.BsonLogicalTypeAnnotation.create(); + return LogicalTypeAnnotation.bsonType(); case DATE: - return LogicalTypeAnnotation.DateLogicalTypeAnnotation.create(); + return LogicalTypeAnnotation.dateType(); case ENUM: - return LogicalTypeAnnotation.EnumLogicalTypeAnnotation.create(); + return LogicalTypeAnnotation.enumType(); case JSON: - return LogicalTypeAnnotation.JsonLogicalTypeAnnotation.create(); + return LogicalTypeAnnotation.jsonType(); case LIST: - return LogicalTypeAnnotation.ListLogicalTypeAnnotation.create(); + return LogicalTypeAnnotation.listType(); case TIME: TimeType time = type.getTIME(); - return LogicalTypeAnnotation.TimeLogicalTypeAnnotation.create(time.isAdjustedToUTC, convertTimeUnit(time.unit)); + return LogicalTypeAnnotation.timeType(time.isAdjustedToUTC, convertTimeUnit(time.unit)); case STRING: - return 
LogicalTypeAnnotation.StringLogicalTypeAnnotation.create(); + return LogicalTypeAnnotation.stringType(); case DECIMAL: DecimalType decimal = type.getDECIMAL(); - return LogicalTypeAnnotation.DecimalLogicalTypeAnnotation.create(decimal.scale, decimal.precision); + return LogicalTypeAnnotation.decimalType(decimal.scale, decimal.precision); case INTEGER: IntType integer = type.getINTEGER(); - return LogicalTypeAnnotation.IntLogicalTypeAnnotation.create(integer.bitWidth, integer.isSigned); + return LogicalTypeAnnotation.intType(integer.bitWidth, integer.isSigned); case UNKNOWN: return null; case TIMESTAMP: TimestampType timestamp = type.getTIMESTAMP(); - return LogicalTypeAnnotation.TimestampLogicalTypeAnnotation.create(timestamp.isAdjustedToUTC, convertTimeUnit(timestamp.unit)); + return LogicalTypeAnnotation.timestampType(timestamp.isAdjustedToUTC, convertTimeUnit(timestamp.unit)); default: throw new RuntimeException("Unknown logical type " + type); } From eb432f7305087ec23f7cb9add793ff8ca440599d Mon Sep 17 00:00:00 2001 From: Nandor Kollar Date: Tue, 8 May 2018 14:50:16 +0200 Subject: [PATCH 09/12] Incorporate new logical type parameters into schema language This commit: * Addresses code review comments: replaces the Interval and MapKeyValue logical type factory methods with getInstance() methods on the logical type classes, to indicate that they are not intended to be used publicly * Incorporates the new parameters of the time, timestamp and integer logical types into the schema language --- .../parquet/schema/LogicalTypeAnnotation.java | 305 +++++++++++++++--- .../parquet/schema/MessageTypeParser.java | 55 +++- .../apache/parquet/schema/PrimitiveType.java | 14 +- .../java/org/apache/parquet/schema/Types.java | 1 - .../parquet/parser/TestParquetParser.java | 43 ++- .../converter/ParquetMetadataConverter.java | 4 +- 6 files changed, 346 insertions(+), 76 deletions(-) diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java index b864689942..9a3a033bc6 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java @@ -36,22 +36,118 @@ import org.apache.parquet.format.TimeType; import org.apache.parquet.format.TimestampType; +import java.util.List; import java.util.Objects; -public interface LogicalTypeAnnotation { +public abstract class LogicalTypeAnnotation { + public enum LogicalTypes { + MAP { + @Override + protected LogicalTypeAnnotation fromString(List<String> params) { + return mapType(); + } + }, + LIST { + @Override + protected LogicalTypeAnnotation fromString(List<String> params) { + return listType(); + } + }, + UTF8 { + @Override + protected LogicalTypeAnnotation fromString(List<String> params) { + return stringType(); + } + }, + MAP_KEY_VALUE { + @Override + protected LogicalTypeAnnotation fromString(List<String> params) { + return MapKeyValueTypeAnnotation.getInstance(); + } + }, + ENUM { + @Override + protected LogicalTypeAnnotation fromString(List<String> params) { + return enumType(); + } + }, + DECIMAL { + @Override + protected LogicalTypeAnnotation fromString(List<String> params) { + if (params.size() != 2) { + throw new RuntimeException("Expecting 2 parameters for decimal logical type, got " + params.size()); + } + return decimalType(Integer.valueOf(params.get(1)), Integer.valueOf(params.get(0))); + } + }, + DATE { + @Override + protected LogicalTypeAnnotation fromString(List<String> params) { + return dateType(); + } + }, + TIME { + @Override + protected LogicalTypeAnnotation fromString(List<String> params) { + if (params.size() != 2) { + throw new RuntimeException("Expecting 2 parameters for time logical type, got " + params.size()); + } + return timeType(Boolean.parseBoolean(params.get(1)), TimeUnit.valueOf(params.get(0))); + } + }, + TIMESTAMP { + @Override + protected LogicalTypeAnnotation fromString(List<String> params) { + if
(params.size() != 2) { + throw new RuntimeException("Expecting 2 parameters for timestamp logical type, got " + params.size()); + } + return timestampType(Boolean.parseBoolean(params.get(1)), TimeUnit.valueOf(params.get(0))); + } + }, + INT { + @Override + protected LogicalTypeAnnotation fromString(List<String> params) { + if (params.size() != 2) { + throw new RuntimeException("Expecting 2 parameters for integer logical type, got " + params.size()); + } + return intType(Integer.valueOf(params.get(0)), Boolean.parseBoolean(params.get(1))); + } + }, + JSON { + @Override + protected LogicalTypeAnnotation fromString(List<String> params) { + return jsonType(); + } + }, + BSON { + @Override + protected LogicalTypeAnnotation fromString(List<String> params) { + return bsonType(); + } + }, + INTERVAL { + @Override + protected LogicalTypeAnnotation fromString(List<String> params) { + return IntervalLogicalTypeAnnotation.getInstance(); + } + }; + + protected abstract LogicalTypeAnnotation fromString(List<String> params); + } + /** * Convert this parquet-mr logical type to parquet-format LogicalType. * * @return the parquet-format LogicalType representation of this logical type implementation */ - LogicalType toLogicalType(); + public abstract LogicalType toLogicalType(); /** * Convert this parquet-mr logical type to parquet-format ConvertedType. * * @return the parquet-format ConvertedType representation of this logical type implementation */ - ConvertedType toConvertedType(); + public abstract ConvertedType toConvertedType(); /** * Convert this logical type to old logical type representation in parquet-mr (if there's any).
@@ -59,19 +155,33 @@ public interface LogicalTypeAnnotation { * * @return the OriginalType representation of the new logical type, or null if there's none */ - OriginalType toOriginalType(); + public abstract OriginalType toOriginalType(); /** * Visits this logical type with the given visitor * * @param logicalTypeAnnotationVisitor the visitor to visit this type */ - void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor); + public abstract void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor); + + public abstract LogicalTypes getType(); + + protected String typeParametersAsString() { + return ""; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(getType()); + sb.append(typeParametersAsString()); + return sb.toString(); + } /** * Helper method to convert the old representation of logical types (OriginalType) to new logical type. */ - static LogicalTypeAnnotation fromOriginalType(OriginalType originalType, DecimalMetadata decimalMetadata) { + public static LogicalTypeAnnotation fromOriginalType(OriginalType originalType, DecimalMetadata decimalMetadata) { if (originalType == null) { return null; } @@ -89,7 +199,7 @@ static LogicalTypeAnnotation fromOriginalType(OriginalType originalType, Decimal case DATE: return dateType(); case INTERVAL: - return intervalType(); + return IntervalLogicalTypeAnnotation.getInstance(); case TIMESTAMP_MILLIS: return timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS); case TIMESTAMP_MICROS: @@ -121,46 +231,46 @@ static LogicalTypeAnnotation fromOriginalType(OriginalType originalType, Decimal case BSON: return bsonType(); case MAP_KEY_VALUE: - return mapKeyValueType(); + return MapKeyValueTypeAnnotation.getInstance(); default: throw new RuntimeException("Can't convert original type to logical type, unknown original type " + originalType); } } - static StringLogicalTypeAnnotation stringType() { + public static StringLogicalTypeAnnotation 
stringType() { return StringLogicalTypeAnnotation.INSTANCE; } - static MapLogicalTypeAnnotation mapType() { + public static MapLogicalTypeAnnotation mapType() { return MapLogicalTypeAnnotation.INSTANCE; } - static ListLogicalTypeAnnotation listType() { + public static ListLogicalTypeAnnotation listType() { return ListLogicalTypeAnnotation.INSTANCE; } - static EnumLogicalTypeAnnotation enumType() { + public static EnumLogicalTypeAnnotation enumType() { return EnumLogicalTypeAnnotation.INSTANCE; } - static DecimalLogicalTypeAnnotation decimalType(final int scale, final int precision) { + public static DecimalLogicalTypeAnnotation decimalType(final int scale, final int precision) { return new DecimalLogicalTypeAnnotation(scale, precision); } - static DateLogicalTypeAnnotation dateType() { + public static DateLogicalTypeAnnotation dateType() { return DateLogicalTypeAnnotation.INSTANCE; } - static TimeLogicalTypeAnnotation timeType(final boolean isAdjustedToUTC, final TimeUnit unit) { + public static TimeLogicalTypeAnnotation timeType(final boolean isAdjustedToUTC, final TimeUnit unit) { return new TimeLogicalTypeAnnotation(isAdjustedToUTC, unit); } - static TimestampLogicalTypeAnnotation timestampType(final boolean isAdjustedToUTC, final TimeUnit unit) { + public static TimestampLogicalTypeAnnotation timestampType(final boolean isAdjustedToUTC, final TimeUnit unit) { return new TimestampLogicalTypeAnnotation(isAdjustedToUTC, unit); } - static IntLogicalTypeAnnotation intType(final int bitWidth, final boolean isSigned) { + public static IntLogicalTypeAnnotation intType(final int bitWidth, final boolean isSigned) { Preconditions.checkArgument( bitWidth == 8 || bitWidth == 16 || bitWidth == 32 || bitWidth == 64, "Invalid bit width for integer logical type, " + bitWidth + " is not allowed, " + @@ -168,23 +278,15 @@ static IntLogicalTypeAnnotation intType(final int bitWidth, final boolean isSign return new IntLogicalTypeAnnotation(bitWidth, isSigned); } - static 
JsonLogicalTypeAnnotation jsonType() { + public static JsonLogicalTypeAnnotation jsonType() { return JsonLogicalTypeAnnotation.INSTANCE; } - static BsonLogicalTypeAnnotation bsonType() { + public static BsonLogicalTypeAnnotation bsonType() { return BsonLogicalTypeAnnotation.INSTANCE; } - static IntervalLogicalTypeAnnotation intervalType() { - return IntervalLogicalTypeAnnotation.INSTANCE; - } - - static MapKeyValueTypeAnnotation mapKeyValueType() { - return MapKeyValueTypeAnnotation.INSTANCE; - } - - class StringLogicalTypeAnnotation implements LogicalTypeAnnotation { + public static class StringLogicalTypeAnnotation extends LogicalTypeAnnotation { private static final StringLogicalTypeAnnotation INSTANCE = new StringLogicalTypeAnnotation(); private StringLogicalTypeAnnotation() { @@ -210,6 +312,11 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { logicalTypeAnnotationVisitor.visit(this); } + @Override + public LogicalTypes getType() { + return LogicalTypes.UTF8; + } + @Override public boolean equals(Object obj) { return obj instanceof StringLogicalTypeAnnotation; @@ -222,7 +329,7 @@ public int hashCode() { } } - class MapLogicalTypeAnnotation implements LogicalTypeAnnotation { + public static class MapLogicalTypeAnnotation extends LogicalTypeAnnotation { private static final MapLogicalTypeAnnotation INSTANCE = new MapLogicalTypeAnnotation(); private MapLogicalTypeAnnotation() { @@ -248,6 +355,11 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { logicalTypeAnnotationVisitor.visit(this); } + @Override + public LogicalTypes getType() { + return LogicalTypes.MAP; + } + @Override public boolean equals(Object obj) { return obj instanceof MapLogicalTypeAnnotation; @@ -260,7 +372,7 @@ public int hashCode() { } } - class ListLogicalTypeAnnotation implements LogicalTypeAnnotation { + public static class ListLogicalTypeAnnotation extends LogicalTypeAnnotation { private static final ListLogicalTypeAnnotation 
INSTANCE = new ListLogicalTypeAnnotation(); private ListLogicalTypeAnnotation() { @@ -286,6 +398,11 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { logicalTypeAnnotationVisitor.visit(this); } + @Override + public LogicalTypes getType() { + return LogicalTypes.LIST; + } + @Override public boolean equals(Object obj) { return obj instanceof ListLogicalTypeAnnotation; @@ -298,7 +415,7 @@ public int hashCode() { } } - class EnumLogicalTypeAnnotation implements LogicalTypeAnnotation { + public static class EnumLogicalTypeAnnotation extends LogicalTypeAnnotation { private static final EnumLogicalTypeAnnotation INSTANCE = new EnumLogicalTypeAnnotation(); private EnumLogicalTypeAnnotation() { @@ -324,6 +441,11 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { logicalTypeAnnotationVisitor.visit(this); } + @Override + public LogicalTypes getType() { + return LogicalTypes.ENUM; + } + @Override public boolean equals(Object obj) { return obj instanceof EnumLogicalTypeAnnotation; @@ -336,7 +458,7 @@ public int hashCode() { } } - class DecimalLogicalTypeAnnotation implements LogicalTypeAnnotation { + public static class DecimalLogicalTypeAnnotation extends LogicalTypeAnnotation { private final int scale; private final int precision; @@ -373,6 +495,22 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { logicalTypeAnnotationVisitor.visit(this); } + @Override + public LogicalTypes getType() { + return LogicalTypes.DECIMAL; + } + + @Override + protected String typeParametersAsString() { + StringBuilder sb = new StringBuilder(); + sb.append("("); + sb.append(precision); + sb.append(","); + sb.append(scale); + sb.append(")"); + return sb.toString(); + } + @Override public boolean equals(Object obj) { if (!(obj instanceof DecimalLogicalTypeAnnotation)) { @@ -388,7 +526,7 @@ public int hashCode() { } } - class DateLogicalTypeAnnotation implements LogicalTypeAnnotation { + public static class 
DateLogicalTypeAnnotation extends LogicalTypeAnnotation { private static final DateLogicalTypeAnnotation INSTANCE = new DateLogicalTypeAnnotation(); private DateLogicalTypeAnnotation() { @@ -414,6 +552,11 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { logicalTypeAnnotationVisitor.visit(this); } + @Override + public LogicalTypes getType() { + return LogicalTypes.DATE; + } + @Override public boolean equals(Object obj) { return obj instanceof DateLogicalTypeAnnotation; @@ -426,7 +569,7 @@ public int hashCode() { } } - enum TimeUnit { + public enum TimeUnit { MILLIS, MICROS } @@ -442,7 +585,7 @@ static org.apache.parquet.format.TimeUnit convertUnit(TimeUnit unit) { } } - class TimeLogicalTypeAnnotation implements LogicalTypeAnnotation { + public static class TimeLogicalTypeAnnotation extends LogicalTypeAnnotation { private final boolean isAdjustedToUTC; private final TimeUnit unit; @@ -485,6 +628,22 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { logicalTypeAnnotationVisitor.visit(this); } + @Override + public LogicalTypes getType() { + return LogicalTypes.TIME; + } + + @Override + protected String typeParametersAsString() { + StringBuilder sb = new StringBuilder(); + sb.append("("); + sb.append(unit); + sb.append(","); + sb.append(isAdjustedToUTC); + sb.append(")"); + return sb.toString(); + } + public TimeUnit getUnit() { return unit; } @@ -508,7 +667,7 @@ public int hashCode() { } } - class TimestampLogicalTypeAnnotation implements LogicalTypeAnnotation { + public static class TimestampLogicalTypeAnnotation extends LogicalTypeAnnotation { private final boolean isAdjustedToUTC; private final TimeUnit unit; @@ -551,6 +710,22 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { logicalTypeAnnotationVisitor.visit(this); } + @Override + public LogicalTypes getType() { + return LogicalTypes.TIMESTAMP; + } + + @Override + protected String typeParametersAsString() { + 
StringBuilder sb = new StringBuilder(); + sb.append("("); + sb.append(unit); + sb.append(","); + sb.append(isAdjustedToUTC); + sb.append(")"); + return sb.toString(); + } + public TimeUnit getUnit() { return unit; } @@ -574,7 +749,7 @@ public int hashCode() { } } - class IntLogicalTypeAnnotation implements LogicalTypeAnnotation { + public static class IntLogicalTypeAnnotation extends LogicalTypeAnnotation { private final int bitWidth; private final boolean isSigned; @@ -634,6 +809,22 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { logicalTypeAnnotationVisitor.visit(this); } + @Override + public LogicalTypes getType() { + return LogicalTypes.INT; + } + + @Override + protected String typeParametersAsString() { + StringBuilder sb = new StringBuilder(); + sb.append("("); + sb.append(bitWidth); + sb.append(","); + sb.append(isSigned); + sb.append(")"); + return sb.toString(); + } + public int getBitWidth() { return bitWidth; } @@ -657,7 +848,7 @@ public int hashCode() { } } - class JsonLogicalTypeAnnotation implements LogicalTypeAnnotation { + public static class JsonLogicalTypeAnnotation extends LogicalTypeAnnotation { private static final JsonLogicalTypeAnnotation INSTANCE = new JsonLogicalTypeAnnotation(); private JsonLogicalTypeAnnotation() { @@ -683,6 +874,11 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { logicalTypeAnnotationVisitor.visit(this); } + @Override + public LogicalTypes getType() { + return LogicalTypes.JSON; + } + @Override public boolean equals(Object obj) { return obj instanceof JsonLogicalTypeAnnotation; @@ -695,7 +891,7 @@ public int hashCode() { } } - class BsonLogicalTypeAnnotation implements LogicalTypeAnnotation { + public static class BsonLogicalTypeAnnotation extends LogicalTypeAnnotation { private static final BsonLogicalTypeAnnotation INSTANCE = new BsonLogicalTypeAnnotation(); private BsonLogicalTypeAnnotation() { @@ -721,6 +917,11 @@ public void 
accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { logicalTypeAnnotationVisitor.visit(this); } + @Override + public LogicalTypes getType() { + return LogicalTypes.BSON; + } + @Override public boolean equals(Object obj) { return obj instanceof BsonLogicalTypeAnnotation; @@ -736,9 +937,13 @@ public int hashCode() { // This logical type annotation is implemented to support backward compatibility with ConvertedType. // The new logical type representation in parquet-format doesn't have any interval type, // thus this annotation is mapped to UNKNOWN. - class IntervalLogicalTypeAnnotation implements LogicalTypeAnnotation { + public static class IntervalLogicalTypeAnnotation extends LogicalTypeAnnotation { private static IntervalLogicalTypeAnnotation INSTANCE = new IntervalLogicalTypeAnnotation(); + public static LogicalTypeAnnotation getInstance() { + return INSTANCE; + } + private IntervalLogicalTypeAnnotation() { } @@ -762,6 +967,11 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { logicalTypeAnnotationVisitor.visit(this); } + @Override + public LogicalTypes getType() { + return LogicalTypes.INTERVAL; + } + @Override public boolean equals(Object obj) { return obj instanceof IntervalLogicalTypeAnnotation; @@ -777,9 +987,13 @@ public int hashCode() { // This logical type annotation is implemented to support backward compatibility with ConvertedType. // The new logical type representation in parquet-format doesn't have any key-value type, // thus this annotation is mapped to UNKNOWN. This type shouldn't be used. 
- class MapKeyValueTypeAnnotation implements LogicalTypeAnnotation { + public static class MapKeyValueTypeAnnotation extends LogicalTypeAnnotation { private static MapKeyValueTypeAnnotation INSTANCE = new MapKeyValueTypeAnnotation(); + public static MapKeyValueTypeAnnotation getInstance() { + return INSTANCE; + } + private MapKeyValueTypeAnnotation() { } @@ -803,6 +1017,11 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { logicalTypeAnnotationVisitor.visit(this); } + @Override + public LogicalTypes getType() { + return LogicalTypes.MAP_KEY_VALUE; + } + @Override public boolean equals(Object obj) { return obj instanceof MapKeyValueTypeAnnotation; @@ -818,10 +1037,10 @@ public int hashCode() { /** * Implement this interface to visit a logical type annotation in the schema. * The default implementation for each logical type specific visitor method is empty. - * + *

* Example usage: logicalTypeAnnotation.accept(new LogicalTypeAnnotationVisitor() { ... }); */ - interface LogicalTypeAnnotationVisitor { + public interface LogicalTypeAnnotationVisitor { default void visit(StringLogicalTypeAnnotation logicalTypeAnnotation) { } diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java b/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java index f0c178af68..4b134c5edb 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -18,7 +18,9 @@ */ package org.apache.parquet.schema; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; import java.util.Locale; import java.util.StringTokenizer; @@ -161,25 +163,44 @@ private static void addPrimitiveType(Tokenizer st, PrimitiveTypeName type, Repet t = st.nextToken(); OriginalType originalType = null; if (t.equalsIgnoreCase("(")) { - originalType = OriginalType.valueOf(st.nextToken()); - childBuilder.as(originalType); - if (OriginalType.DECIMAL == originalType) { + t = st.nextToken(); + if (isLogicalType(t)) { + LogicalTypeAnnotation.LogicalTypes logicalType = LogicalTypeAnnotation.LogicalTypes.valueOf(t); t = st.nextToken(); - // parse precision and scale - if 
(t.equalsIgnoreCase("(")) { - childBuilder.precision(Integer.parseInt(st.nextToken())); - t = st.nextToken(); - if (t.equalsIgnoreCase(",")) { - childBuilder.scale(Integer.parseInt(st.nextToken())); + List tokens = new ArrayList<>(); + if ("(".equals(t)) { + while (!")".equals(t)) { + if (!(",".equals(t) || "(".equals(t) || ")".equals(t))) { + tokens.add(t); + } t = st.nextToken(); } - check(t, ")", "decimal type ended by )", st); t = st.nextToken(); } + LogicalTypeAnnotation logicalTypeAnnotation = logicalType.fromString(tokens); + childBuilder.as(logicalTypeAnnotation); } else { - t = st.nextToken(); + // Try to parse as old logical type, called OriginalType + originalType = OriginalType.valueOf(t); + childBuilder.as(originalType); + if (OriginalType.DECIMAL == originalType) { + t = st.nextToken(); + // parse precision and scale + if (t.equalsIgnoreCase("(")) { + childBuilder.precision(Integer.parseInt(st.nextToken())); + t = st.nextToken(); + if (t.equalsIgnoreCase(",")) { + childBuilder.scale(Integer.parseInt(st.nextToken())); + t = st.nextToken(); + } + check(t, ")", "decimal type ended by )", st); + t = st.nextToken(); + } + } else { + t = st.nextToken(); + } } - check(t, ")", "original type ended by )", st); + check(t, ")", "logical type ended by )", st); t = st.nextToken(); } if (t.equals("=")) { @@ -195,6 +216,10 @@ private static void addPrimitiveType(Tokenizer st, PrimitiveTypeName type, Repet } } + private static boolean isLogicalType(String t) { + return Arrays.stream(LogicalTypeAnnotation.LogicalTypes.values()).anyMatch((type) -> type.name().equals(t)); + } + private static PrimitiveTypeName asPrimitive(String t, Tokenizer st) { try { return PrimitiveTypeName.valueOf(t.toUpperCase(Locale.ENGLISH)); diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java index 1edd3f86b1..369b277b83 100644 --- 
a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java @@ -593,17 +593,9 @@ public void writeToStringBuilder(StringBuilder sb, String indent) { sb.append("(" + length + ")"); } sb.append(" ").append(getName()); - if (getOriginalType() != null) { - sb.append(" (").append(getOriginalType()); - DecimalMetadata meta = getDecimalMetadata(); - if (meta != null) { - sb.append("(") - .append(meta.getPrecision()) - .append(",") - .append(meta.getScale()) - .append(")"); - } - sb.append(")"); + if (getLogicalTypeAnnotation() != null) { + // TODO: should we print decimal metadata too? + sb.append(" (").append(getLogicalTypeAnnotation().toString()).append(")"); } if (getId() != null) { sb.append(" = ").append(getId()); diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java index 456ee25b0a..916fb29991 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java @@ -23,7 +23,6 @@ import java.util.List; import org.apache.parquet.Preconditions; -import org.apache.parquet.format.DecimalType; import org.apache.parquet.schema.ColumnOrder.ColumnOrderName; import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; import org.apache.parquet.schema.Type.ID; diff --git a/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java b/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java index e2f737abf9..e010137c21 100644 --- a/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java +++ b/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -30,6 +30,7 @@ import static org.apache.parquet.schema.OriginalType.*; import static org.apache.parquet.schema.Types.buildMessage; +import org.apache.parquet.schema.LogicalTypeAnnotation; import org.junit.Test; import org.apache.parquet.schema.GroupType; @@ -248,6 +249,8 @@ public void testTimeAnnotations() { " required int32 time (TIME_MILLIS);" + " required int64 timestamp (TIMESTAMP_MILLIS);" + " required FIXED_LEN_BYTE_ARRAY(12) interval (INTERVAL);" + + " required int32 newTime (TIME(MILLIS,true));" + + " required int64 newTimestamp (TIMESTAMP(MILLIS,false));" + "}\n"; MessageType parsed = MessageTypeParser.parseMessageType(message); @@ -256,7 +259,9 @@ public void testTimeAnnotations() { .required(INT32).as(TIME_MILLIS).named("time") .required(INT64).as(TIMESTAMP_MILLIS).named("timestamp") .required(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("interval") - .named("TimeMessage"); + .required(INT32).as(LogicalTypeAnnotation.timeType(true, LogicalTypeAnnotation.TimeUnit.MILLIS)).named("newTime") + .required(INT64).as(LogicalTypeAnnotation.timestampType(false, LogicalTypeAnnotation.TimeUnit.MILLIS)).named("newTimestamp") + .named("TimeMessage"); assertEquals(expected, parsed); MessageType reparsed = MessageTypeParser.parseMessageType(parsed.toString()); @@ -293,6 +298,36 @@ public void testIntAnnotations() { assertEquals(expected, reparsed); } + @Test + public void testIntegerAnnotations() { + String message = "message IntMessage {" + + " required int32 i8 
(INT(8,true));" + + " required int32 i16 (INT(16,true));" + + " required int32 i32 (INT(32,true));" + + " required int64 i64 (INT(64,true));" + + " required int32 u8 (INT(8,false));" + + " required int32 u16 (INT(16,false));" + + " required int32 u32 (INT(32,false));" + + " required int64 u64 (INT(64,false));" + + "}\n"; + + MessageType parsed = MessageTypeParser.parseMessageType(message); + MessageType expected = Types.buildMessage() + .required(INT32).as(LogicalTypeAnnotation.intType(8, true)).named("i8") + .required(INT32).as(LogicalTypeAnnotation.intType(16, true)).named("i16") + .required(INT32).as(LogicalTypeAnnotation.intType(32, true)).named("i32") + .required(INT64).as(LogicalTypeAnnotation.intType(64, true)).named("i64") + .required(INT32).as(LogicalTypeAnnotation.intType(8, false)).named("u8") + .required(INT32).as(LogicalTypeAnnotation.intType(16, false)).named("u16") + .required(INT32).as(LogicalTypeAnnotation.intType(32, false)).named("u32") + .required(INT64).as(LogicalTypeAnnotation.intType(64, false)).named("u64") + .named("IntMessage"); + + assertEquals(expected, parsed); + MessageType reparsed = MessageTypeParser.parseMessageType(parsed.toString()); + assertEquals(expected, reparsed); + } + @Test public void testEmbeddedAnnotations() { String message = "message EmbeddedMessage {" + diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java index 0f535e1e2e..da5adc9008 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java @@ -602,7 +602,7 @@ LogicalTypeAnnotation getOriginalType(ConvertedType type, SchemaElement schemaEl case MAP: return LogicalTypeAnnotation.mapType(); case MAP_KEY_VALUE: - return LogicalTypeAnnotation.mapKeyValueType(); + return 
LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance(); case LIST: return LogicalTypeAnnotation.listType(); case ENUM: @@ -622,7 +622,7 @@ LogicalTypeAnnotation getOriginalType(ConvertedType type, SchemaElement schemaEl case TIMESTAMP_MICROS: return LogicalTypeAnnotation.timestampType(true, LogicalTypeAnnotation.TimeUnit.MICROS); case INTERVAL: - return LogicalTypeAnnotation.intervalType(); + return LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.getInstance(); case INT_8: return LogicalTypeAnnotation.intType(8, true); case INT_16: From 3c426d98210c7bfa72c9ae8e3929f9da647299df Mon Sep 17 00:00:00 2001 From: Nandor Kollar Date: Fri, 11 May 2018 14:52:52 +0200 Subject: [PATCH 10/12] Address latest code review comments --- .../parquet/schema/LogicalTypeAnnotation.java | 262 ++---------------- .../parquet/schema/MessageTypeParser.java | 4 +- .../parquet/parser/TestParquetParser.java | 25 +- .../converter/ParquetMetadataConverter.java | 170 +++++++++++- .../TestParquetMetadataConverter.java | 4 +- 5 files changed, 213 insertions(+), 252 deletions(-) diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java index 9a3a033bc6..4a2ff5cdf1 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java @@ -19,28 +19,13 @@ package org.apache.parquet.schema; import org.apache.parquet.Preconditions; -import org.apache.parquet.format.BsonType; -import org.apache.parquet.format.ConvertedType; -import org.apache.parquet.format.DateType; -import org.apache.parquet.format.DecimalType; -import org.apache.parquet.format.EnumType; -import org.apache.parquet.format.IntType; -import org.apache.parquet.format.JsonType; -import org.apache.parquet.format.ListType; -import org.apache.parquet.format.LogicalType; -import 
org.apache.parquet.format.MapType; -import org.apache.parquet.format.MicroSeconds; -import org.apache.parquet.format.MilliSeconds; -import org.apache.parquet.format.NullType; -import org.apache.parquet.format.StringType; -import org.apache.parquet.format.TimeType; -import org.apache.parquet.format.TimestampType; import java.util.List; import java.util.Objects; public abstract class LogicalTypeAnnotation { - public enum LogicalTypes { + // This is a private enum intended only for internal use for parsing the schema + public enum LogicalTypeToken { MAP { @Override protected LogicalTypeAnnotation fromString(List params) { @@ -135,20 +120,6 @@ protected LogicalTypeAnnotation fromString(List params) { protected abstract LogicalTypeAnnotation fromString(List params); } - /** - * Convert this parquet-mr logical type to parquet-format LogicalType. - * - * @return the parquet-format LogicalType representation of this logical type implementation - */ - public abstract LogicalType toLogicalType(); - - /** - * Convert this parquet-mr logical type to parquet-format ConvertedType. - * - * @return the parquet-format ConvertedType representation of this logical type implementation - */ - public abstract ConvertedType toConvertedType(); - /** * Convert this logical type to old logical type representation in parquet-mr (if there's any). * Those logical type implementations, which don't have a corresponding mapping should return null. 
@@ -164,7 +135,7 @@ protected LogicalTypeAnnotation fromString(List params) { */ public abstract void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor); - public abstract LogicalTypes getType(); + protected abstract LogicalTypeToken getType(); protected String typeParametersAsString() { return ""; @@ -237,7 +208,6 @@ public static LogicalTypeAnnotation fromOriginalType(OriginalType originalType, } } - public static StringLogicalTypeAnnotation stringType() { return StringLogicalTypeAnnotation.INSTANCE; } @@ -292,16 +262,6 @@ public static class StringLogicalTypeAnnotation extends LogicalTypeAnnotation { private StringLogicalTypeAnnotation() { } - @Override - public LogicalType toLogicalType() { - return LogicalType.STRING(new StringType()); - } - - @Override - public ConvertedType toConvertedType() { - return ConvertedType.UTF8; - } - @Override public OriginalType toOriginalType() { return OriginalType.UTF8; @@ -313,8 +273,8 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - public LogicalTypes getType() { - return LogicalTypes.UTF8; + protected LogicalTypeToken getType() { + return LogicalTypeToken.UTF8; } @Override @@ -335,16 +295,6 @@ public static class MapLogicalTypeAnnotation extends LogicalTypeAnnotation { private MapLogicalTypeAnnotation() { } - @Override - public LogicalType toLogicalType() { - return LogicalType.MAP(new MapType()); - } - - @Override - public ConvertedType toConvertedType() { - return ConvertedType.MAP; - } - @Override public OriginalType toOriginalType() { return OriginalType.MAP; @@ -356,8 +306,8 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - public LogicalTypes getType() { - return LogicalTypes.MAP; + protected LogicalTypeToken getType() { + return LogicalTypeToken.MAP; } @Override @@ -378,16 +328,6 @@ public static class ListLogicalTypeAnnotation extends LogicalTypeAnnotation { private ListLogicalTypeAnnotation() { } - 
@Override - public LogicalType toLogicalType() { - return LogicalType.LIST(new ListType()); - } - - @Override - public ConvertedType toConvertedType() { - return ConvertedType.LIST; - } - @Override public OriginalType toOriginalType() { return OriginalType.LIST; @@ -399,8 +339,8 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - public LogicalTypes getType() { - return LogicalTypes.LIST; + protected LogicalTypeToken getType() { + return LogicalTypeToken.LIST; } @Override @@ -421,16 +361,6 @@ public static class EnumLogicalTypeAnnotation extends LogicalTypeAnnotation { private EnumLogicalTypeAnnotation() { } - @Override - public LogicalType toLogicalType() { - return LogicalType.ENUM(new EnumType()); - } - - @Override - public ConvertedType toConvertedType() { - return ConvertedType.ENUM; - } - @Override public OriginalType toOriginalType() { return OriginalType.ENUM; @@ -442,8 +372,8 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - public LogicalTypes getType() { - return LogicalTypes.ENUM; + protected LogicalTypeToken getType() { + return LogicalTypeToken.ENUM; } @Override @@ -475,16 +405,6 @@ public int getScale() { return scale; } - @Override - public LogicalType toLogicalType() { - return LogicalType.DECIMAL(new DecimalType(scale, precision)); - } - - @Override - public ConvertedType toConvertedType() { - return ConvertedType.DECIMAL; - } - @Override public OriginalType toOriginalType() { return OriginalType.DECIMAL; @@ -496,8 +416,8 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - public LogicalTypes getType() { - return LogicalTypes.DECIMAL; + protected LogicalTypeToken getType() { + return LogicalTypeToken.DECIMAL; } @Override @@ -532,16 +452,6 @@ public static class DateLogicalTypeAnnotation extends LogicalTypeAnnotation { private DateLogicalTypeAnnotation() { } - @Override - public LogicalType toLogicalType() { - 
return LogicalType.DATE(new DateType()); - } - - @Override - public ConvertedType toConvertedType() { - return ConvertedType.DATE; - } - @Override public OriginalType toOriginalType() { return OriginalType.DATE; @@ -553,8 +463,8 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - public LogicalTypes getType() { - return LogicalTypes.DATE; + protected LogicalTypeToken getType() { + return LogicalTypeToken.DATE; } @Override @@ -574,17 +484,6 @@ public enum TimeUnit { MICROS } - static org.apache.parquet.format.TimeUnit convertUnit(TimeUnit unit) { - switch (unit) { - case MICROS: - return org.apache.parquet.format.TimeUnit.MICROS(new MicroSeconds()); - case MILLIS: - return org.apache.parquet.format.TimeUnit.MILLIS(new MilliSeconds()); - default: - throw new RuntimeException("Unknown time unit " + unit); - } - } - public static class TimeLogicalTypeAnnotation extends LogicalTypeAnnotation { private final boolean isAdjustedToUTC; private final TimeUnit unit; @@ -594,23 +493,6 @@ private TimeLogicalTypeAnnotation(boolean isAdjustedToUTC, TimeUnit unit) { this.unit = unit; } - @Override - public LogicalType toLogicalType() { - return LogicalType.TIME(new TimeType(isAdjustedToUTC, convertUnit(unit))); - } - - @Override - public ConvertedType toConvertedType() { - switch (toOriginalType()) { - case TIME_MILLIS: - return ConvertedType.TIME_MILLIS; - case TIME_MICROS: - return ConvertedType.TIME_MICROS; - default: - throw new RuntimeException("Unknown converted type for " + toOriginalType()); - } - } - @Override public OriginalType toOriginalType() { switch (unit) { @@ -629,8 +511,8 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - public LogicalTypes getType() { - return LogicalTypes.TIME; + protected LogicalTypeToken getType() { + return LogicalTypeToken.TIME; } @Override @@ -676,23 +558,6 @@ private TimestampLogicalTypeAnnotation(boolean isAdjustedToUTC, TimeUnit unit) { this.unit 
= unit; } - @Override - public LogicalType toLogicalType() { - return LogicalType.TIMESTAMP(new TimestampType(isAdjustedToUTC, convertUnit(unit))); - } - - @Override - public ConvertedType toConvertedType() { - switch (toOriginalType()) { - case TIMESTAMP_MICROS: - return ConvertedType.TIMESTAMP_MICROS; - case TIMESTAMP_MILLIS: - return ConvertedType.TIMESTAMP_MILLIS; - default: - throw new RuntimeException("Unknown converted type for " + unit); - } - } - @Override public OriginalType toOriginalType() { switch (unit) { @@ -711,8 +576,8 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - public LogicalTypes getType() { - return LogicalTypes.TIMESTAMP; + protected LogicalTypeToken getType() { + return LogicalTypeToken.TIMESTAMP; } @Override @@ -759,35 +624,6 @@ private IntLogicalTypeAnnotation(int bitWidth, boolean isSigned) { this.isSigned = isSigned; } - @Override - public LogicalType toLogicalType() { - return LogicalType.INTEGER(new IntType((byte) bitWidth, isSigned)); - } - - @Override - public ConvertedType toConvertedType() { - switch (toOriginalType()) { - case INT_8: - return ConvertedType.INT_8; - case INT_16: - return ConvertedType.INT_16; - case INT_32: - return ConvertedType.INT_32; - case INT_64: - return ConvertedType.INT_64; - case UINT_8: - return ConvertedType.UINT_8; - case UINT_16: - return ConvertedType.UINT_16; - case UINT_32: - return ConvertedType.UINT_32; - case UINT_64: - return ConvertedType.UINT_64; - default: - throw new RuntimeException("Unknown original type " + toOriginalType()); - } - } - @Override public OriginalType toOriginalType() { switch (bitWidth) { @@ -810,8 +646,8 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - public LogicalTypes getType() { - return LogicalTypes.INT; + protected LogicalTypeToken getType() { + return LogicalTypeToken.INT; } @Override @@ -854,16 +690,6 @@ public static class JsonLogicalTypeAnnotation extends 
LogicalTypeAnnotation { private JsonLogicalTypeAnnotation() { } - @Override - public LogicalType toLogicalType() { - return LogicalType.JSON(new JsonType()); - } - - @Override - public ConvertedType toConvertedType() { - return ConvertedType.JSON; - } - @Override public OriginalType toOriginalType() { return OriginalType.JSON; @@ -875,8 +701,8 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - public LogicalTypes getType() { - return LogicalTypes.JSON; + protected LogicalTypeToken getType() { + return LogicalTypeToken.JSON; } @Override @@ -897,16 +723,6 @@ public static class BsonLogicalTypeAnnotation extends LogicalTypeAnnotation { private BsonLogicalTypeAnnotation() { } - @Override - public LogicalType toLogicalType() { - return LogicalType.BSON(new BsonType()); - } - - @Override - public ConvertedType toConvertedType() { - return ConvertedType.BSON; - } - @Override public OriginalType toOriginalType() { return OriginalType.BSON; @@ -918,8 +734,8 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - public LogicalTypes getType() { - return LogicalTypes.BSON; + protected LogicalTypeToken getType() { + return LogicalTypeToken.BSON; } @Override @@ -947,16 +763,6 @@ public static LogicalTypeAnnotation getInstance() { private IntervalLogicalTypeAnnotation() { } - @Override - public LogicalType toLogicalType() { - return LogicalType.UNKNOWN(new NullType()); - } - - @Override - public ConvertedType toConvertedType() { - return ConvertedType.INTERVAL; - } - @Override public OriginalType toOriginalType() { return OriginalType.INTERVAL; @@ -968,8 +774,8 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - public LogicalTypes getType() { - return LogicalTypes.INTERVAL; + protected LogicalTypeToken getType() { + return LogicalTypeToken.INTERVAL; } @Override @@ -997,16 +803,6 @@ public static MapKeyValueTypeAnnotation getInstance() { private 
MapKeyValueTypeAnnotation() { } - @Override - public LogicalType toLogicalType() { - return LogicalType.UNKNOWN(new NullType()); - } - - @Override - public ConvertedType toConvertedType() { - return ConvertedType.MAP_KEY_VALUE; - } - @Override public OriginalType toOriginalType() { return OriginalType.MAP_KEY_VALUE; @@ -1018,8 +814,8 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - public LogicalTypes getType() { - return LogicalTypes.MAP_KEY_VALUE; + protected LogicalTypeToken getType() { + return LogicalTypeToken.MAP_KEY_VALUE; } @Override diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java b/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java index 4b134c5edb..a3051eae3c 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java @@ -165,7 +165,7 @@ private static void addPrimitiveType(Tokenizer st, PrimitiveTypeName type, Repet if (t.equalsIgnoreCase("(")) { t = st.nextToken(); if (isLogicalType(t)) { - LogicalTypeAnnotation.LogicalTypes logicalType = LogicalTypeAnnotation.LogicalTypes.valueOf(t); + LogicalTypeAnnotation.LogicalTypeToken logicalType = LogicalTypeAnnotation.LogicalTypeToken.valueOf(t); t = st.nextToken(); List tokens = new ArrayList<>(); if ("(".equals(t)) { @@ -217,7 +217,7 @@ private static void addPrimitiveType(Tokenizer st, PrimitiveTypeName type, Repet } private static boolean isLogicalType(String t) { - return Arrays.stream(LogicalTypeAnnotation.LogicalTypes.values()).anyMatch((type) -> type.name().equals(t)); + return Arrays.stream(LogicalTypeAnnotation.LogicalTypeToken.values()).anyMatch((type) -> type.name().equals(t)); } private static PrimitiveTypeName asPrimitive(String t, Tokenizer st) { diff --git a/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java 
b/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java index e010137c21..5082501af6 100644 --- a/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java +++ b/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java @@ -18,6 +18,10 @@ */ package org.apache.parquet.parser; +import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS; +import static org.apache.parquet.schema.LogicalTypeAnnotation.intType; +import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType; +import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType; import static org.junit.Assert.assertEquals; import static org.apache.parquet.schema.MessageTypeParser.parseMessageType; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY; @@ -30,7 +34,6 @@ import static org.apache.parquet.schema.OriginalType.*; import static org.apache.parquet.schema.Types.buildMessage; -import org.apache.parquet.schema.LogicalTypeAnnotation; import org.junit.Test; import org.apache.parquet.schema.GroupType; @@ -259,8 +262,8 @@ public void testTimeAnnotations() { .required(INT32).as(TIME_MILLIS).named("time") .required(INT64).as(TIMESTAMP_MILLIS).named("timestamp") .required(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("interval") - .required(INT32).as(LogicalTypeAnnotation.timeType(true, LogicalTypeAnnotation.TimeUnit.MILLIS)).named("newTime") - .required(INT64).as(LogicalTypeAnnotation.timestampType(false, LogicalTypeAnnotation.TimeUnit.MILLIS)).named("newTimestamp") + .required(INT32).as(timeType(true, MILLIS)).named("newTime") + .required(INT64).as(timestampType(false, MILLIS)).named("newTimestamp") .named("TimeMessage"); assertEquals(expected, parsed); @@ -313,14 +316,14 @@ public void testIntegerAnnotations() { MessageType parsed = MessageTypeParser.parseMessageType(message); MessageType expected = Types.buildMessage() - .required(INT32).as(LogicalTypeAnnotation.intType(8, 
true)).named("i8") - .required(INT32).as(LogicalTypeAnnotation.intType(16, true)).named("i16") - .required(INT32).as(LogicalTypeAnnotation.intType(32, true)).named("i32") - .required(INT64).as(LogicalTypeAnnotation.intType(64, true)).named("i64") - .required(INT32).as(LogicalTypeAnnotation.intType(8, false)).named("u8") - .required(INT32).as(LogicalTypeAnnotation.intType(16, false)).named("u16") - .required(INT32).as(LogicalTypeAnnotation.intType(32, false)).named("u32") - .required(INT64).as(LogicalTypeAnnotation.intType(64, false)).named("u64") + .required(INT32).as(intType(8, true)).named("i8") + .required(INT32).as(intType(16, true)).named("i16") + .required(INT32).as(intType(32, true)).named("i32") + .required(INT64).as(intType(64, true)).named("i64") + .required(INT32).as(intType(8, false)).named("u8") + .required(INT32).as(intType(16, false)).named("u16") + .required(INT32).as(intType(32, false)).named("u32") + .required(INT64).as(intType(64, false)).named("u64") .named("IntMessage"); assertEquals(expected, parsed); diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java index da5adc9008..b040d27fdd 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java @@ -39,11 +39,21 @@ import org.apache.hadoop.conf.Configuration; import org.apache.parquet.CorruptStatistics; import org.apache.parquet.ParquetReadOptions; +import org.apache.parquet.format.BsonType; import org.apache.parquet.format.CompressionCodec; +import org.apache.parquet.format.DateType; import org.apache.parquet.format.DecimalType; +import org.apache.parquet.format.EnumType; import org.apache.parquet.format.IntType; +import org.apache.parquet.format.JsonType; +import org.apache.parquet.format.ListType; 
import org.apache.parquet.format.LogicalType; +import org.apache.parquet.format.MapType; +import org.apache.parquet.format.MicroSeconds; +import org.apache.parquet.format.MilliSeconds; +import org.apache.parquet.format.NullType; import org.apache.parquet.format.PageEncodingStats; +import org.apache.parquet.format.StringType; import org.apache.parquet.format.TimeType; import org.apache.parquet.format.TimeUnit; import org.apache.parquet.format.TimestampType; @@ -179,8 +189,8 @@ public void visit(PrimitiveType primitiveType) { element.setRepetition_type(toParquetRepetition(primitiveType.getRepetition())); element.setType(getType(primitiveType.getPrimitiveTypeName())); if (primitiveType.getLogicalTypeAnnotation() != null) { - element.setConverted_type(primitiveType.getLogicalTypeAnnotation().toConvertedType()); - element.setLogicalType(primitiveType.getLogicalTypeAnnotation().toLogicalType()); + element.setConverted_type(convertToConvertedType(primitiveType.getLogicalTypeAnnotation())); + element.setLogicalType(convertToLogicalType(primitiveType.getLogicalTypeAnnotation())); } if (primitiveType.getDecimalMetadata() != null) { element.setPrecision(primitiveType.getDecimalMetadata().getPrecision()); @@ -209,8 +219,8 @@ public void visit(GroupType groupType) { SchemaElement element = new SchemaElement(groupType.getName()); element.setRepetition_type(toParquetRepetition(groupType.getRepetition())); if (groupType.getLogicalTypeAnnotation() != null) { - element.setConverted_type(groupType.getLogicalTypeAnnotation().toConvertedType()); - element.setLogicalType(groupType.getLogicalTypeAnnotation().toLogicalType()); + element.setConverted_type(convertToConvertedType(groupType.getLogicalTypeAnnotation())); + element.setLogicalType(convertToLogicalType(groupType.getLogicalTypeAnnotation())); } if (groupType.getId() != null) { element.setField_id(groupType.getId().intValue()); @@ -229,6 +239,158 @@ private void visitChildren(final List result, }); } + LogicalType 
convertToLogicalType(LogicalTypeAnnotation logicalTypeAnnotation) { + LogicalTypeConverterVisitor logicalTypeConverterVisitor = new LogicalTypeConverterVisitor(); + logicalTypeAnnotation.accept(logicalTypeConverterVisitor); + return logicalTypeConverterVisitor.logicalType; + } + + ConvertedType convertToConvertedType(LogicalTypeAnnotation logicalTypeAnnotation) { + LogicalTypeConverterVisitor logicalTypeConverterVisitor = new LogicalTypeConverterVisitor(); + logicalTypeAnnotation.accept(logicalTypeConverterVisitor); + return logicalTypeConverterVisitor.convertedType; + } + + + static org.apache.parquet.format.TimeUnit convertUnit(LogicalTypeAnnotation.TimeUnit unit) { + switch (unit) { + case MICROS: + return org.apache.parquet.format.TimeUnit.MICROS(new MicroSeconds()); + case MILLIS: + return org.apache.parquet.format.TimeUnit.MILLIS(new MilliSeconds()); + default: + throw new RuntimeException("Unknown time unit " + unit); + } + } + + private static class LogicalTypeConverterVisitor implements LogicalTypeAnnotation.LogicalTypeAnnotationVisitor { + private LogicalType logicalType; + private ConvertedType convertedType; + + @Override + public void visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation logicalTypeAnnotation) { + logicalType = LogicalType.STRING(new StringType()); + convertedType = ConvertedType.UTF8; + } + + @Override + public void visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation logicalTypeAnnotation) { + logicalType = LogicalType.MAP(new MapType()); + convertedType = ConvertedType.MAP; + } + + @Override + public void visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation logicalTypeAnnotation) { + logicalType = LogicalType.LIST(new ListType()); + convertedType = ConvertedType.LIST; + } + + @Override + public void visit(LogicalTypeAnnotation.EnumLogicalTypeAnnotation logicalTypeAnnotation) { + logicalType = LogicalType.ENUM(new EnumType()); + convertedType = ConvertedType.ENUM; + } + + @Override + public void 
visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation logicalTypeAnnotation) { + logicalType = LogicalType.DECIMAL(new DecimalType(logicalTypeAnnotation.getScale(), logicalTypeAnnotation.getPrecision())); + convertedType = ConvertedType.DECIMAL; + } + + @Override + public void visit(LogicalTypeAnnotation.DateLogicalTypeAnnotation logicalTypeAnnotation) { + logicalType = LogicalType.DATE(new DateType()); + convertedType = ConvertedType.DATE; + } + + @Override + public void visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation logicalTypeAnnotation) { + logicalType = LogicalType.TIME(new TimeType(logicalTypeAnnotation.isAdjustedToUTC(), convertUnit(logicalTypeAnnotation.getUnit()))); + switch (logicalTypeAnnotation.toOriginalType()) { + case TIME_MILLIS: + convertedType = ConvertedType.TIME_MILLIS; + break; + case TIME_MICROS: + convertedType = ConvertedType.TIME_MICROS; + break; + default: + throw new RuntimeException("Unknown converted type for " + logicalTypeAnnotation.toOriginalType()); + } + } + + @Override + public void visit(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation logicalTypeAnnotation) { + logicalType = LogicalType.TIMESTAMP(new TimestampType(logicalTypeAnnotation.isAdjustedToUTC(), convertUnit(logicalTypeAnnotation.getUnit()))); + switch (logicalTypeAnnotation.toOriginalType()) { + case TIMESTAMP_MICROS: + convertedType = ConvertedType.TIMESTAMP_MICROS; + break; + case TIMESTAMP_MILLIS: + convertedType = ConvertedType.TIMESTAMP_MILLIS; + break; + default: + throw new RuntimeException("Unknown converted type for " + logicalTypeAnnotation.toOriginalType()); + } + } + + @Override + public void visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation logicalTypeAnnotation) { + logicalType = LogicalType.INTEGER(new IntType((byte) logicalTypeAnnotation.getBitWidth(), logicalTypeAnnotation.isSigned())); + switch (logicalTypeAnnotation.toOriginalType()) { + case INT_8: + convertedType = ConvertedType.INT_8; + break; + case INT_16: + convertedType = 
ConvertedType.INT_16; + break; + case INT_32: + convertedType = ConvertedType.INT_32; + break; + case INT_64: + convertedType = ConvertedType.INT_64; + break; + case UINT_8: + convertedType = ConvertedType.UINT_8; + break; + case UINT_16: + convertedType = ConvertedType.UINT_16; + break; + case UINT_32: + convertedType = ConvertedType.UINT_32; + break; + case UINT_64: + convertedType = ConvertedType.UINT_64; + break; + default: + throw new RuntimeException("Unknown original type " + logicalTypeAnnotation.toOriginalType()); + } + } + + @Override + public void visit(LogicalTypeAnnotation.JsonLogicalTypeAnnotation logicalTypeAnnotation) { + logicalType = LogicalType.JSON(new JsonType()); + convertedType = ConvertedType.JSON; + } + + @Override + public void visit(LogicalTypeAnnotation.BsonLogicalTypeAnnotation logicalTypeAnnotation) { + logicalType = LogicalType.BSON(new BsonType()); + convertedType = ConvertedType.BSON; + } + + @Override + public void visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation logicalTypeAnnotation) { + logicalType = LogicalType.UNKNOWN(new NullType()); + convertedType = ConvertedType.INTERVAL; + } + + @Override + public void visit(LogicalTypeAnnotation.MapKeyValueTypeAnnotation logicalTypeAnnotation) { + logicalType = LogicalType.UNKNOWN(new NullType()); + convertedType = ConvertedType.MAP_KEY_VALUE; + } + } + private void addRowGroup(ParquetMetadata parquetMetadata, List rowGroups, BlockMetaData block) { //rowGroup.total_byte_size = ; List columns = block.getColumns(); diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java index b713ed7f9b..0ab93703fd 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java @@ -169,10 
+169,10 @@ public void testEnumEquivalence() { } for (OriginalType original : OriginalType.values()) { assertEquals(original, parquetMetadataConverter.getOriginalType( - LogicalTypeAnnotation.fromOriginalType(original, null).toConvertedType(), null).toOriginalType()); + parquetMetadataConverter.convertToConvertedType(LogicalTypeAnnotation.fromOriginalType(original, null)), null).toOriginalType()); } for (ConvertedType converted : ConvertedType.values()) { - assertEquals(converted, parquetMetadataConverter.getOriginalType(converted, null).toConvertedType()); + assertEquals(converted, parquetMetadataConverter.convertToConvertedType(parquetMetadataConverter.getOriginalType(converted, null))); } } From 77f1d520b93e255234b28db410cb0944a8024c22 Mon Sep 17 00:00:00 2001 From: Nandor Kollar Date: Mon, 14 May 2018 16:34:06 +0200 Subject: [PATCH 11/12] Make LogicalTypeToken enum and getType method package private --- .../parquet/schema/LogicalTypeAnnotation.java | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java index 4a2ff5cdf1..0584c81a0f 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java @@ -25,7 +25,7 @@ public abstract class LogicalTypeAnnotation { // This is a private enum intended only for internal use for parsing the schema - public enum LogicalTypeToken { + enum LogicalTypeToken { MAP { @Override protected LogicalTypeAnnotation fromString(List params) { @@ -135,7 +135,7 @@ protected LogicalTypeAnnotation fromString(List params) { */ public abstract void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor); - protected abstract LogicalTypeToken getType(); + abstract LogicalTypeToken getType(); protected String 
typeParametersAsString() { return ""; @@ -273,7 +273,7 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - protected LogicalTypeToken getType() { + LogicalTypeToken getType() { return LogicalTypeToken.UTF8; } @@ -306,7 +306,7 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - protected LogicalTypeToken getType() { + LogicalTypeToken getType() { return LogicalTypeToken.MAP; } @@ -339,7 +339,7 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - protected LogicalTypeToken getType() { + LogicalTypeToken getType() { return LogicalTypeToken.LIST; } @@ -372,7 +372,7 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - protected LogicalTypeToken getType() { + LogicalTypeToken getType() { return LogicalTypeToken.ENUM; } @@ -416,7 +416,7 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - protected LogicalTypeToken getType() { + LogicalTypeToken getType() { return LogicalTypeToken.DECIMAL; } @@ -463,7 +463,7 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - protected LogicalTypeToken getType() { + LogicalTypeToken getType() { return LogicalTypeToken.DATE; } @@ -511,7 +511,7 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - protected LogicalTypeToken getType() { + LogicalTypeToken getType() { return LogicalTypeToken.TIME; } @@ -576,7 +576,7 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - protected LogicalTypeToken getType() { + LogicalTypeToken getType() { return LogicalTypeToken.TIMESTAMP; } @@ -646,7 +646,7 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - protected LogicalTypeToken getType() { + LogicalTypeToken getType() { return LogicalTypeToken.INT; } @@ -701,7 
+701,7 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - protected LogicalTypeToken getType() { + LogicalTypeToken getType() { return LogicalTypeToken.JSON; } @@ -734,7 +734,7 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - protected LogicalTypeToken getType() { + LogicalTypeToken getType() { return LogicalTypeToken.BSON; } @@ -774,7 +774,7 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - protected LogicalTypeToken getType() { + LogicalTypeToken getType() { return LogicalTypeToken.INTERVAL; } @@ -814,7 +814,7 @@ public void accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { } @Override - protected LogicalTypeToken getType() { + LogicalTypeToken getType() { return LogicalTypeToken.MAP_KEY_VALUE; } From 6e1ea5d982b8f4d78973d6dda3157166ab51e55a Mon Sep 17 00:00:00 2001 From: Nandor Kollar Date: Wed, 16 May 2018 14:04:22 +0200 Subject: [PATCH 12/12] Make typeParametersAsString package private --- .../java/org/apache/parquet/schema/LogicalTypeAnnotation.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java index 0584c81a0f..e22867aec8 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java @@ -24,7 +24,6 @@ import java.util.Objects; public abstract class LogicalTypeAnnotation { - // This is a private enum intended only for internal use for parsing the schema enum LogicalTypeToken { MAP { @Override @@ -137,7 +136,7 @@ protected LogicalTypeAnnotation fromString(List params) { abstract LogicalTypeToken getType(); - protected String typeParametersAsString() { + String typeParametersAsString() { return ""; }