From cc9cc5f1fc45fa53d2c14f3c2e048ac898c52a4a Mon Sep 17 00:00:00 2001 From: Matthew McMahon Date: Tue, 23 Jun 2020 15:47:10 +1000 Subject: [PATCH 1/2] PARQUET-1879 MapKeyValue is not a valid Logical Type * Writing UNKNOWN logical type into the schema, causes a breakage when parsing the file with Apache Arrow * Instead use the default, of falling back to null when that backwards-compatibility only logical type is present, but still write the original type --- .../parquet/avro/TestAvroSchemaConverter.java | 10 +- .../apache/parquet/avro/TestReadWrite.java | 4 +- .../parquet/schema/ConversionPatterns.java | 5 +- .../java/org/apache/parquet/schema/Types.java | 8 +- .../parquet/schema/TestTypeBuilders.java | 56 ++++----- .../converter/ParquetMetadataConverter.java | 7 +- .../TestParquetMetadataConverter.java | 54 ++++++--- .../hadoop/TestReadWriteMapKeyValue.java | 107 ++++++++++++++++++ .../io/parquet/TestHiveSchemaConverter.java | 10 +- .../parquet/pig/PigSchemaConverter.java | 3 +- .../parquet/pig/TestPigSchemaConverter.java | 20 ++-- .../parquet/pig/TestTupleRecordConsumer.java | 10 +- .../thrift/TestThriftToParquetFileWriter.java | 22 ++-- .../thrift/TestParquetWriteProtocol.java | 30 ++--- .../thrift/TestThriftSchemaConverter.java | 8 +- ...TestThriftSchemaConverterProjectUnion.java | 12 +- 16 files changed, 250 insertions(+), 116 deletions(-) create mode 100644 parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestReadWriteMapKeyValue.java diff --git a/parquet-avro/src/test/java/org/apache/parquet/avro/TestAvroSchemaConverter.java b/parquet-avro/src/test/java/org/apache/parquet/avro/TestAvroSchemaConverter.java index 786477bf95..6f87acfbbe 100644 --- a/parquet-avro/src/test/java/org/apache/parquet/avro/TestAvroSchemaConverter.java +++ b/parquet-avro/src/test/java/org/apache/parquet/avro/TestAvroSchemaConverter.java @@ -202,13 +202,13 @@ public void testAllTypes() throws Exception { " }\n" + " }\n" + " required group mymap (MAP) {\n" + - " repeated group map (MAP_KEY_VALUE) {\n" + + " repeated group key_value (MAP_KEY_VALUE) {\n" + " required binary key (UTF8);\n" + " required int32 value;\n" + " }\n" + " }\n" + " required group myemptymap (MAP) {\n" + - " repeated group map (MAP_KEY_VALUE) {\n" + + " repeated group key_value (MAP_KEY_VALUE) {\n" + " required binary key (UTF8);\n" + " required int32 value;\n" + " }\n" + @@ -249,13 +249,13 @@ public void testAllTypesOldListBehavior() throws Exception { " repeated int32 array;\n" + " }\n" + " required group mymap (MAP) {\n" + - " repeated group map (MAP_KEY_VALUE) {\n" + + " repeated group key_value (MAP_KEY_VALUE) {\n" + " required binary key (UTF8);\n" + " required int32 value;\n" + " }\n" + " }\n" + " required group myemptymap (MAP) {\n" + - " repeated group map (MAP_KEY_VALUE) {\n" + + " repeated group key_value (MAP_KEY_VALUE) {\n" + " required binary key (UTF8);\n" + " required int32 value;\n" + " }\n" + @@ -320,7 +320,7 @@ public void testOptionalMapValue() throws Exception { schema, "message record1 {\n" + " required group myintmap (MAP) {\n" + - " repeated group map (MAP_KEY_VALUE) {\n" + + " repeated group key_value (MAP_KEY_VALUE) {\n" + " required binary key (UTF8);\n" + " optional int32 value;\n" + " }\n" + diff --git a/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java b/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java index 1e726c373a..6bdf61e372 100644 --- a/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java +++ 
b/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java @@ -539,7 +539,7 @@ public void write(Map record) { recordConsumer.startField("mymap", index); recordConsumer.startGroup(); - recordConsumer.startField("map", 0); + recordConsumer.startField("key_value", 0); recordConsumer.startGroup(); Map mymap = (Map) record.get("mymap"); recordConsumer.startField("key", 0); @@ -553,7 +553,7 @@ public void write(Map record) { } recordConsumer.endField("value", 1); recordConsumer.endGroup(); - recordConsumer.endField("map", 0); + recordConsumer.endField("key_value", 0); recordConsumer.endGroup(); recordConsumer.endField("mymap", index++); diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/ConversionPatterns.java b/parquet-column/src/main/java/org/apache/parquet/schema/ConversionPatterns.java index a530db13c8..8ae66f00a5 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/ConversionPatterns.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/ConversionPatterns.java @@ -30,6 +30,7 @@ */ public abstract class ConversionPatterns { + static final String MAP_REPEATED_NAME = "key_value"; private static final String ELEMENT_NAME = "element"; /** @@ -49,7 +50,7 @@ private static GroupType listWrapper(Repetition repetition, String alias, Logica } public static GroupType mapType(Repetition repetition, String alias, Type keyType, Type valueType) { - return mapType(repetition, alias, "map", keyType, valueType); + return mapType(repetition, alias, MAP_REPEATED_NAME, keyType, valueType); } public static GroupType stringKeyMapType(Repetition repetition, String alias, String mapAlias, Type valueType) { @@ -57,7 +58,7 @@ public static GroupType stringKeyMapType(Repetition repetition, String alias, St } public static GroupType stringKeyMapType(Repetition repetition, String alias, Type valueType) { - return stringKeyMapType(repetition, alias, "map", valueType); + return stringKeyMapType(repetition, alias, MAP_REPEATED_NAME, valueType); } public static GroupType mapType(Repetition repetition, String alias, String mapAlias, Type keyType, Type valueType) { diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java index 05db65538e..570cd066d4 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java @@ -31,6 +31,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.parquet.schema.LogicalTypeAnnotation.mapType; + /** * This class provides fluent builders that produce Parquet schema Types. *

@@ -1179,18 +1181,18 @@ protected Type build(String name) { keyType = STRING_KEY; } - GroupBuilder builder = buildGroup(repetition).as(OriginalType.MAP); + GroupBuilder builder = buildGroup(repetition).as(mapType()); if (id != null) { builder.id(id.intValue()); } if (valueType != null) { return builder - .repeatedGroup().addFields(keyType, valueType).named("map") + .repeatedGroup().addFields(keyType, valueType).named(ConversionPatterns.MAP_REPEATED_NAME) .named(name); } else { return builder - .repeatedGroup().addFields(keyType).named("map") + .repeatedGroup().addFields(keyType).named(ConversionPatterns.MAP_REPEATED_NAME) .named(name); } } diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java index a8816354b7..8d8be8eb15 100644 --- a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java +++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java @@ -486,7 +486,7 @@ public void testRequiredMap() { typeList.add(new PrimitiveType(REQUIRED, INT64, "key")); typeList.add(new PrimitiveType(REQUIRED, INT64, "value")); GroupType expected = new GroupType(REQUIRED, "myMap", OriginalType.MAP, new GroupType(REPEATED, - "map", + "key_value", typeList)); GroupType actual = Types.requiredMap() .key(INT64) @@ -501,7 +501,7 @@ public void testOptionalMap() { typeList.add(new PrimitiveType(REQUIRED, INT64, "key")); typeList.add(new PrimitiveType(REQUIRED, INT64, "value")); GroupType expected = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, - "map", + "key_value", typeList)); GroupType actual = Types.optionalMap() .key(INT64) @@ -515,7 +515,7 @@ public void testMapWithRequiredValue() { List typeList = new ArrayList<>(); typeList.add(new PrimitiveType(REQUIRED, INT64, "key")); typeList.add(new PrimitiveType(REQUIRED, INT64, "value")); - GroupType map = new GroupType(REQUIRED, "myMap", OriginalType.MAP, new GroupType(REPEATED, "map", + GroupType map = new GroupType(REQUIRED, "myMap", OriginalType.MAP, new GroupType(REPEATED, "key_value", typeList)); MessageType expected = new MessageType("mapParent", map); GroupType actual = Types.buildMessage().requiredMap() @@ -530,7 +530,7 @@ public void testMapWithOptionalValue() { List typeList = new ArrayList<>(); typeList.add(new PrimitiveType(REQUIRED, INT64, "key")); typeList.add(new PrimitiveType(OPTIONAL, INT64, "value")); - GroupType map = new GroupType(REQUIRED, "myMap", OriginalType.MAP, new GroupType(REPEATED, "map", + GroupType map = new GroupType(REQUIRED, "myMap", OriginalType.MAP, new GroupType(REPEATED, "key_value", typeList)); MessageType expected = new MessageType("mapParent", map); GroupType actual = Types.buildMessage().requiredMap() @@ -554,7 +554,7 @@ public void testMapWithGroupKeyAndOptionalGroupValue() { valueFields.add(new PrimitiveType(OPTIONAL, INT32, "two")); typeList.add(new GroupType(OPTIONAL, "value", valueFields)); - GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "map", + GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "key_value", typeList)); GroupType actual = Types.optionalMap() @@ -582,7 +582,7 @@ public void testMapWithGroupKeyAndRequiredGroupValue() { valueFields.add(new PrimitiveType(OPTIONAL, INT32, "two")); typeList.add(new GroupType(REQUIRED, "value", valueFields)); - GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, 
"map", + GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "key_value", typeList)); MessageType expected = new MessageType("mapParent", map); @@ -611,7 +611,7 @@ public void testMapWithGroupKeyAndOptionalValue() { typeList.add(new PrimitiveType(OPTIONAL, DOUBLE, "value")); - GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "map", + GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "key_value", typeList)); MessageType expected = new MessageType("mapParent", map); @@ -637,7 +637,7 @@ public void testMapWithGroupKeyAndRequiredValue() { typeList.add(new PrimitiveType(REQUIRED, DOUBLE, "value")); - GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "map", + GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "key_value", typeList)); MessageType expected = new MessageType("mapParent", map); @@ -666,7 +666,7 @@ public void testMapWithOptionalGroupValue() { valueFields.add(new PrimitiveType(OPTIONAL, INT32, "two")); typeList.add(new GroupType(OPTIONAL, "value", valueFields)); - GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "map", + GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "key_value", typeList)); MessageType expected = new MessageType("mapParent", map); @@ -693,7 +693,7 @@ public void testMapWithRequiredGroupValue() { valueFields.add(new PrimitiveType(OPTIONAL, INT32, "two")); typeList.add(new GroupType(REQUIRED, "value", valueFields)); - GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "map", + GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "key_value", typeList)); MessageType expected = new MessageType("mapParent", map); @@ -727,7 +727,7 @@ public void testMapWithNestedGroupKeyAndNestedGroupValue() { valueFields.add(new PrimitiveType(OPTIONAL, INT32, "two")); typeList.add(new GroupType(OPTIONAL, "value", valueFields)); - GroupType map = new GroupType(REQUIRED, "myMap", OriginalType.MAP, new GroupType(REPEATED, "map", + GroupType map = new GroupType(REQUIRED, "myMap", OriginalType.MAP, new GroupType(REPEATED, "key_value", typeList)); MessageType expected = new MessageType("mapParent", map); @@ -761,7 +761,7 @@ public void testMapWithRequiredListValue() { "list", new PrimitiveType(OPTIONAL, INT64, "element")))); - GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "map", + GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "key_value", typeList)); MessageType expected = new MessageType("mapParent", map); @@ -786,7 +786,7 @@ public void testMapWithOptionalListValue() { "list", new PrimitiveType(OPTIONAL, INT64, "element")))); - GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "map", + GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "key_value", typeList)); MessageType expected = new MessageType("mapParent", map); @@ -811,9 +811,9 @@ public void testMapWithRequiredMapValue() { typeList.add(new PrimitiveType(REQUIRED, INT64, "key")); typeList.add(new GroupType(REQUIRED, "value", OriginalType.MAP, - new GroupType(REPEATED, "map", innerMapTypeList))); + new GroupType(REPEATED, "key_value", innerMapTypeList))); - GroupType map = new GroupType(OPTIONAL, "myMap", 
OriginalType.MAP, new GroupType(REPEATED, "map", + GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "key_value", typeList)); MessageType expected = new MessageType("mapParent", map); @@ -839,9 +839,9 @@ public void testMapWithOptionalMapValue() { typeList.add(new PrimitiveType(REQUIRED, INT64, "key")); typeList.add(new GroupType(OPTIONAL, "value", OriginalType.MAP, - new GroupType(REPEATED, "map", innerMapTypeList))); + new GroupType(REPEATED, "key_value", innerMapTypeList))); - GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "map", + GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "key_value", typeList)); MessageType expected = new MessageType("mapParent", map); @@ -869,7 +869,7 @@ public void testMapWithGroupKeyAndRequiredListValue() { "list", new PrimitiveType(OPTIONAL, INT64, "element")))); - GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "map", + GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "key_value", typeList)); MessageType expected = new MessageType("mapParent", map); @@ -898,7 +898,7 @@ public void testMapWithGroupKeyAndOptionalListValue() { "list", new PrimitiveType(OPTIONAL, INT64, "element")))); - GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "map", + GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "key_value", typeList)); MessageType expected = new MessageType("mapParent", map); @@ -928,9 +928,9 @@ public void testMapWithGroupKeyAndRequiredMapValue() { "first" ))); typeList.add(new GroupType(REQUIRED, "value", OriginalType.MAP, - new GroupType(REPEATED, "map", innerMapTypeList))); + new GroupType(REPEATED, "key_value", innerMapTypeList))); - GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "map", + GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "key_value", typeList)); MessageType expected = new MessageType("mapParent", map); @@ -961,9 +961,9 @@ public void testMapWithGroupKeyAndOptionalMapValue() { "first" ))); typeList.add(new GroupType(OPTIONAL, "value", OriginalType.MAP, - new GroupType(REPEATED, "map", innerMapTypeList))); + new GroupType(REPEATED, "key_value", innerMapTypeList))); - GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "map", + GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "key_value", typeList)); MessageType expected = new MessageType("mapParent", map); @@ -986,7 +986,7 @@ public void testMapWithNullValue() { List typeList = new ArrayList<>(); typeList.add(new PrimitiveType(REQUIRED, INT64, "key")); - GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "map", + GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "key_value", typeList)); MessageType expected = new MessageType("mapParent", map); @@ -1004,7 +1004,7 @@ public void testMapWithDefaultKeyAndNullValue() { List typeList = new ArrayList<>(); typeList.add(new PrimitiveType(REQUIRED, BINARY, "key", OriginalType.UTF8)); - GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "map", + GroupType map = new GroupType(OPTIONAL, "myMap", OriginalType.MAP, new GroupType(REPEATED, "key_value", typeList)); MessageType expected = 
new MessageType("mapParent", map); @@ -1022,7 +1022,7 @@ public void testMapWithPreBuiltKeyAndValueTypes() { Type valueType = Types.required(BOOLEAN).named("value"); GroupType map = new GroupType(REQUIRED, "myMap", OriginalType.MAP, - new GroupType(REPEATED, "map", new Type[] { + new GroupType(REPEATED, "key_value", new Type[] { keyType, valueType })); @@ -1204,7 +1204,7 @@ public void testRequiredMapWithinList() { List fields = new ArrayList<>(); fields.add(new GroupType(REQUIRED, "element", OriginalType.MAP, new GroupType(REPEATED, - "map", + "key_value", innerFields))); GroupType expected = new GroupType(OPTIONAL, "myList", OriginalType.LIST, new GroupType(REPEATED, @@ -1230,7 +1230,7 @@ public void testOptionalMapWithinList() { List fields = new ArrayList<>(); fields.add(new GroupType(OPTIONAL, "element", OriginalType.MAP, new GroupType(REPEATED, - "map", + "key_value", innerFields))); GroupType expected = new GroupType(OPTIONAL, "myList", OriginalType.LIST, new GroupType(REPEATED, diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java index e2a93eb3e3..5d8e67b7f4 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java @@ -282,7 +282,7 @@ private void visitChildren(final List result, } LogicalType convertToLogicalType(LogicalTypeAnnotation logicalTypeAnnotation) { - return logicalTypeAnnotation.accept(LOGICAL_TYPE_ANNOTATION_VISITOR).get(); + return logicalTypeAnnotation.accept(LOGICAL_TYPE_ANNOTATION_VISITOR).orElse(null); } ConvertedType convertToConvertedType(LogicalTypeAnnotation logicalTypeAnnotation) { @@ -464,11 +464,6 @@ public Optional visit(UUIDLogicalTypeAnnotation uuidLogicalType) { public Optional visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation intervalLogicalType) { return of(LogicalType.UNKNOWN(new NullType())); } - - @Override - public Optional visit(LogicalTypeAnnotation.MapKeyValueTypeAnnotation mapKeyValueLogicalType) { - return of(LogicalType.UNKNOWN(new NullType())); - } } private void addRowGroup(ParquetMetadata parquetMetadata, List rowGroups, BlockMetaData block) { diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java index c8f631dd11..aeff1ae09e 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java @@ -36,6 +36,7 @@ import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType; import static org.apache.parquet.schema.LogicalTypeAnnotation.uuidType; import static org.apache.parquet.schema.MessageTypeParser.parseMessageType; +import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; @@ -68,7 +69,6 @@ import com.google.common.collect.Sets; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.io.UTF8; import org.apache.parquet.FixedBinaryTestUtils; import org.apache.parquet.Version; import org.apache.parquet.bytes.BytesUtils; @@ -85,6 +85,8 @@ import 
org.apache.parquet.column.statistics.Statistics; import org.apache.parquet.format.DecimalType; import org.apache.parquet.format.LogicalType; +import org.apache.parquet.format.MapType; +import org.apache.parquet.format.StringType; import org.apache.parquet.format.Util; import org.apache.parquet.hadoop.metadata.BlockMetaData; import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; @@ -97,6 +99,7 @@ import org.apache.parquet.internal.column.columnindex.OffsetIndex; import org.apache.parquet.internal.column.columnindex.OffsetIndexBuilder; import org.apache.parquet.io.api.Binary; +import org.apache.parquet.schema.LogicalTypeAnnotation.MapKeyValueTypeAnnotation; import org.apache.parquet.schema.PrimitiveType; import org.apache.parquet.schema.LogicalTypeAnnotation; import org.junit.Assert; @@ -153,7 +156,7 @@ public void testSchemaConverterDecimal() { ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter(); List schemaElements = parquetMetadataConverter.toParquetSchema( Types.buildMessage() - .required(PrimitiveTypeName.BINARY) + .required(BINARY) .as(OriginalType.DECIMAL).precision(9).scale(2) .named("aBinaryDecimal") .optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(4) @@ -243,7 +246,7 @@ public void testParquetMetadataConverterWithoutDictionary() public void testLogicalTypesBackwardCompatibleWithConvertedTypes() { ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter(); MessageType expected = Types.buildMessage() - .required(PrimitiveTypeName.BINARY) + .required(BINARY) .as(OriginalType.DECIMAL).precision(9).scale(2) .named("aBinaryDecimal") .named("Message"); @@ -259,12 +262,12 @@ public void testLogicalTypesBackwardCompatibleWithConvertedTypes() { public void testIncompatibleLogicalAndConvertedTypes() { ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter(); MessageType schema = Types.buildMessage() - .required(PrimitiveTypeName.BINARY) + .required(BINARY) .as(OriginalType.DECIMAL).precision(9).scale(2) .named("aBinary") .named("Message"); MessageType expected = Types.buildMessage() - .required(PrimitiveTypeName.BINARY) + .required(BINARY) .as(LogicalTypeAnnotation.jsonType()) .named("aBinary") .named("Message"); @@ -322,6 +325,31 @@ public void testTimeLogicalTypes() { assertEquals(expected, schema); } + @Test + public void testMapLogicalType() { + ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter(); + MessageType expected = Types.buildMessage() + .requiredGroup().as(mapType()) + .repeatedGroup().as(MapKeyValueTypeAnnotation.getInstance()) + .required(BINARY).as(stringType()).named("key") + .required(PrimitiveTypeName.INT32).named("value") + .named("key_value") + .named("testMap") + .named("Message"); + + List parquetSchema = parquetMetadataConverter.toParquetSchema(expected); + assertEquals(5, parquetSchema.size()); + assertEquals(new SchemaElement("Message").setNum_children(1), parquetSchema.get(0)); + assertEquals(new SchemaElement("testMap").setRepetition_type(FieldRepetitionType.REQUIRED).setNum_children(1).setConverted_type(ConvertedType.MAP).setLogicalType(LogicalType.MAP(new MapType())), parquetSchema.get(1)); + // PARQUET-1879 ensure that LogicalType is not written (null) but ConvertedType is MAP_KEY_VALUE for backwards-compatibility + assertEquals(new SchemaElement("key_value").setRepetition_type(FieldRepetitionType.REPEATED).setNum_children(2).setConverted_type(ConvertedType.MAP_KEY_VALUE).setLogicalType(null), parquetSchema.get(2)); + assertEquals(new 
SchemaElement("key").setType(Type.BYTE_ARRAY).setRepetition_type(FieldRepetitionType.REQUIRED).setConverted_type(ConvertedType.UTF8).setLogicalType(LogicalType.STRING(new StringType())), parquetSchema.get(3)); + assertEquals(new SchemaElement("value").setType(Type.INT32).setRepetition_type(FieldRepetitionType.REQUIRED).setConverted_type(null).setLogicalType(null), parquetSchema.get(4)); + + MessageType schema = parquetMetadataConverter.fromParquetSchema(parquetSchema, null); + assertEquals(expected, schema); + } + @Test public void testLogicalToConvertedTypeConversion() { ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter(); @@ -363,7 +391,7 @@ public void testLogicalToConvertedTypeConversion() { assertEquals(ConvertedType.LIST, parquetMetadataConverter.convertToConvertedType(listType())); assertEquals(ConvertedType.MAP, parquetMetadataConverter.convertToConvertedType(mapType())); - assertEquals(ConvertedType.MAP_KEY_VALUE, parquetMetadataConverter.convertToConvertedType(LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance())); + assertEquals(ConvertedType.MAP_KEY_VALUE, parquetMetadataConverter.convertToConvertedType(MapKeyValueTypeAnnotation.getInstance())); } @Test @@ -554,7 +582,7 @@ public void testMetadataToJson() { private ColumnChunkMetaData createColumnChunkMetaData() { Set e = new HashSet(); - PrimitiveTypeName t = PrimitiveTypeName.BINARY; + PrimitiveTypeName t = BINARY; ColumnPath p = ColumnPath.get("foo"); CompressionCodecName c = CompressionCodecName.GZIP; BinaryStatistics s = new BinaryStatistics(); @@ -649,7 +677,7 @@ private void testBinaryStats(StatsHelper helper) { formatStats.isSetNull_count()); Statistics roundTripStats = ParquetMetadataConverter.fromParquetStatisticsInternal( - Version.FULL_VERSION, formatStats, new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, ""), + Version.FULL_VERSION, formatStats, new PrimitiveType(Repetition.OPTIONAL, BINARY, ""), ParquetMetadataConverter.SortOrder.SIGNED); Assert.assertTrue(roundTripStats.isEmpty()); @@ -871,7 +899,7 @@ public void testIgnoreStatsWithSignedSortOrder() { stats.updateStats(Binary.fromString("z")); stats.incrementNumNulls(); - PrimitiveType binaryType = Types.required(PrimitiveTypeName.BINARY) + PrimitiveType binaryType = Types.required(BINARY) .as(OriginalType.UTF8).named("b"); Statistics convertedStats = converter.fromParquetStatistics( Version.FULL_VERSION, @@ -902,7 +930,7 @@ private void testStillUseStatsWithSignedSortOrderIfSingleValue(StatsHelper helpe stats.updateStats(Binary.fromString("A")); stats.incrementNumNulls(); - PrimitiveType binaryType = Types.required(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("b"); + PrimitiveType binaryType = Types.required(BINARY).as(OriginalType.UTF8).named("b"); Statistics convertedStats = converter.fromParquetStatistics( Version.FULL_VERSION, ParquetMetadataConverter.toParquetStatistics(stats), @@ -935,7 +963,7 @@ private void testUseStatsWithSignedSortOrder(StatsHelper helper) { stats.updateStats(Binary.fromString("z")); stats.incrementNumNulls(); - PrimitiveType binaryType = Types.required(PrimitiveTypeName.BINARY) + PrimitiveType binaryType = Types.required(BINARY) .as(OriginalType.UTF8).named("b"); Statistics convertedStats = converter.fromParquetStatistics( Version.FULL_VERSION, @@ -1021,7 +1049,7 @@ public void testV2OnlyStats() { testV2OnlyStats(Types.optional(PrimitiveTypeName.INT64).as(OriginalType.UINT_64).named(""), 0x7FFFFFFFFFFFFFFFL, 0x8000000000000000L); - 
testV2OnlyStats(Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.DECIMAL).precision(6).named(""), + testV2OnlyStats(Types.optional(BINARY).as(OriginalType.DECIMAL).precision(6).named(""), new BigInteger("-765875"), new BigInteger("876856")); testV2OnlyStats( @@ -1054,7 +1082,7 @@ public void testV2StatsEqualMinMax() { testV2StatsEqualMinMax(Types.optional(PrimitiveTypeName.INT64).as(OriginalType.UINT_64).named(""), -2389943895984985L, -2389943895984985L); - testV2StatsEqualMinMax(Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.DECIMAL).precision(6).named(""), + testV2StatsEqualMinMax(Types.optional(BINARY).as(OriginalType.DECIMAL).precision(6).named(""), new BigInteger("823749"), new BigInteger("823749")); testV2StatsEqualMinMax( diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestReadWriteMapKeyValue.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestReadWriteMapKeyValue.java new file mode 100644 index 0000000000..012685f602 --- /dev/null +++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestReadWriteMapKeyValue.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.parquet.hadoop; + +import org.apache.hadoop.fs.Path; +import org.apache.parquet.example.data.Group; +import org.apache.parquet.example.data.simple.SimpleGroup; +import org.apache.parquet.hadoop.example.ExampleParquetWriter; +import org.apache.parquet.hadoop.example.GroupReadSupport; +import org.apache.parquet.io.api.Binary; +import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.MessageTypeParser; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.IOException; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +public class TestReadWriteMapKeyValue { + + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + + @Test + public void testMapLogicalType() throws Exception { + MessageType messageType = MessageTypeParser.parseMessageType( + "message example {\n" + + " required group testMap (MAP) {\n" + + " repeated group key_value {\n" + + " required binary key (STRING);\n" + + " required int64 value;\n" + + " }\n" + + " }\n" + + "}" + ); + + verifyMap(messageType, "key_value"); + } + + @Test + public void testMapConvertedType() throws Exception { + MessageType messageType = MessageTypeParser.parseMessageType( + "message example {\n" + + " required group testMap (MAP) {\n" + + " repeated group map (MAP_KEY_VALUE) {\n" + + " required binary key (STRING);\n" + + " required int64 value;\n" + + " }\n" + + " }\n" + + "}" + ); + + verifyMap(messageType, "map"); + } + + private void verifyMap(final MessageType messageType, final String keyValueName) throws IOException { + Path file = new Path(folder.newFolder("testReadWriteMapKeyValue").getPath(), keyValueName + ".parquet"); + + try (ParquetWriter writer = + ExampleParquetWriter + .builder(file) + .withType(messageType) + .build()) { + final Group group = new SimpleGroup(messageType); + final Group mapGroup = group.addGroup("testMap"); + + for (int index = 0; index < 5; index++) { + final Group keyValueGroup = mapGroup.addGroup(keyValueName); + keyValueGroup.add("key", Binary.fromString("key" + index)); + keyValueGroup.add("value", 100L + index); + } + + writer.write(group); + } + + try (ParquetReader reader = ParquetReader.builder(new GroupReadSupport(), file).build()) { + Group group = reader.read(); + + assertNotNull(group); + assertEquals(5, group.getGroup("testMap", 0).getFieldRepetitionCount(keyValueName)); + + for (int index = 0; index < 5; index++) { + assertEquals("key" + index, group.getGroup("testMap", 0).getGroup(keyValueName, index).getString("key", 0)); + assertEquals(100L + index, group.getGroup("testMap", 0).getGroup(keyValueName, index).getLong("value", 0)); + } + } + } +} diff --git a/parquet-hive/parquet-hive-storage-handler/src/test/java/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java b/parquet-hive/parquet-hive-storage-handler/src/test/java/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java index ad10426438..859fe7025d 100644 --- a/parquet-hive/parquet-hive-storage-handler/src/test/java/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java +++ b/parquet-hive/parquet-hive-storage-handler/src/test/java/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -111,7 +111,7 @@ public void testMap() throws Exception { "map", "message hive_schema {\n" + " optional group mapCol (MAP) {\n" - + " repeated group map (MAP_KEY_VALUE) {\n" + + " repeated group key_value (MAP_KEY_VALUE) {\n" + " required binary key;\n" + " optional binary value;\n" + " }\n" @@ -136,7 +136,7 @@ public void testMapOriginalType() throws Exception { assertEquals(1, topLevel.asGroupType().getFieldCount()); org.apache.parquet.schema.Type secondLevel = topLevel.asGroupType().getFields().get(0); //there is one repeated field for mapCol, the field name is "map" and its original Type is MAP_KEY_VALUE; - assertEquals("map", secondLevel.getName()); + assertEquals("key_value", secondLevel.getName()); assertEquals(OriginalType.MAP_KEY_VALUE, secondLevel.getOriginalType()); assertEquals(Repetition.REPEATED, secondLevel.getRepetition()); } diff --git a/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java b/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java index 19356616ae..99058bc400 100644 --- a/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java +++ b/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java @@ -62,6 +62,7 @@ */ public class PigSchemaConverter { private static final Logger LOG = LoggerFactory.getLogger(PigSchemaConverter.class); + private static final String MAP_REPEATED_NAME = "key_value"; static final String ARRAY_VALUE_NAME = "value"; private ColumnAccess columnAccess; @@ -459,7 +460,7 @@ private GroupType convertMap(String alias, FieldSchema fieldSchema) { throw new SchemaConversionException("Invalid map schema, cannot infer innerschema: ", fe); } Type convertedValue = convertWithName(innerField, "value"); - return ConversionPatterns.stringKeyMapType(Repetition.OPTIONAL, alias, name(innerField.alias, "map"), + return ConversionPatterns.stringKeyMapType(Repetition.OPTIONAL, alias, name(innerField.alias, MAP_REPEATED_NAME), convertedValue); } diff --git a/parquet-pig/src/test/java/org/apache/parquet/pig/TestPigSchemaConverter.java b/parquet-pig/src/test/java/org/apache/parquet/pig/TestPigSchemaConverter.java index 64d5961ad3..f0d8199284 100644 --- a/parquet-pig/src/test/java/org/apache/parquet/pig/TestPigSchemaConverter.java +++ b/parquet-pig/src/test/java/org/apache/parquet/pig/TestPigSchemaConverter.java @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. 
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -143,7 +143,7 @@ public void testMap() throws Exception { "message pig_schema {\n" + " optional binary a (UTF8);\n" + " optional group b (MAP) {\n" + - " repeated group map (MAP_KEY_VALUE) {\n" + + " repeated group key_value (MAP_KEY_VALUE) {\n" + " required binary key (UTF8);\n" + " optional group value {\n" + " optional binary c (UTF8);\n" + @@ -159,7 +159,7 @@ public void testMap2() throws Exception { testConversion("a:map[int]", "message pig_schema {\n" + " optional group a (MAP) {\n" + - " repeated group map (MAP_KEY_VALUE) {\n" + + " repeated group key_value (MAP_KEY_VALUE) {\n" + " required binary key (UTF8);\n" + " optional int32 value;" + " }\n" + @@ -172,10 +172,10 @@ public void testMap3() throws Exception { testConversion("a:map[map[int]]", "message pig_schema {\n" + " optional group a (MAP) {\n" + - " repeated group map (MAP_KEY_VALUE) {\n" + + " repeated group key_value (MAP_KEY_VALUE) {\n" + " required binary key (UTF8);\n" + " optional group value (MAP) {\n" + - " repeated group map (MAP_KEY_VALUE) {\n" + + " repeated group key_value (MAP_KEY_VALUE) {\n" + " required binary key (UTF8);\n" + " optional int32 value;\n" + " }\n" + @@ -190,7 +190,7 @@ public void testMap4() throws Exception { testConversion("a:map[bag{(a:int)}]", "message pig_schema {\n" + " optional group a (MAP) {\n" + - " repeated group map (MAP_KEY_VALUE) {\n" + + " repeated group key_value (MAP_KEY_VALUE) {\n" + " required binary key (UTF8);\n" + " optional group value (LIST) {\n" + " repeated group bag {\n" + @@ -228,7 +228,7 @@ public void testMapWithFixed() throws Exception { "message pig_schema {\n" + " optional binary a;\n" + " optional group b (MAP) {\n" + - " repeated group map (MAP_KEY_VALUE) {\n" + + " repeated group key_value (MAP_KEY_VALUE) {\n" + " required binary key;\n" + " optional group value {\n" + " optional fixed_len_byte_array(5) c;\n" + @@ -246,7 +246,7 @@ public void testMapWithFixedWithoutOriginalType() throws Exception { "message spark_schema {\n" + " optional binary a;\n" + " optional group b (MAP) {\n" + - " repeated group map {\n" + + " repeated group key_value {\n" + " required binary key;\n" + " optional group value {\n" + " optional fixed_len_byte_array(5) c;\n" + diff --git a/parquet-pig/src/test/java/org/apache/parquet/pig/TestTupleRecordConsumer.java b/parquet-pig/src/test/java/org/apache/parquet/pig/TestTupleRecordConsumer.java index ef048f22ed..ff4bd87d64 100644 --- a/parquet-pig/src/test/java/org/apache/parquet/pig/TestTupleRecordConsumer.java +++ b/parquet-pig/src/test/java/org/apache/parquet/pig/TestTupleRecordConsumer.java @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. 
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -93,8 +93,8 @@ public void testMaps() throws ExecException, ParserException { String pigSchemaString = "a: [(b: chararray)]"; SimpleGroup g = new SimpleGroup(getMessageType(pigSchemaString)); Group map = g.addGroup("a"); - map.addGroup("map").append("key", "foo").addGroup("value").append("b", "foo"); - map.addGroup("map").append("key", "bar").addGroup("value").append("b", "bar"); + map.addGroup("key_value").append("key", "foo").addGroup("value").append("b", "foo"); + map.addGroup("key_value").append("key", "bar").addGroup("value").append("b", "bar"); testFromGroups(pigSchemaString, Arrays.asList(g)); } diff --git a/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestThriftToParquetFileWriter.java b/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestThriftToParquetFileWriter.java index 66b804ccb8..b8ff23c4a6 100644 --- a/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestThriftToParquetFileWriter.java +++ b/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestThriftToParquetFileWriter.java @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -210,9 +210,9 @@ public void testWriteFileListOfMap() throws IOException, InterruptedException, T assertEquals(listMap.names.size(), g.getGroup("names", 0).getFieldRepetitionCount("names_tuple")); assertEquals(listMap.names.get(0).size(), - g.getGroup("names", 0).getGroup("names_tuple", 0).getFieldRepetitionCount("map")); + g.getGroup("names", 0).getGroup("names_tuple", 0).getFieldRepetitionCount("key_value")); assertEquals(listMap.names.get(1).size(), - g.getGroup("names", 0).getGroup("names_tuple", 1).getFieldRepetitionCount("map")); + g.getGroup("names", 0).getGroup("names_tuple", 1).getFieldRepetitionCount("key_value")); } } @@ -228,9 +228,9 @@ public void testWriteFileMapOfList() throws IOException, InterruptedException, T Group g = null; while((g = reader.read()) != null) { assertEquals("key", - g.getGroup("names", 0).getGroup("map",0).getBinary("key", 0).toStringUsingUTF8()); + g.getGroup("names", 0).getGroup("key_value",0).getBinary("key", 0).toStringUsingUTF8()); assertEquals(map.get("key").size(), - g.getGroup("names", 0).getGroup("map",0).getGroup("value", 0).getFieldRepetitionCount(0)); + g.getGroup("names", 0).getGroup("key_value",0).getGroup("value", 0).getFieldRepetitionCount(0)); } } @@ -246,13 +246,13 @@ public void testWriteFileMapOfLists() throws IOException, InterruptedException, Group g = null; while((g = reader.read()) != null) { assertEquals("key1", - g.getGroup("names", 0).getGroup("map",0).getGroup("key", 0).getBinary("key_tuple", 0).toStringUsingUTF8()); + g.getGroup("names", 0).getGroup("key_value",0).getGroup("key", 
0).getBinary("key_tuple", 0).toStringUsingUTF8()); assertEquals("key2", - g.getGroup("names", 0).getGroup("map",0).getGroup("key", 0).getBinary("key_tuple", 1).toStringUsingUTF8()); + g.getGroup("names", 0).getGroup("key_value",0).getGroup("key", 0).getBinary("key_tuple", 1).toStringUsingUTF8()); assertEquals("val1", - g.getGroup("names", 0).getGroup("map",0).getGroup("value", 0).getBinary("value_tuple", 0).toStringUsingUTF8()); + g.getGroup("names", 0).getGroup("key_value",0).getGroup("value", 0).getBinary("value_tuple", 0).toStringUsingUTF8()); assertEquals("val2", - g.getGroup("names", 0).getGroup("map",0).getGroup("value", 0).getBinary("value_tuple", 1).toStringUsingUTF8()); + g.getGroup("names", 0).getGroup("key_value",0).getGroup("value", 0).getBinary("value_tuple", 1).toStringUsingUTF8()); } } diff --git a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestParquetWriteProtocol.java b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestParquetWriteProtocol.java index df60766f2d..f9702c0525 100644 --- a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestParquetWriteProtocol.java +++ b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestParquetWriteProtocol.java @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -78,7 +78,7 @@ public void testMap() throws Exception { "endField(name, 0)", "startField(names, 1)", "startGroup()", - "startField(map, 0)", + "startField(key_value, 0)", "startGroup()", "startField(key, 0)", "addBinary(foo)", @@ -95,7 +95,7 @@ public void testMap() throws Exception { "addBinary(bar2)", "endField(value, 1)", "endGroup()", - "endField(map, 0)", + "endField(key_value, 0)", "endGroup()", "endField(names, 1)", "endMessage()" @@ -107,7 +107,7 @@ public void testMap() throws Exception { "endField(name, 0)", "startField(names, 1)", "startGroup()", - "startField(map, 0)", + "startField(key_value, 0)", "startGroup()", "startField(key, 0)", "addBinary(foo2)", @@ -124,7 +124,7 @@ public void testMap() throws Exception { "addBinary(bar)", "endField(value, 1)", "endGroup()", - "endField(map, 0)", + "endField(key_value, 0)", "endGroup()", "endField(names, 1)", "endMessage()" @@ -163,7 +163,7 @@ public void testMapInSet() throws Exception { "startGroup()", "startField(names_tuple, 0)", // map: optional field "startGroup()", - "startField(map, 0)", // repeated field + "startField(key_value, 0)", // repeated field "startGroup()", "startField(key, 0)", // key "addBinary(foo)", @@ -172,7 +172,7 @@ public void testMapInSet() throws Exception { "addBinary(bar)", "endField(value, 1)", "endGroup()", - "endField(map, 0)", + "endField(key_value, 0)", "endGroup()", "endField(names_tuple, 0)", "endGroup()", @@ -198,7 +198,7 @@ public void testMapInSet() throws Exception { "startGroup()", "startField(names_tuple, 0)", // map: optional field "startGroup()", - "startField(map, 0)", // repeated field + "startField(key_value, 0)", // repeated field "startGroup()", "startField(key, 0)", // 
key "addBinary(foo)", @@ -207,7 +207,7 @@ public void testMapInSet() throws Exception { "addBinary(bar)", "endField(value, 1)", "endGroup()", - "endField(map, 0)", + "endField(key_value, 0)", "endGroup()", "endField(names_tuple, 0)", "endGroup()", @@ -278,7 +278,7 @@ public void testStructInMap() throws Exception { "endField(name, 0)", "startField(names, 1)", "startGroup()", - "startField(map, 0)", + "startField(key_value, 0)", "startGroup()", "startField(key, 0)", "addBinary(foo)", @@ -302,12 +302,12 @@ public void testStructInMap() throws Exception { "endGroup()", "endField(value, 1)", "endGroup()", - "endField(map, 0)", + "endField(key_value, 0)", "endGroup()", "endField(names, 1)", "startField(name_to_id, 2)", "startGroup()", - "startField(map, 0)", + "startField(key_value, 0)", "startGroup()", "startField(key, 0)", "addBinary(bar)", @@ -316,7 +316,7 @@ public void testStructInMap() throws Exception { "addInt(10)", "endField(value, 1)", "endGroup()", - "endField(map, 0)", + "endField(key_value, 0)", "endGroup()", "endField(name_to_id, 2)", "endMessage()" diff --git a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftSchemaConverter.java b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftSchemaConverter.java index 5a7b20922d..41c8c29582 100644 --- a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftSchemaConverter.java +++ b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftSchemaConverter.java @@ -152,7 +152,7 @@ public void testProjectMapThriftType() { shouldGetProjectedSchema("name;names/key*;names/value/**", "name;names.key*;names.value", "message ParquetSchema {\n" + " optional binary name (UTF8) = 1;\n" + " optional group names (MAP) = 2 {\n" + - " repeated group map (MAP_KEY_VALUE) {\n" + + " repeated group key_value (MAP_KEY_VALUE) {\n" + " required binary key (UTF8);\n" + " optional group value {\n" + " optional group name = 1 {\n" + @@ -160,7 +160,7 @@ public void testProjectMapThriftType() { " optional binary last_name (UTF8) = 2;\n" + " }\n" + " optional group phones (MAP) = 2 {\n" + - " repeated group map (MAP_KEY_VALUE) {\n" + + " repeated group key_value (MAP_KEY_VALUE) {\n" + " required binary key (ENUM);\n" + " optional binary value (UTF8);\n" + " }\n" + @@ -174,7 +174,7 @@ public void testProjectMapThriftType() { shouldGetProjectedSchema("name;names/key;names/value/name/*", "name;names.key;names.value.name","message ParquetSchema {\n" + " optional binary name (UTF8) = 1;\n" + " optional group names (MAP) = 2 {\n" + - " repeated group map (MAP_KEY_VALUE) {\n" + + " repeated group key_value (MAP_KEY_VALUE) {\n" + " required binary key (UTF8);\n" + " optional group value {\n" + " optional group name = 1 {\n" + @@ -192,7 +192,7 @@ public void testProjectOnlyKeyInMap() { shouldGetProjectedSchema("name;names/key", "name;names.key", "message ParquetSchema {\n" + " optional binary name (UTF8) = 1;\n" + " optional group names (MAP) = 2 {\n" + - " repeated group map (MAP_KEY_VALUE) {\n" + + " repeated group key_value (MAP_KEY_VALUE) {\n" + " required binary key (UTF8);\n" + " optional group value {\n" + " optional group name = 1 {\n" + diff --git a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftSchemaConverterProjectUnion.java b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftSchemaConverterProjectUnion.java index 611a1a9c55..e37934ab0d 100644 --- a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftSchemaConverterProjectUnion.java +++ 
b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftSchemaConverterProjectUnion.java @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -308,7 +308,7 @@ public void testMapWithUnionKey() { "optMapWithUnionKey.key", "message ParquetSchema {\n" + " optional group optMapWithUnionKey (MAP) = 1 {\n" + - " repeated group map (MAP_KEY_VALUE) {\n" + + " repeated group key_value (MAP_KEY_VALUE) {\n" + " required group key {\n" + " optional group structV3 = 1 {\n" + " required binary name (UTF8) = 1;\n" + @@ -341,7 +341,7 @@ public void testMapWithUnionKey() { "optMapWithUnionKey.{key,value.gender}", "message ParquetSchema {\n" + " optional group optMapWithUnionKey (MAP) = 1 {\n" + - " repeated group map (MAP_KEY_VALUE) {\n" + + " repeated group key_value (MAP_KEY_VALUE) {\n" + " required group key {\n" + " optional group structV3 = 1 {\n" + " required binary name (UTF8) = 1;\n" + @@ -377,7 +377,7 @@ public void testMapWithUnionValue() { "optMapWithUnionValue.{key,value.structV4.addedStruct.gender}", "message ParquetSchema {\n" + " optional group optMapWithUnionValue (MAP) = 1 {\n" + - " repeated group map (MAP_KEY_VALUE) {\n" + + " repeated group key_value (MAP_KEY_VALUE) {\n" + " required group key {\n" + " required binary name (UTF8) = 1;\n" + " optional binary age (UTF8) = 2;\n" + From 7eddaec3966b3d92b94d992f86ed82fa68a45d0c Mon Sep 17 00:00:00 2001 From: Matthew McMahon Date: Thu, 25 Jun 2020 21:11:40 +1000 Subject: [PATCH 2/2] PARQUET-1879 MapKeyValue is not a valid Logical Type * Writing UNKNOWN logical type into the schema, causes a breakage when parsing the file with Apache Arrow * Instead use the default, of falling back to null when that backwards-compatibility only logical type is present, but still write the original type --- .../TestParquetMetadataConverter.java | 155 +++++++++++++----- .../hadoop/TestReadWriteMapKeyValue.java | 107 ------------ 2 files changed, 116 insertions(+), 146 deletions(-) delete mode 100644 parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestReadWriteMapKeyValue.java diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java index aeff1ae09e..aee2dc63eb 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java @@ -36,9 +36,9 @@ import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType; import static org.apache.parquet.schema.LogicalTypeAnnotation.uuidType; import static org.apache.parquet.schema.MessageTypeParser.parseMessageType; -import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static 
org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; @@ -69,6 +69,8 @@ import com.google.common.collect.Sets; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.UTF8; import org.apache.parquet.FixedBinaryTestUtils; import org.apache.parquet.Version; import org.apache.parquet.bytes.BytesUtils; @@ -83,11 +85,17 @@ import org.apache.parquet.column.statistics.IntStatistics; import org.apache.parquet.column.statistics.LongStatistics; import org.apache.parquet.column.statistics.Statistics; +import org.apache.parquet.example.data.Group; +import org.apache.parquet.example.data.simple.SimpleGroup; import org.apache.parquet.format.DecimalType; import org.apache.parquet.format.LogicalType; import org.apache.parquet.format.MapType; import org.apache.parquet.format.StringType; import org.apache.parquet.format.Util; +import org.apache.parquet.hadoop.ParquetReader; +import org.apache.parquet.hadoop.ParquetWriter; +import org.apache.parquet.hadoop.example.ExampleParquetWriter; +import org.apache.parquet.hadoop.example.GroupReadSupport; import org.apache.parquet.hadoop.metadata.BlockMetaData; import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; import org.apache.parquet.hadoop.metadata.ColumnPath; @@ -99,10 +107,10 @@ import org.apache.parquet.internal.column.columnindex.OffsetIndex; import org.apache.parquet.internal.column.columnindex.OffsetIndexBuilder; import org.apache.parquet.io.api.Binary; -import org.apache.parquet.schema.LogicalTypeAnnotation.MapKeyValueTypeAnnotation; import org.apache.parquet.schema.PrimitiveType; import org.apache.parquet.schema.LogicalTypeAnnotation; import org.junit.Assert; +import org.junit.Rule; import org.junit.Test; import org.apache.parquet.example.Paper; import org.apache.parquet.format.ColumnChunk; @@ -123,6 +131,7 @@ import org.apache.parquet.schema.Types; import com.google.common.collect.Lists; +import org.junit.rules.TemporaryFolder; public class TestParquetMetadataConverter { private static SecureRandom random = new SecureRandom(); @@ -131,6 +140,9 @@ public class TestParquetMetadataConverter { private static final String NUMBER = "0123456789"; private static final String DATA_FOR_RANDOM_STRING = CHAR_LOWER + CHAR_UPPER + NUMBER; + @Rule + public TemporaryFolder temporaryFolder = new TemporaryFolder(); + @Test public void testPageHeader() throws IOException { ByteArrayOutputStream out = new ByteArrayOutputStream(); @@ -156,7 +168,7 @@ public void testSchemaConverterDecimal() { ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter(); List schemaElements = parquetMetadataConverter.toParquetSchema( Types.buildMessage() - .required(BINARY) + .required(PrimitiveTypeName.BINARY) .as(OriginalType.DECIMAL).precision(9).scale(2) .named("aBinaryDecimal") .optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(4) @@ -246,7 +258,7 @@ public void testParquetMetadataConverterWithoutDictionary() public void testLogicalTypesBackwardCompatibleWithConvertedTypes() { ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter(); MessageType expected = Types.buildMessage() - .required(BINARY) + .required(PrimitiveTypeName.BINARY) .as(OriginalType.DECIMAL).precision(9).scale(2) .named("aBinaryDecimal") .named("Message"); @@ -262,12 +274,12 @@ public void testLogicalTypesBackwardCompatibleWithConvertedTypes() { public void testIncompatibleLogicalAndConvertedTypes() 
     ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
     MessageType schema = Types.buildMessage()
-        .required(BINARY)
+        .required(PrimitiveTypeName.BINARY)
         .as(OriginalType.DECIMAL).precision(9).scale(2)
         .named("aBinary")
         .named("Message");
     MessageType expected = Types.buildMessage()
-        .required(BINARY)
+        .required(PrimitiveTypeName.BINARY)
         .as(LogicalTypeAnnotation.jsonType())
         .named("aBinary")
         .named("Message");
@@ -325,31 +337,6 @@ public void testTimeLogicalTypes() {
     assertEquals(expected, schema);
   }
 
-  @Test
-  public void testMapLogicalType() {
-    ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
-    MessageType expected = Types.buildMessage()
-      .requiredGroup().as(mapType())
-        .repeatedGroup().as(MapKeyValueTypeAnnotation.getInstance())
-          .required(BINARY).as(stringType()).named("key")
-          .required(PrimitiveTypeName.INT32).named("value")
-        .named("key_value")
-      .named("testMap")
-      .named("Message");
-
-    List<SchemaElement> parquetSchema = parquetMetadataConverter.toParquetSchema(expected);
-    assertEquals(5, parquetSchema.size());
-    assertEquals(new SchemaElement("Message").setNum_children(1), parquetSchema.get(0));
-    assertEquals(new SchemaElement("testMap").setRepetition_type(FieldRepetitionType.REQUIRED).setNum_children(1).setConverted_type(ConvertedType.MAP).setLogicalType(LogicalType.MAP(new MapType())), parquetSchema.get(1));
-    // PARQUET-1879 ensure that LogicalType is not written (null) but ConvertedType is MAP_KEY_VALUE for backwards-compatibility
-    assertEquals(new SchemaElement("key_value").setRepetition_type(FieldRepetitionType.REPEATED).setNum_children(2).setConverted_type(ConvertedType.MAP_KEY_VALUE).setLogicalType(null), parquetSchema.get(2));
-    assertEquals(new SchemaElement("key").setType(Type.BYTE_ARRAY).setRepetition_type(FieldRepetitionType.REQUIRED).setConverted_type(ConvertedType.UTF8).setLogicalType(LogicalType.STRING(new StringType())), parquetSchema.get(3));
-    assertEquals(new SchemaElement("value").setType(Type.INT32).setRepetition_type(FieldRepetitionType.REQUIRED).setConverted_type(null).setLogicalType(null), parquetSchema.get(4));
-
-    MessageType schema = parquetMetadataConverter.fromParquetSchema(parquetSchema, null);
-    assertEquals(expected, schema);
-  }
-
   @Test
   public void testLogicalToConvertedTypeConversion() {
     ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
@@ -391,7 +378,7 @@ public void testLogicalToConvertedTypeConversion() {
 
     assertEquals(ConvertedType.LIST, parquetMetadataConverter.convertToConvertedType(listType()));
     assertEquals(ConvertedType.MAP, parquetMetadataConverter.convertToConvertedType(mapType()));
-    assertEquals(ConvertedType.MAP_KEY_VALUE, parquetMetadataConverter.convertToConvertedType(MapKeyValueTypeAnnotation.getInstance()));
+    assertEquals(ConvertedType.MAP_KEY_VALUE, parquetMetadataConverter.convertToConvertedType(LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance()));
   }
 
   @Test
@@ -582,7 +569,7 @@ public void testMetadataToJson() {
 
   private ColumnChunkMetaData createColumnChunkMetaData() {
     Set e = new HashSet();
-    PrimitiveTypeName t = BINARY;
+    PrimitiveTypeName t = PrimitiveTypeName.BINARY;
     ColumnPath p = ColumnPath.get("foo");
     CompressionCodecName c = CompressionCodecName.GZIP;
     BinaryStatistics s = new BinaryStatistics();
@@ -677,7 +664,7 @@ private void testBinaryStats(StatsHelper helper) {
         formatStats.isSetNull_count());
 
     Statistics roundTripStats = ParquetMetadataConverter.fromParquetStatisticsInternal(
-        Version.FULL_VERSION, formatStats, new PrimitiveType(Repetition.OPTIONAL, BINARY, ""),
+        Version.FULL_VERSION, formatStats, new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, ""),
         ParquetMetadataConverter.SortOrder.SIGNED);
 
     Assert.assertTrue(roundTripStats.isEmpty());
@@ -899,7 +886,7 @@ public void testIgnoreStatsWithSignedSortOrder() {
     stats.updateStats(Binary.fromString("z"));
     stats.incrementNumNulls();
 
-    PrimitiveType binaryType = Types.required(BINARY)
+    PrimitiveType binaryType = Types.required(PrimitiveTypeName.BINARY)
         .as(OriginalType.UTF8).named("b");
     Statistics convertedStats = converter.fromParquetStatistics(
         Version.FULL_VERSION,
@@ -930,7 +917,7 @@ private void testStillUseStatsWithSignedSortOrderIfSingleValue(StatsHelper helpe
     stats.updateStats(Binary.fromString("A"));
     stats.incrementNumNulls();
 
-    PrimitiveType binaryType = Types.required(BINARY).as(OriginalType.UTF8).named("b");
+    PrimitiveType binaryType = Types.required(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("b");
     Statistics convertedStats = converter.fromParquetStatistics(
         Version.FULL_VERSION,
         ParquetMetadataConverter.toParquetStatistics(stats),
@@ -963,7 +950,7 @@ private void testUseStatsWithSignedSortOrder(StatsHelper helper) {
     stats.updateStats(Binary.fromString("z"));
     stats.incrementNumNulls();
 
-    PrimitiveType binaryType = Types.required(BINARY)
+    PrimitiveType binaryType = Types.required(PrimitiveTypeName.BINARY)
         .as(OriginalType.UTF8).named("b");
     Statistics convertedStats = converter.fromParquetStatistics(
         Version.FULL_VERSION,
@@ -1049,7 +1036,7 @@ public void testV2OnlyStats() {
     testV2OnlyStats(Types.optional(PrimitiveTypeName.INT64).as(OriginalType.UINT_64).named(""),
         0x7FFFFFFFFFFFFFFFL, 0x8000000000000000L);
 
-    testV2OnlyStats(Types.optional(BINARY).as(OriginalType.DECIMAL).precision(6).named(""),
+    testV2OnlyStats(Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.DECIMAL).precision(6).named(""),
         new BigInteger("-765875"), new BigInteger("876856"));
 
     testV2OnlyStats(
@@ -1082,7 +1069,7 @@ public void testV2StatsEqualMinMax() {
     testV2StatsEqualMinMax(Types.optional(PrimitiveTypeName.INT64).as(OriginalType.UINT_64).named(""),
         -2389943895984985L, -2389943895984985L);
 
-    testV2StatsEqualMinMax(Types.optional(BINARY).as(OriginalType.DECIMAL).precision(6).named(""),
+    testV2StatsEqualMinMax(Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.DECIMAL).precision(6).named(""),
         new BigInteger("823749"), new BigInteger("823749"));
 
     testV2StatsEqualMinMax(
@@ -1289,4 +1276,94 @@ public void testColumnIndexConversion() {
         ParquetMetadataConverter.fromParquetColumnIndex(Types.required(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
             .length(12).as(OriginalType.INTERVAL).named("test_interval"), parquetColumnIndex));
   }
+
+  @Test
+  public void testMapLogicalType() {
+    ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
+    MessageType expected = Types.buildMessage()
+      .requiredGroup().as(mapType())
+        .repeatedGroup().as(LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance())
+          .required(PrimitiveTypeName.BINARY).as(stringType()).named("key")
+          .required(PrimitiveTypeName.INT32).named("value")
+        .named("key_value")
+      .named("testMap")
+      .named("Message");
+
+    List<SchemaElement> parquetSchema = parquetMetadataConverter.toParquetSchema(expected);
+    assertEquals(5, parquetSchema.size());
+    assertEquals(new SchemaElement("Message").setNum_children(1), parquetSchema.get(0));
SchemaElement("testMap").setRepetition_type(FieldRepetitionType.REQUIRED).setNum_children(1).setConverted_type(ConvertedType.MAP).setLogicalType(LogicalType.MAP(new MapType())), parquetSchema.get(1)); + // PARQUET-1879 ensure that LogicalType is not written (null) but ConvertedType is MAP_KEY_VALUE for backwards-compatibility + assertEquals(new SchemaElement("key_value").setRepetition_type(FieldRepetitionType.REPEATED).setNum_children(2).setConverted_type(ConvertedType.MAP_KEY_VALUE).setLogicalType(null), parquetSchema.get(2)); + assertEquals(new SchemaElement("key").setType(Type.BYTE_ARRAY).setRepetition_type(FieldRepetitionType.REQUIRED).setConverted_type(ConvertedType.UTF8).setLogicalType(LogicalType.STRING(new StringType())), parquetSchema.get(3)); + assertEquals(new SchemaElement("value").setType(Type.INT32).setRepetition_type(FieldRepetitionType.REQUIRED).setConverted_type(null).setLogicalType(null), parquetSchema.get(4)); + + MessageType schema = parquetMetadataConverter.fromParquetSchema(parquetSchema, null); + assertEquals(expected, schema); + } + + @Test + public void testMapLogicalTypeReadWrite() throws Exception { + MessageType messageType = Types.buildMessage() + .requiredGroup().as(mapType()) + .repeatedGroup().as(LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance()) + .required(PrimitiveTypeName.BINARY).as(stringType()).named("key") + .required(PrimitiveTypeName.INT64).named("value") + .named("key_value") + .named("testMap") + .named("example"); + + verifyMapMessageType(messageType, "key_value"); + } + + @Test + public void testMapConvertedTypeReadWrite() throws Exception { + List oldConvertedTypeSchemaElements = new ArrayList<>(); + oldConvertedTypeSchemaElements.add(new SchemaElement("example").setNum_children(1)); + oldConvertedTypeSchemaElements.add(new SchemaElement("testMap").setRepetition_type(FieldRepetitionType.REQUIRED).setNum_children(1).setConverted_type(ConvertedType.MAP).setLogicalType(null)); + oldConvertedTypeSchemaElements.add(new SchemaElement("map").setRepetition_type(FieldRepetitionType.REPEATED).setNum_children(2).setConverted_type(ConvertedType.MAP_KEY_VALUE).setLogicalType(null)); + oldConvertedTypeSchemaElements.add(new SchemaElement("key").setType(Type.BYTE_ARRAY).setRepetition_type(FieldRepetitionType.REQUIRED).setConverted_type(ConvertedType.UTF8).setLogicalType(null)); + oldConvertedTypeSchemaElements.add(new SchemaElement("value").setType(Type.INT64).setRepetition_type(FieldRepetitionType.REQUIRED).setConverted_type(null).setLogicalType(null)); + + ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter(); + MessageType messageType = parquetMetadataConverter.fromParquetSchema(oldConvertedTypeSchemaElements, null); + + verifyMapMessageType(messageType, "map"); + } + + private void verifyMapMessageType(final MessageType messageType, final String keyValueName) throws IOException { + Path file = new Path(temporaryFolder.newFolder("verifyMapMessageType").getPath(), keyValueName + ".parquet"); + + try (ParquetWriter writer = + ExampleParquetWriter + .builder(file) + .withType(messageType) + .build()) { + final Group group = new SimpleGroup(messageType); + final Group mapGroup = group.addGroup("testMap"); + + for (int index = 0; index < 5; index++) { + final Group keyValueGroup = mapGroup.addGroup(keyValueName); + keyValueGroup.add("key", Binary.fromString("key" + index)); + keyValueGroup.add("value", 100L + index); + } + + writer.write(group); + } + + try (ParquetReader reader = ParquetReader.builder(new 
+      Group group = reader.read();
+
+      assertNotNull(group);
+
+      Group testMap = group.getGroup("testMap", 0);
+      assertNotNull(testMap);
+      assertEquals(5, testMap.getFieldRepetitionCount(keyValueName));
+
+      for (int index = 0; index < 5; index++) {
+        assertEquals("key" + index, testMap.getGroup(keyValueName, index).getString("key", 0));
+        assertEquals(100L + index, testMap.getGroup(keyValueName, index).getLong("value", 0));
+      }
+    }
+  }
 }
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestReadWriteMapKeyValue.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestReadWriteMapKeyValue.java
deleted file mode 100644
index 012685f602..0000000000
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestReadWriteMapKeyValue.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.parquet.hadoop;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.parquet.example.data.Group;
-import org.apache.parquet.example.data.simple.SimpleGroup;
-import org.apache.parquet.hadoop.example.ExampleParquetWriter;
-import org.apache.parquet.hadoop.example.GroupReadSupport;
-import org.apache.parquet.io.api.Binary;
-import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.schema.MessageTypeParser;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-
-import java.io.IOException;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-
-public class TestReadWriteMapKeyValue {
-
-  @Rule
-  public TemporaryFolder folder = new TemporaryFolder();
-
-  @Test
-  public void testMapLogicalType() throws Exception {
-    MessageType messageType = MessageTypeParser.parseMessageType(
-      "message example {\n" +
-        " required group testMap (MAP) {\n" +
-        "   repeated group key_value {\n" +
-        "     required binary key (STRING);\n" +
-        "     required int64 value;\n" +
-        "   }\n" +
-        " }\n" +
-        "}"
-    );
-
-    verifyMap(messageType, "key_value");
-  }
-
-  @Test
-  public void testMapConvertedType() throws Exception {
-    MessageType messageType = MessageTypeParser.parseMessageType(
-      "message example {\n" +
-        " required group testMap (MAP) {\n" +
-        "   repeated group map (MAP_KEY_VALUE) {\n" +
-        "     required binary key (STRING);\n" +
-        "     required int64 value;\n" +
-        "   }\n" +
-        " }\n" +
-        "}"
-    );
-
-    verifyMap(messageType, "map");
-  }
-
-  private void verifyMap(final MessageType messageType, final String keyValueName) throws IOException {
-    Path file = new Path(folder.newFolder("testReadWriteMapKeyValue").getPath(), keyValueName + ".parquet");
-
-    try (ParquetWriter<Group> writer =
-           ExampleParquetWriter
-             .builder(file)
-             .withType(messageType)
-             .build()) {
-      final Group group = new SimpleGroup(messageType);
-      final Group mapGroup = group.addGroup("testMap");
-
-      for (int index = 0; index < 5; index++) {
-        final Group keyValueGroup = mapGroup.addGroup(keyValueName);
-        keyValueGroup.add("key", Binary.fromString("key" + index));
-        keyValueGroup.add("value", 100L + index);
-      }
-
-      writer.write(group);
-    }
-
-    try (ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), file).build()) {
-      Group group = reader.read();
-
-      assertNotNull(group);
-      assertEquals(5, group.getGroup("testMap", 0).getFieldRepetitionCount(keyValueName));
-
-      for (int index = 0; index < 5; index++) {
-        assertEquals("key" + index, group.getGroup("testMap", 0).getGroup(keyValueName, index).getString("key", 0));
-        assertEquals(100L + index, group.getGroup("testMap", 0).getGroup(keyValueName, index).getLong("value", 0));
-      }
-    }
-  }
-}