From 5d24b02ae6b914ac63a1fdec024904b80ecdc5f2 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Mon, 24 Jun 2024 05:50:24 +0530 Subject: [PATCH 1/7] fix: initial commit --- .../arrow/vector/complex/MapVector.java | 9 +++- .../apache/arrow/vector/TestMapVector.java | 49 +++++++++++++++++++ 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index 5c6b9b22556..f63f4922968 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -45,8 +45,8 @@ */ public class MapVector extends ListVector { - public static final String KEY_NAME = "key"; - public static final String VALUE_NAME = "value"; + public static String KEY_NAME = "key"; + public static String VALUE_NAME = "value"; public static final String DATA_VECTOR_NAME = "entries"; /** @@ -104,6 +104,11 @@ public void initializeChildrenFromFields(List children) { Field keyField = structField.getChildren().get(0); checkArgument(!keyField.isNullable(), "Map data key type should be a non-nullable"); + Field valueField = structField.getChildren().get(1); + // set the KEY_NAME and VALUE_NAME from the field names + KEY_NAME = keyField.getName(); + VALUE_NAME = valueField.getName(); + AddOrGetResult addOrGetVector = addOrGetVector(structField.getFieldType()); checkArgument( addOrGetVector.isCreated(), "Child vector already existed: %s", addOrGetVector.getVector()); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java index 213ffced273..83ee7083811 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java @@ -39,6 +39,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.JsonStringArrayList; import org.apache.arrow.vector.util.TransferPair; import org.junit.jupiter.api.AfterEach; @@ -1204,4 +1205,52 @@ public void testMakeTransferPairPreserveNullability() { assertEquals(intField, vec.getField().getChildren().get(0)); assertEquals(intField, res.getField().getChildren().get(0)); } + + @Test + public void testValidateKeyValueFieldNames() { + FieldType keyType = new FieldType(false, MinorType.BIGINT.getType(), null, null); + FieldType valueType = FieldType.nullable(MinorType.FLOAT8.getType()); + + Field keyField = new Field("myKey", keyType, null); + Field valueField = new Field("myValue", valueType, null); + + List structFields = new ArrayList<>(); + structFields.add(keyField); + structFields.add(valueField); + + Field structField = + new Field("entry", FieldType.notNullable(ArrowType.Struct.INSTANCE), structFields); + Field mapField = + new Field( + "mapField", + FieldType.notNullable(new ArrowType.Map(false)), + Collections.singletonList(structField)); + + Schema schema = new Schema(Collections.singletonList(mapField)); + + try (VectorSchemaRoot vectorSchemaRoot = VectorSchemaRoot.create(schema, allocator); + MapVector mapVector = (MapVector) vectorSchemaRoot.getVector("mapField")) { + UnionMapWriter mapWriter = mapVector.getWriter(); + mapWriter.setPosition(0); + mapWriter.startMap(); + for (int i = 0; i < 3; i++) { + mapWriter.startEntry(); + mapWriter.key().bigInt().writeBigInt(i); + mapWriter.value().bigInt().writeBigInt(i * 7); + mapWriter.endEntry(); + } + mapWriter.endMap(); + mapWriter.setValueCount(1); + vectorSchemaRoot.setRowCount(1); + + FieldVector structVector = mapVector.getChildrenFromFields().get(0); + FieldVector keyVector = structVector.getChildrenFromFields().get(0); + FieldVector valueVector = structVector.getChildrenFromFields().get(1); + + assertEquals(keyField.getName(), keyVector.getField().getName()); + assertEquals(valueField.getName(), valueVector.getField().getName()); + + assertEquals(structVector.getChildrenFromFields().size(), 2); + } + } } From b2d67655d1398d41771bf3936dd618eeb389d43b Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Mon, 24 Jun 2024 05:57:13 +0530 Subject: [PATCH 2/7] fix: minor --- .../src/test/java/org/apache/arrow/vector/TestMapVector.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java index 83ee7083811..746e9de0820 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java @@ -1214,7 +1214,7 @@ public void testValidateKeyValueFieldNames() { Field keyField = new Field("myKey", keyType, null); Field valueField = new Field("myValue", valueType, null); - List structFields = new ArrayList<>(); + List structFields = new ArrayList<>(2); structFields.add(keyField); structFields.add(valueField); From 27e770a435885a6775a2b4b8d73ac6d25a73f8d1 Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Mon, 24 Jun 2024 15:16:26 +0530 Subject: [PATCH 3/7] fix: minor change to update key and value names --- .../org/apache/arrow/vector/complex/MapVector.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index f63f4922968..c54f4e6902f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -71,11 +71,22 @@ public static MapVector empty(String name, BufferAllocator allocator, boolean ke */ public MapVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { super(name, allocator, fieldType, callBack); + KEY_NAME = "key"; + VALUE_NAME = "value"; defaultDataVectorName = DATA_VECTOR_NAME; } + /** + * Construct a MapVector instance. + * + * @param field The name of the field. + * @param allocator The allocator used for allocating/reallocating buffers. + * @param callBack A schema change callback. + */ public MapVector(Field field, BufferAllocator allocator, CallBack callBack) { super(field, allocator, callBack); + KEY_NAME = "key"; + VALUE_NAME = "value"; defaultDataVectorName = DATA_VECTOR_NAME; } From e5be0a49bc2621d799b264738d3270e98ffeb6bb Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Tue, 25 Jun 2024 08:00:54 +0530 Subject: [PATCH 4/7] fix: addressing reviews v1 --- .../codegen/templates/UnionMapWriter.java | 31 +++++++++++++++++-- .../arrow/vector/complex/MapVector.java | 12 ++----- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/java/vector/src/main/codegen/templates/UnionMapWriter.java b/java/vector/src/main/codegen/templates/UnionMapWriter.java index 606f880377b..26839391dff 100644 --- a/java/vector/src/main/codegen/templates/UnionMapWriter.java +++ b/java/vector/src/main/codegen/templates/UnionMapWriter.java @@ -140,6 +140,33 @@ public UnionMapWriter value() { return this; } + private String getWriteFieldName() { + Field mapField = this.vector.getField(); + if (mapField == null) { + throw new UnsupportedOperationException("MapVector does not have a field."); + } + if (mapField.getChildren().size() != 1) { + throw new UnsupportedOperationException("MapVector does not have a single struct field."); + } + Field structField = mapField.getChildren().get(0); + switch (mode) { + case KEY: + if (structField.getChildren().size() == 2) { + return structField.getChildren().get(0).getName(); + } else { + return MapVector.KEY_NAME; + } + case VALUE: + if (structField.getChildren().size() == 2) { + return structField.getChildren().get(1).getName(); + } else { + return MapVector.VALUE_NAME; + } + default: + throw new UnsupportedOperationException("Cannot get field name in OFF mode"); + } + } + <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> <#assign fields = minor.fields!type.fields /> <#assign uncappedName = name?uncap_first/> @@ -149,9 +176,9 @@ public UnionMapWriter value() { public ${name}Writer ${uncappedName}() { switch (mode) { case KEY: - return entryWriter.${uncappedName}(MapVector.KEY_NAME); + return entryWriter.${uncappedName}(getWriteFieldName()); case VALUE: - return entryWriter.${uncappedName}(MapVector.VALUE_NAME); + return entryWriter.${uncappedName}(getWriteFieldName()); default: return this; } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index c54f4e6902f..dceb931ae98 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -45,8 +45,11 @@ */ public class MapVector extends ListVector { + /** The default name of the key field in the MapVector. */ public static String KEY_NAME = "key"; + /** The default name of the value field in the MapVector. */ public static String VALUE_NAME = "value"; + public static final String DATA_VECTOR_NAME = "entries"; /** @@ -71,8 +74,6 @@ public static MapVector empty(String name, BufferAllocator allocator, boolean ke */ public MapVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { super(name, allocator, fieldType, callBack); - KEY_NAME = "key"; - VALUE_NAME = "value"; defaultDataVectorName = DATA_VECTOR_NAME; } @@ -85,8 +86,6 @@ public MapVector(String name, BufferAllocator allocator, FieldType fieldType, Ca */ public MapVector(Field field, BufferAllocator allocator, CallBack callBack) { super(field, allocator, callBack); - KEY_NAME = "key"; - VALUE_NAME = "value"; defaultDataVectorName = DATA_VECTOR_NAME; } @@ -115,11 +114,6 @@ public void initializeChildrenFromFields(List children) { Field keyField = structField.getChildren().get(0); checkArgument(!keyField.isNullable(), "Map data key type should be a non-nullable"); - Field valueField = structField.getChildren().get(1); - // set the KEY_NAME and VALUE_NAME from the field names - KEY_NAME = keyField.getName(); - VALUE_NAME = valueField.getName(); - AddOrGetResult addOrGetVector = addOrGetVector(structField.getFieldType()); checkArgument( addOrGetVector.isCreated(), "Child vector already existed: %s", addOrGetVector.getVector()); From 0a936df712396c35826403c1d559bb7a23f6e5d0 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Tue, 25 Jun 2024 08:24:52 +0530 Subject: [PATCH 5/7] fix: adding c data interface roundtrip --- .../java/org/apache/arrow/c/StreamTest.java | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/java/c/src/test/java/org/apache/arrow/c/StreamTest.java b/java/c/src/test/java/org/apache/arrow/c/StreamTest.java index 95363fcc328..de96bd5365b 100644 --- a/java/c/src/test/java/org/apache/arrow/c/StreamTest.java +++ b/java/c/src/test/java/org/apache/arrow/c/StreamTest.java @@ -42,6 +42,8 @@ import org.apache.arrow.vector.ViewVarCharVector; import org.apache.arrow.vector.compare.Range; import org.apache.arrow.vector.compare.RangeEqualsVisitor; +import org.apache.arrow.vector.complex.MapVector; +import org.apache.arrow.vector.complex.impl.UnionMapWriter; import org.apache.arrow.vector.dictionary.Dictionary; import org.apache.arrow.vector.dictionary.DictionaryProvider; import org.apache.arrow.vector.ipc.ArrowReader; @@ -263,6 +265,62 @@ public void roundtripDictionary() throws Exception { } } + @Test + public void roundtripMap() throws Exception { + Field keyField = + new Field( + "id", FieldType.notNullable(new ArrowType.Int(64, true)), Collections.emptyList()); + Field valueField = + new Field( + "value", FieldType.nullable(new ArrowType.Int(64, true)), Collections.emptyList()); + List structFields = new ArrayList<>(); + structFields.add(keyField); + structFields.add(valueField); + + Field structField = + new Field("entry", FieldType.notNullable(ArrowType.Struct.INSTANCE), structFields); + Field mapIntToIntField = + new Field( + "mapFieldIntToInt", + FieldType.notNullable(new ArrowType.Map(false)), + Collections.singletonList(structField)); + + Schema schema = new Schema(Collections.singletonList(mapIntToIntField)); + final List batches = new ArrayList<>(); + + try (VectorSchemaRoot vectorSchemaRoot = VectorSchemaRoot.create(schema, allocator); + MapVector mapVector = (MapVector) vectorSchemaRoot.getVector("mapFieldIntToInt")) { + UnionMapWriter mapWriter = mapVector.getWriter(); + mapWriter.setPosition(0); + mapWriter.startMap(); + for (int i = 0; i < 18; i++) { + mapWriter.startEntry(); + if (i % 2 == 0) { + mapWriter.key().bigInt().writeBigInt(i); + if (i % 3 == 0) { + mapWriter.value().bigInt().writeBigInt(i * 7); + } else { + mapWriter.value().bigInt().writeNull(); + } + } else { + mapWriter.key().bigInt().writeNull(); + mapWriter.value().bigInt().writeNull(); + } + mapWriter.endEntry(); + } + mapWriter.endMap(); + + mapWriter.setValueCount(1); + vectorSchemaRoot.setRowCount(1); + + System.out.println(vectorSchemaRoot.contentToTSVString()); + + VectorUnloader unloader = new VectorUnloader(vectorSchemaRoot); + batches.add(unloader.getRecordBatch()); + roundtrip(schema, batches); + } + } + @Test public void importReleasedStream() { try (final ArrowArrayStream stream = ArrowArrayStream.allocateNew(allocator)) { From 37a7341027396e3d2668b1c3286f22f701c0b1df Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Tue, 25 Jun 2024 13:44:26 +0530 Subject: [PATCH 6/7] fix: adding c data interface test --- .../org/apache/arrow/c/RoundtripTest.java | 49 +++++++++++++++++++ .../java/org/apache/arrow/c/StreamTest.java | 2 - 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java b/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java index 6591d1f7309..b8ac4230ff6 100644 --- a/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java +++ b/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java @@ -705,6 +705,55 @@ public void testMapVector() { } } + @Test + public void testMapVectorCustomFieldNames() { + Field keyField = + new Field( + "id", FieldType.notNullable(new ArrowType.Int(64, true)), Collections.emptyList()); + Field valueField = + new Field( + "value", FieldType.nullable(new ArrowType.Int(64, true)), Collections.emptyList()); + List structFields = new ArrayList<>(); + structFields.add(keyField); + structFields.add(valueField); + + Field structField = + new Field("entry", FieldType.notNullable(ArrowType.Struct.INSTANCE), structFields); + Field mapIntToIntField = + new Field( + "mapFieldIntToInt", + FieldType.notNullable(new ArrowType.Map(false)), + Collections.singletonList(structField)); + + Schema schema = new Schema(Collections.singletonList(mapIntToIntField)); + try (VectorSchemaRoot vectorSchemaRoot = VectorSchemaRoot.create(schema, allocator); + MapVector mapVector = (MapVector) vectorSchemaRoot.getVector("mapFieldIntToInt")) { + UnionMapWriter mapWriter = mapVector.getWriter(); + mapWriter.setPosition(0); + mapWriter.startMap(); + for (int i = 0; i < 18; i++) { + mapWriter.startEntry(); + if (i % 2 == 0) { + mapWriter.key().bigInt().writeBigInt(i); + if (i % 3 == 0) { + mapWriter.value().bigInt().writeBigInt(i * 7); + } else { + mapWriter.value().bigInt().writeNull(); + } + } else { + mapWriter.key().bigInt().writeNull(); + mapWriter.value().bigInt().writeNull(); + } + mapWriter.endEntry(); + } + mapWriter.endMap(); + + mapWriter.setValueCount(1); + vectorSchemaRoot.setRowCount(1); + roundtrip(mapVector, MapVector.class); + } + } + @Test public void testUnionVector() { final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder(); diff --git a/java/c/src/test/java/org/apache/arrow/c/StreamTest.java b/java/c/src/test/java/org/apache/arrow/c/StreamTest.java index de96bd5365b..d7872b0c92d 100644 --- a/java/c/src/test/java/org/apache/arrow/c/StreamTest.java +++ b/java/c/src/test/java/org/apache/arrow/c/StreamTest.java @@ -313,8 +313,6 @@ public void roundtripMap() throws Exception { mapWriter.setValueCount(1); vectorSchemaRoot.setRowCount(1); - System.out.println(vectorSchemaRoot.contentToTSVString()); - VectorUnloader unloader = new VectorUnloader(vectorSchemaRoot); batches.add(unloader.getRecordBatch()); roundtrip(schema, batches); From 20ed1fc666d88ec6a52575a08d02f32d85e33cf9 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Fri, 28 Jun 2024 05:48:16 +0530 Subject: [PATCH 7/7] fix: addressing reviews v3 --- dev/archery/archery/integration/datagen.py | 5 ++-- .../codegen/templates/UnionMapWriter.java | 24 +++++++++---------- .../arrow/vector/complex/MapVector.java | 4 ++-- .../apache/arrow/vector/TestMapVector.java | 7 +++--- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py index 47310c905a9..8bdcccd34f0 100644 --- a/dev/archery/archery/integration/datagen.py +++ b/dev/archery/archery/integration/datagen.py @@ -1896,9 +1896,10 @@ def _temp_path(): generate_map_case(), generate_non_canonical_map_case() - .skip_tester('Java') # TODO(ARROW-8715) + # .skip_tester('Java') # TODO(ARROW-8715) # Canonical map names are restored on import, so the schemas are unequal - .skip_format(SKIP_C_SCHEMA, 'C++'), + .skip_format(SKIP_C_SCHEMA, 'C++') + .skip_format(SKIP_C_SCHEMA, 'Java'), generate_nested_case(), diff --git a/java/vector/src/main/codegen/templates/UnionMapWriter.java b/java/vector/src/main/codegen/templates/UnionMapWriter.java index 26839391dff..160b79d58d5 100644 --- a/java/vector/src/main/codegen/templates/UnionMapWriter.java +++ b/java/vector/src/main/codegen/templates/UnionMapWriter.java @@ -142,25 +142,25 @@ public UnionMapWriter value() { private String getWriteFieldName() { Field mapField = this.vector.getField(); - if (mapField == null) { - throw new UnsupportedOperationException("MapVector does not have a field."); - } - if (mapField.getChildren().size() != 1) { - throw new UnsupportedOperationException("MapVector does not have a single struct field."); - } + Preconditions.checkNotNull(mapField, "MapVector does not have a field."); + Preconditions.checkArgument(mapField.getChildren().size() == 1, + "MapVector does not have a single struct field."); Field structField = mapField.getChildren().get(0); switch (mode) { case KEY: - if (structField.getChildren().size() == 2) { - return structField.getChildren().get(0).getName(); - } else { + if (structField.getChildren().size() == 0) { + // key is not defined in the struct, use default name return MapVector.KEY_NAME; + } else { + return structField.getChildren().get(0).getName(); } case VALUE: - if (structField.getChildren().size() == 2) { - return structField.getChildren().get(1).getName(); - } else { + if (structField.getChildren().size() < 2) { + // key may or may not have been defined in the struct, but + // value has not been defined. return MapVector.VALUE_NAME; + } else { + return structField.getChildren().get(1).getName(); } default: throw new UnsupportedOperationException("Cannot get field name in OFF mode"); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index dceb931ae98..e6c5caf0f50 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -46,9 +46,9 @@ public class MapVector extends ListVector { /** The default name of the key field in the MapVector. */ - public static String KEY_NAME = "key"; + public static final String KEY_NAME = "key"; /** The default name of the value field in the MapVector. */ - public static String VALUE_NAME = "value"; + public static final String VALUE_NAME = "value"; public static final String DATA_VECTOR_NAME = "entries"; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java index 746e9de0820..6f8331362e1 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java @@ -23,6 +23,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; @@ -1208,15 +1209,13 @@ public void testMakeTransferPairPreserveNullability() { @Test public void testValidateKeyValueFieldNames() { - FieldType keyType = new FieldType(false, MinorType.BIGINT.getType(), null, null); + FieldType keyType = FieldType.notNullable(MinorType.BIGINT.getType()); FieldType valueType = FieldType.nullable(MinorType.FLOAT8.getType()); Field keyField = new Field("myKey", keyType, null); Field valueField = new Field("myValue", valueType, null); - List structFields = new ArrayList<>(2); - structFields.add(keyField); - structFields.add(valueField); + List structFields = Arrays.asList(keyField, valueField); Field structField = new Field("entry", FieldType.notNullable(ArrowType.Struct.INSTANCE), structFields);