diff --git a/cpp/src/arrow/ipc/json-internal.cc b/cpp/src/arrow/ipc/json-internal.cc index 42663c0178d..135296551c9 100644 --- a/cpp/src/arrow/ipc/json-internal.cc +++ b/cpp/src/arrow/ipc/json-internal.cc @@ -1274,8 +1274,8 @@ class ArrayReader { Status Visit(const MapType& type) { auto list_type = std::make_shared(field( - "item", - struct_({field("key", type.key_type(), false), field("item", type.item_type())}), + "entries", + struct_({field("key", type.key_type(), false), field("value", type.item_type())}), false)); std::shared_ptr list_array; RETURN_NOT_OK(CreateList(list_type, &list_array)); diff --git a/cpp/src/arrow/ipc/json-test.cc b/cpp/src/arrow/ipc/json-test.cc index fb57fa7f52e..338552dd575 100644 --- a/cpp/src/arrow/ipc/json-test.cc +++ b/cpp/src/arrow/ipc/json-test.cc @@ -204,7 +204,7 @@ TEST(TestJsonArrayWriter, NestedTypes) { TestArrayRoundTrip(list_array); - // List + // Map auto map_type = map(utf8(), int32()); auto keys_array = ArrayFromJSON(utf8(), R"(["a", "b", "c", "d", "a", "b", "c"])"); diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc index 4b349cb4a69..4e1a1576ddb 100644 --- a/cpp/src/arrow/ipc/metadata-internal.cc +++ b/cpp/src/arrow/ipc/metadata-internal.cc @@ -320,9 +320,7 @@ Status ConcreteTypeFromFlatbuffer(flatbuf::Type type, const void* type_data, if (children.size() != 1) { return Status::Invalid("Map must have exactly 1 child field"); } - if ( // FIXME(bkietz) temporarily disabled: this field is sometimes read nullable - // children[0]->nullable() || - children[0]->type()->id() != Type::STRUCT || + if (children[0]->nullable() || children[0]->type()->id() != Type::STRUCT || children[0]->type()->num_children() != 2) { return Status::Invalid("Map's key-item pairs must be non-nullable structs"); } diff --git a/cpp/src/arrow/ipc/test-common.cc b/cpp/src/arrow/ipc/test-common.cc index 12adebc2516..47c307659f0 100644 --- a/cpp/src/arrow/ipc/test-common.cc +++ b/cpp/src/arrow/ipc/test-common.cc @@ -120,7 +120,7 @@ Status MakeRandomMapArray(const std::shared_ptr& key_array, bool include_nulls, MemoryPool* pool, std::shared_ptr* out) { auto pair_type = struct_( - {field("key", key_array->type(), false), field("item", item_array->type())}); + {field("key", key_array->type(), false), field("value", item_array->type())}); auto pair_array = std::make_shared(pair_type, num_maps, ArrayVector{key_array, item_array}); diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index b21533121c5..b3fcb0cde84 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -153,8 +153,11 @@ std::string ListType::ToString() const { MapType::MapType(const std::shared_ptr& key_type, const std::shared_ptr& item_type, bool keys_sorted) - : ListType(struct_({std::make_shared("key", key_type, false), - std::make_shared("item", item_type)})), + : ListType(std::make_shared( + "entries", + struct_({std::make_shared("key", key_type, false), + std::make_shared("value", item_type)}), + false)), keys_sorted_(keys_sorted) { id_ = type_id; } diff --git a/integration/integration_test.py b/integration/integration_test.py index aca05747c72..dbc03c73a1a 100644 --- a/integration/integration_test.py +++ b/integration/integration_test.py @@ -714,7 +714,7 @@ def __init__(self, name, key_type, item_type, nullable=True, assert not key_type.nullable self.key_type = key_type self.item_type = item_type - self.pair_type = StructType('item', [key_type, item_type], False) + self.pair_type = StructType('entries', [key_type, item_type], False) self.keysSorted = keysSorted def _get_type(self): @@ -1060,10 +1060,10 @@ def generate_interval_case(): def generate_map_case(): # TODO(bkietz): separated from nested_case so it can be - # independently skipped, consolidate after Java supports map + # independently skipped, consolidate after JS supports map fields = [ MapType('map_nullable', get_field('key', 'utf8', False), - get_field('item', 'int32')), + get_field('value', 'int32')), ] batch_sizes = [7, 10] @@ -1220,12 +1220,10 @@ def _compare_implementations(self, producer, consumer): file_id = guid()[:8] - if (('JS' in (producer.name, consumer.name) or - 'Java' in (producer.name, consumer.name)) and + if ('JS' in (producer.name, consumer.name) and "map" in test_case.name): print('TODO(ARROW-1279): Enable map tests ' + - ' for Java and JS once Java supports them and JS\'' + - ' are unbroken') + ' for JS once they are unbroken') continue if ('JS' in (producer.name, consumer.name) and diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index 340fb2ae1ad..a3671501ba7 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -25,14 +25,17 @@ import org.apache.arrow.vector.AddOrGetResult; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.complex.impl.UnionMapReader; import org.apache.arrow.vector.complex.impl.UnionMapWriter; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; import org.apache.arrow.vector.types.pojo.ArrowType.Map; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; +import org.apache.arrow.vector.util.SchemaChangeRuntimeException; /** * A MapVector is used to store entries of key/value pairs. It is a container vector that is @@ -46,6 +49,9 @@ public class MapVector extends ListVector { public static final String KEY_NAME = "key"; public static final String VALUE_NAME = "value"; + // TODO: this is only used for addOrGetVector because ListVector declares it private + protected CallBack callBack; + /** * Construct an empty MapVector with no data. Child vectors must be added subsequently. * @@ -68,6 +74,7 @@ public static MapVector empty(String name, BufferAllocator allocator, boolean ke */ public MapVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { super(name, allocator, fieldType, callBack); + this.callBack = callBack; reader = new UnionMapReader(this); } @@ -121,9 +128,29 @@ public UnionMapReader getReader() { */ @Override public AddOrGetResult addOrGetVector(FieldType fieldType) { - AddOrGetResult result = super.addOrGetVector(fieldType); + + // TODO: can call super method once DATA_VECTOR_NAME is configurable + boolean created = false; + if (vector instanceof ZeroVector) { + vector = fieldType.createNewSingleVector("entries", allocator, callBack); + // returned vector must have the same field + created = true; + if (callBack != null && + // not a schema change if changing from ZeroVector to ZeroVector + (fieldType.getType().getTypeID() != ArrowTypeID.Null)) { + callBack.doWork(); + } + } + + if (vector.getField().getType().getTypeID() != fieldType.getType().getTypeID()) { + final String msg = String.format("Inner vector type mismatch. Requested type: [%s], actual type: [%s]", + fieldType.getType().getTypeID(), vector.getField().getType().getTypeID()); + throw new SchemaChangeRuntimeException(msg); + } + reader = new UnionMapReader(this); - return result; + + return new AddOrGetResult<>((T) vector, created); } /**