From 8ab4efcfb0a637e179cc02beb3314029bdf34ffc Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Tue, 14 May 2019 12:44:47 -0400 Subject: [PATCH 1/3] Adding integration tests for fixed_size_list --- cpp/Dockerfile | 1 - cpp/src/arrow/ipc/writer.cc | 10 +++++ integration/integration_test.py | 43 ++++++++++++++++++- .../arrow/vector/ipc/ArrowFileReader.java | 2 +- 4 files changed, 53 insertions(+), 3 deletions(-) diff --git a/cpp/Dockerfile b/cpp/Dockerfile index a570047f07e..52f3521b03b 100644 --- a/cpp/Dockerfile +++ b/cpp/Dockerfile @@ -26,7 +26,6 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ g++ \ gcc \ git \ - ninja-build \ pkg-config \ tzdata \ wget \ diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc index 8917410b27e..37927a47f33 100644 --- a/cpp/src/arrow/ipc/writer.cc +++ b/cpp/src/arrow/ipc/writer.cc @@ -346,6 +346,16 @@ class RecordBatchSerializer : public ArrayVisitor { Status Visit(const MapArray& array) override { return VisitList(array); } + Status Visit(const FixedSizeListArray& array) override { + --max_recursion_depth_; + auto size = array.list_type()->list_size(); + auto values = array.values()->Slice(array.offset() * size, array.length() * size); + + RETURN_NOT_OK(VisitArray(*values)); + ++max_recursion_depth_; + return Status::OK(); + } + Status Visit(const StructArray& array) override { --max_recursion_depth_; for (int i = 0; i < array.num_fields(); ++i) { diff --git a/integration/integration_test.py b/integration/integration_test.py index cb0501d843c..d9e9eafc9b3 100644 --- a/integration/integration_test.py +++ b/integration/integration_test.py @@ -651,7 +651,6 @@ class StringColumn(BinaryColumn): def _encode_value(self, x): return frombytes(x) - class ListType(DataType): def __init__(self, name, value_type, nullable=True): @@ -765,6 +764,47 @@ def _get_children(self): return [self.pairs.get_json()] +class FixedSizeListType(DataType): + + def __init__(self, name, value_type, list_size, nullable=True): + super(FixedSizeListType, self).__init__(name, nullable=nullable) + self.value_type = value_type + self.list_size = list_size + + def _get_type(self): + return OrderedDict([ + ('name', 'fixedsizelist'), + ('listSize', self.list_size) + ]) + + def _get_children(self): + return [self.value_type.get_json()] + + def generate_column(self, size, name=None): + is_valid = self._make_is_valid(size) + values = self.value_type.generate_column(size * self.list_size) + + if name is None: + name = self.name + return FixedSizeListColumn(name, size, is_valid, values) + + +class FixedSizeListColumn(Column): + + def __init__(self, name, count, is_valid, values): + super(FixedSizeListColumn, self).__init__(name, count) + self.is_valid = is_valid + self.values = values + + def _get_buffers(self): + return [ + ('VALIDITY', [int(v) for v in self.is_valid]) + ] + + def _get_children(self): + return [self.values.get_json()] + + class StructType(DataType): def __init__(self, name, field_types, nullable=True): @@ -1032,6 +1072,7 @@ def generate_map_case(): def generate_nested_case(): fields = [ ListType('list_nullable', get_field('item', 'int32')), + FixedSizeListType('fixedsizelist_nullable', get_field('item', 'int32'), 4), StructType('struct_nullable', [get_field('f1', 'int32'), get_field('f2', 'utf8')]), diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileReader.java index aa25abd8267..e9ffaef78a0 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileReader.java @@ -142,7 +142,7 @@ public boolean loadRecordBatch(ArrowBlock block) throws IOException { ensureInitialized(); int blockIndex = footer.getRecordBatches().indexOf(block); if (blockIndex == -1) { - throw new IllegalArgumentException("Arrow bock does not exist in record batches: " + block); + throw new IllegalArgumentException("Arrow block does not exist in record batches: " + block); } currentRecordBatch = blockIndex; return loadNextBatch(); From e7ed00143458193d0bbdd8b2def3dc11d6588865 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Tue, 14 May 2019 14:58:52 -0400 Subject: [PATCH 2/3] fix flake8 error --- integration/integration_test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/integration/integration_test.py b/integration/integration_test.py index d9e9eafc9b3..fad625ef208 100644 --- a/integration/integration_test.py +++ b/integration/integration_test.py @@ -651,6 +651,7 @@ class StringColumn(BinaryColumn): def _encode_value(self, x): return frombytes(x) + class ListType(DataType): def __init__(self, name, value_type, nullable=True): @@ -1072,7 +1073,8 @@ def generate_map_case(): def generate_nested_case(): fields = [ ListType('list_nullable', get_field('item', 'int32')), - FixedSizeListType('fixedsizelist_nullable', get_field('item', 'int32'), 4), + FixedSizeListType('fixedsizelist_nullable', + get_field('item', 'int32'), 4), StructType('struct_nullable', [get_field('f1', 'int32'), get_field('f2', 'utf8')]), From 8b356f34c9ece87799de025bd1c98d3234303f20 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Thu, 13 Jun 2019 09:29:38 -0400 Subject: [PATCH 3/3] revert removal of ninja-build from dockerfile --- cpp/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/Dockerfile b/cpp/Dockerfile index 52f3521b03b..a570047f07e 100644 --- a/cpp/Dockerfile +++ b/cpp/Dockerfile @@ -26,6 +26,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ g++ \ gcc \ git \ + ninja-build \ pkg-config \ tzdata \ wget \