From 741ff717ad4decc8d3a02ae2bb1d18dcef58abab Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Thu, 8 Dec 2016 21:56:09 -0800 Subject: [PATCH 1/2] ARROW-400: set struct length on json load --- .../main/java/org/apache/arrow/vector/complex/MapVector.java | 2 +- .../org/apache/arrow/vector/file/json/JsonFileReader.java | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index c2f216b197e..31a1bb74b8e 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -50,7 +50,7 @@ public class MapVector extends AbstractMapVector { private final SingleMapReaderImpl reader = new SingleMapReaderImpl(this); private final Accessor accessor = new Accessor(); private final Mutator mutator = new Mutator(); - int valueCount; + public int valueCount; public MapVector(String name, BufferAllocator allocator, CallBack callBack) { super(name, allocator, callBack); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java index 26dd3f6dfe5..152867c1a11 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java @@ -47,6 +47,7 @@ import org.apache.arrow.vector.ValueVector.Mutator; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.complex.NullableMapVector; import org.apache.arrow.vector.schema.ArrowVectorType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; @@ -153,6 +154,9 @@ private void readVector(Field field, FieldVector vector) throws JsonParseExcepti } 
readToken(END_ARRAY); } + if (vector instanceof NullableMapVector) { + ((NullableMapVector)vector).valueCount = count; + } } readToken(END_OBJECT); } From f516ba1a3b9206b236c3153e66efb6ccbe4bf345 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 9 Dec 2016 11:51:07 -0500 Subject: [PATCH 2/2] Add unit test for ARROW-400 Change-Id: I9b92eaf56f5a9b5b486eec92356b2fc1b84755cb --- integration/data/struct_example.json | 237 ++++++++++++++++++ .../arrow/vector/file/json/TestJSONFile.java | 20 ++ 2 files changed, 257 insertions(+) create mode 100644 integration/data/struct_example.json diff --git a/integration/data/struct_example.json b/integration/data/struct_example.json new file mode 100644 index 00000000000..3ea062db7ba --- /dev/null +++ b/integration/data/struct_example.json @@ -0,0 +1,237 @@ +{ + "schema": { + "fields": [ + { + "name": "struct_nullable", + "type": { + "name": "struct" + }, + "nullable": true, + "children": [ + { + "name": "f1", + "type": { + "name": "int", + "isSigned": true, + "bitWidth": 32 + }, + "nullable": true, + "children": [], + "typeLayout": { + "vectors": [ + { + "type": "VALIDITY", + "typeBitWidth": 1 + }, + { + "type": "DATA", + "typeBitWidth": 32 + } + ] + } + }, + { + "name": "f2", + "type": { + "name": "utf8" + }, + "nullable": true, + "children": [], + "typeLayout": { + "vectors": [ + { + "type": "VALIDITY", + "typeBitWidth": 1 + }, + { + "type": "OFFSET", + "typeBitWidth": 32 + }, + { + "type": "DATA", + "typeBitWidth": 8 + } + ] + } + } + ], + "typeLayout": { + "vectors": [ + { + "type": "VALIDITY", + "typeBitWidth": 1 + } + ] + } + } + ] + }, + "batches": [ + { + "count": 7, + "columns": [ + { + "name": "struct_nullable", + "count": 7, + "VALIDITY": [ + 0, + 1, + 1, + 1, + 0, + 1, + 0 + ], + "children": [ + { + "name": "f1", + "count": 7, + "VALIDITY": [ + 1, + 0, + 1, + 1, + 1, + 0, + 0 + ], + "DATA": [ + 1402032511, + 290876774, + 137773603, + 410361374, + 1959836418, + 1995074679, + -163525262 + ] + }, + { + "name": "f2", + 
"count": 7, + "VALIDITY": [ + 0, + 1, + 1, + 1, + 0, + 1, + 0 + ], + "OFFSET": [ + 0, + 0, + 7, + 14, + 21, + 21, + 28, + 28 + ], + "DATA": [ + "", + "MhRNxD4", + "3F9HBxK", + "aVd88fp", + "", + "3loZrRf", + "" + ] + } + ] + } + ] + }, + { + "count": 10, + "columns": [ + { + "name": "struct_nullable", + "count": 10, + "VALIDITY": [ + 0, + 1, + 1, + 0, + 1, + 0, + 0, + 1, + 1, + 1 + ], + "children": [ + { + "name": "f1", + "count": 10, + "VALIDITY": [ + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0 + ], + "DATA": [ + -2041500147, + 1715692943, + -35444996, + 1425496657, + 112765084, + 1760754983, + 413888857, + 2039738337, + -1924327700, + 670528518 + ] + }, + { + "name": "f2", + "count": 10, + "VALIDITY": [ + 1, + 0, + 0, + 1, + 1, + 1, + 1, + 1, + 1, + 0 + ], + "OFFSET": [ + 0, + 7, + 7, + 7, + 14, + 21, + 28, + 35, + 42, + 49, + 49 + ], + "DATA": [ + "AS5oARE", + "", + "", + "JGdagcX", + "78SLiRw", + "vbGf7OY", + "5uh5fTs", + "0ilsf82", + "LjS9MbU", + "" + ] + } + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java b/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java index 7d25003f8b3..3720a13b0fc 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java @@ -21,11 +21,13 @@ import java.io.IOException; import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.NullableMapVector; import org.apache.arrow.vector.file.BaseFileTest; import org.apache.arrow.vector.types.pojo.Schema; +import org.junit.Assert; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -117,4 +119,22 @@ public void testWriteReadUnionJSON() throws IOException { } } + 
/** Regression test for ARROW-400: opens the integration fixture "../../integration/data/struct_example.json" with a JsonFileReader, calls start() and then read() once, and asserts that the "struct_nullable" vector of the returned root reports a value count of 7 (the "count" of the fixture's first batch). NOTE(review): only the result of the first read() is checked, so the fixture's second batch (count 10) is presumably not exercised; and the reader is never closed here. Confirm whether JsonFileReader needs an explicit close(). */ @Test + public void testSetStructLength() throws IOException { + File file = new File("../../integration/data/struct_example.json"); + try ( + BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); + ) { + JsonFileReader reader = new JsonFileReader(file, readerAllocator); + Schema schema = reader.start(); + LOGGER.debug("reading schema: " + schema); + + // initialize vectors + try (VectorSchemaRoot root = reader.read();) { + FieldVector vector = root.getVector("struct_nullable"); + Assert.assertEquals(7, vector.getAccessor().getValueCount()); + } + } + } + }