From 6d4e1dfbaa0071edbc9c8c76b54b1d52a119603f Mon Sep 17 00:00:00 2001 From: Li Jin Date: Fri, 8 Sep 2017 12:40:19 -0400 Subject: [PATCH] Fix JsonReader to read union vectors correctly --- .../vector/file/json/JsonFileReader.java | 11 ++++--- .../arrow/vector/file/json/TestJSONFile.java | 32 +++++++++---------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java index 71685d13589..8bb0f26d978 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java @@ -67,7 +67,6 @@ import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.NullableMapVector; import org.apache.arrow.vector.dictionary.Dictionary; import org.apache.arrow.vector.dictionary.DictionaryProvider; import org.apache.arrow.vector.schema.ArrowVectorType; @@ -217,6 +216,11 @@ public VectorSchemaRoot read() throws IOException { } } + /* + * TODO: This method doesn't load some vectors correctly. For instance, it doesn't initialize + * `lastSet` in ListVector, VarCharVector, and NullableVarBinaryVector. A better way of implementing + * this function is to use `loadFieldBuffers` methods in FieldVector. 
+ */ private void readVector(Field field, FieldVector vector) throws JsonParseException, IOException { List vectorTypes = field.getTypeLayout().getVectorTypes(); List fieldInnerVectors = vector.getFieldInnerVectors(); @@ -231,6 +235,8 @@ private void readVector(Field field, FieldVector vector) throws JsonParseExcepti throw new IllegalArgumentException("Expected field " + field.getName() + " but got " + name); } int count = readNextField("count", Integer.class); + vector.allocateNew(); + vector.getMutator().setValueCount(count); for (int v = 0; v < vectorTypes.size(); v++) { ArrowVectorType vectorType = vectorTypes.get(v); BufferBacked innerVector = fieldInnerVectors.get(v); @@ -266,9 +272,6 @@ private void readVector(Field field, FieldVector vector) throws JsonParseExcepti } readToken(END_ARRAY); } - if (vector instanceof NullableMapVector) { - ((NullableMapVector) vector).valueCount = count; - } } readToken(END_OBJECT); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java b/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java index b7c06327291..960567fc870 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java @@ -30,6 +30,7 @@ import org.apache.arrow.vector.dictionary.DictionaryProvider.MapDictionaryProvider; import org.apache.arrow.vector.file.BaseFileTest; import org.apache.arrow.vector.types.pojo.Schema; +import org.apache.arrow.vector.util.Validator; import org.junit.Assert; import org.junit.Test; import org.slf4j.Logger; @@ -96,28 +97,25 @@ public void testWriteReadUnionJSON() throws IOException { try ( BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); NullableMapVector parent = NullableMapVector.empty("parent", vectorAllocator)) { - writeUnionData(count, parent); - printVectors(parent.getChildrenFromFields()); - 
VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root")); - validateUnionData(count, root); + try (VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root"))) { + validateUnionData(count, root); + writeJSON(file, root, null); - writeJSON(file, root, null); - } - // read - try ( - BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); - BufferAllocator vectorAllocator = allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE); - ) { - JsonFileReader reader = new JsonFileReader(file, readerAllocator); - Schema schema = reader.start(); - LOGGER.debug("reading schema: " + schema); + // read + try (BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE)) { + JsonFileReader reader = new JsonFileReader(file, readerAllocator); - // initialize vectors - try (VectorSchemaRoot root = reader.read();) { - validateUnionData(count, root); + Schema schema = reader.start(); + LOGGER.debug("reading schema: " + schema); + + try (VectorSchemaRoot rootFromJson = reader.read();) { + validateUnionData(count, rootFromJson); + Validator.compareVectorSchemaRoot(root, rootFromJson); + } + } } } }