-
Notifications
You must be signed in to change notification settings - Fork 4k
ARROW-1497: [Java] Fix JsonReader to initialize count correctly #1067
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -67,7 +67,6 @@ | |
| import org.apache.arrow.vector.VarBinaryVector; | ||
| import org.apache.arrow.vector.VarCharVector; | ||
| import org.apache.arrow.vector.VectorSchemaRoot; | ||
| import org.apache.arrow.vector.complex.NullableMapVector; | ||
| import org.apache.arrow.vector.dictionary.Dictionary; | ||
| import org.apache.arrow.vector.dictionary.DictionaryProvider; | ||
| import org.apache.arrow.vector.schema.ArrowVectorType; | ||
|
|
@@ -217,6 +216,11 @@ public VectorSchemaRoot read() throws IOException { | |
| } | ||
| } | ||
|
|
||
| /* | ||
| * TODO: This method doesn't load some vectors correctly. For instance, it doesn't initialize | ||
| * `lastSet` in ListVector, VarCharVector, NullableVarBinaryVector A better way of implementing | ||
| * this function is to use `loadFieldBuffers` methods in FieldVector. | ||
| */ | ||
| private void readVector(Field field, FieldVector vector) throws JsonParseException, IOException { | ||
| List<ArrowVectorType> vectorTypes = field.getTypeLayout().getVectorTypes(); | ||
| List<BufferBacked> fieldInnerVectors = vector.getFieldInnerVectors(); | ||
|
|
@@ -231,6 +235,8 @@ private void readVector(Field field, FieldVector vector) throws JsonParseExcepti | |
| throw new IllegalArgumentException("Expected field " + field.getName() + " but got " + name); | ||
| } | ||
| int count = readNextField("count", Integer.class); | ||
| vector.allocateNew(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am still not very familiar with when to call the allocateNew() method. Why is it necessary to set the value count?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
We need to call
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should set lastSet before setting the value count else setValueCount() will corrupt the vector.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unfortunately it's hard to do it here unless we do some crazy instance matching here, I don't want to do that because it's too hard to maintain. The correct way I think is to use the proper
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, sounds good to me. I think we need lastSet only for VarChar, VarBinary and ListVector?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not too sure, but at least, + their Nullable version. |
||
| vector.getMutator().setValueCount(count); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it work to do the following?
Doing this, you should be able to clean up all those similar calls in the inner vector loop too. I'm also used to
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oops, sorry I commented too late after the merge. I can look into this at another time.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we create a follow up JIRA about this? thanks! |
||
| for (int v = 0; v < vectorTypes.size(); v++) { | ||
| ArrowVectorType vectorType = vectorTypes.get(v); | ||
| BufferBacked innerVector = fieldInnerVectors.get(v); | ||
|
|
@@ -266,9 +272,6 @@ private void readVector(Field field, FieldVector vector) throws JsonParseExcepti | |
| } | ||
| readToken(END_ARRAY); | ||
| } | ||
| if (vector instanceof NullableMapVector) { | ||
| ((NullableMapVector) vector).valueCount = count; | ||
| } | ||
| } | ||
| readToken(END_OBJECT); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,6 +30,7 @@ | |
| import org.apache.arrow.vector.dictionary.DictionaryProvider.MapDictionaryProvider; | ||
| import org.apache.arrow.vector.file.BaseFileTest; | ||
| import org.apache.arrow.vector.types.pojo.Schema; | ||
| import org.apache.arrow.vector.util.Validator; | ||
| import org.junit.Assert; | ||
| import org.junit.Test; | ||
| import org.slf4j.Logger; | ||
|
|
@@ -96,28 +97,25 @@ public void testWriteReadUnionJSON() throws IOException { | |
| try ( | ||
| BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); | ||
| NullableMapVector parent = NullableMapVector.empty("parent", vectorAllocator)) { | ||
|
|
||
| writeUnionData(count, parent); | ||
|
|
||
| printVectors(parent.getChildrenFromFields()); | ||
|
|
||
| VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root")); | ||
| validateUnionData(count, root); | ||
| try (VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root"))) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why changing to a nested try blocks?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because I need both |
||
| validateUnionData(count, root); | ||
| writeJSON(file, root, null); | ||
|
|
||
| writeJSON(file, root, null); | ||
| } | ||
| // read | ||
| try ( | ||
| BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); | ||
| BufferAllocator vectorAllocator = allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE); | ||
| ) { | ||
| JsonFileReader reader = new JsonFileReader(file, readerAllocator); | ||
| Schema schema = reader.start(); | ||
| LOGGER.debug("reading schema: " + schema); | ||
| // read | ||
| try (BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE)) { | ||
| JsonFileReader reader = new JsonFileReader(file, readerAllocator); | ||
|
|
||
| // initialize vectors | ||
| try (VectorSchemaRoot root = reader.read();) { | ||
| validateUnionData(count, root); | ||
| Schema schema = reader.start(); | ||
| LOGGER.debug("reading schema: " + schema); | ||
|
|
||
| try (VectorSchemaRoot rootFromJson = reader.read();) { | ||
| validateUnionData(count, rootFromJson); | ||
| Validator.compareVectorSchemaRoot(root, rootFromJson); | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This would fail without the JsonFileReader change |
||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can open a follow up Jira for this.