-
Notifications
You must be signed in to change notification settings - Fork 4k
GH-42156: [Java] Handle offset field from ArrowArray when BufferImportTypeVisitor imports offset buffer #43053
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
Show all changes
25 commits
Select commit
Hold shift + click to select a range
b2883ac
fix: temp
vibhatha fd71ada
fix: temp c++ slice test
vibhatha b61ac8f
fix: initial functional code
vibhatha 666aa0f
fix: revert test in C++
vibhatha d8b0a42
fix: minor
vibhatha 6ec8384
fix: remove unnecessary java tests
vibhatha d257fdd
fix: data buffer capacity
vibhatha 37865ce
fix: addressing reviews partially
vibhatha 5077763
fix: addressing reviews
vibhatha ee62b7e
fix: addressing reviews v2
vibhatha 216de29
fix: addressing reviews v3
vibhatha 27ce685
fix: addressing reviews v4
vibhatha eb2dff1
fix: addressing reviews v4
vibhatha a07266f
fix: revert change on offset usage for capacity determination
vibhatha 69446bf
fix: adding additional test
vibhatha a04a412
fix: adding other test skeletons with todos
vibhatha c3424b8
fix: adding experimental offset method for data slicing
vibhatha 9563162
fix: temp
vibhatha f2f5fcb
fix: adding tests for Int, List, LargeList, Utf8, Binary
vibhatha c2d2385
fix: adding test cases and updates for variable-width view vector
vibhatha 6774efe
fix: temp commit for fixedsizelist
vibhatha 9497066
fix: test ci log enabling
vibhatha 7e591b3
fix: test 3
vibhatha f3cf3ff
fix: test 4
vibhatha 07660f1
fix: test 5
vibhatha File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,6 +28,7 @@ | |
| import org.apache.arrow.util.AutoCloseables; | ||
| import org.apache.arrow.util.VisibleForTesting; | ||
| import org.apache.arrow.vector.BaseVariableWidthViewVector; | ||
| import org.apache.arrow.vector.BitVectorHelper; | ||
| import org.apache.arrow.vector.DateDayVector; | ||
| import org.apache.arrow.vector.DateMilliVector; | ||
| import org.apache.arrow.vector.DurationVector; | ||
|
|
@@ -59,17 +60,20 @@ class BufferImportTypeVisitor implements ArrowType.ArrowTypeVisitor<List<ArrowBu | |
| private final BufferAllocator allocator; | ||
| private final ReferenceCountedArrowArray underlyingAllocation; | ||
| private final ArrowFieldNode fieldNode; | ||
| private final long arrowArrayOffset; | ||
| private final long[] buffers; | ||
| private final List<ArrowBuf> imported; | ||
|
|
||
| BufferImportTypeVisitor( | ||
| BufferAllocator allocator, | ||
| ReferenceCountedArrowArray underlyingAllocation, | ||
| ArrowFieldNode fieldNode, | ||
| long arrowArrayOffset, | ||
| long[] buffers) { | ||
| this.allocator = allocator; | ||
| this.underlyingAllocation = underlyingAllocation; | ||
| this.fieldNode = fieldNode; | ||
| this.arrowArrayOffset = arrowArrayOffset; | ||
| this.buffers = buffers; | ||
| this.imported = new ArrayList<>(); | ||
| } | ||
|
|
@@ -110,14 +114,63 @@ private ArrowBuf importFixedBits(ArrowType type, int index, long bitsPerSlot) { | |
| return importBuffer(type, index, capacity); | ||
| } | ||
|
|
||
| private ArrowBuf importFixedBitsWithOffset(ArrowType type, int index, long bitsPerSlot) { | ||
| // TODO: merge with importFixedBits | ||
| // Calculate the total capacity needed, including the offset | ||
| final long totalSlots = arrowArrayOffset + fieldNode.getLength(); | ||
| final long totalBits = totalSlots * bitsPerSlot; | ||
| final long capacity = DataSizeRoundingUtil.divideBy8Ceil(totalBits); | ||
|
|
||
| // Import the buffer with the calculated capacity | ||
| ArrowBuf buf = importBuffer(type, index, capacity); | ||
|
|
||
| // Calculate the start and end positions in bits | ||
| final long startBit = arrowArrayOffset * bitsPerSlot; | ||
| final long endBit = (arrowArrayOffset + fieldNode.getLength()) * bitsPerSlot; | ||
|
|
||
| // Calculate the start and end positions in bytes | ||
| // TODO: this cannot process bit boundaries in slicing | ||
| final long startByte = DataSizeRoundingUtil.divideBy8Ceil(startBit); | ||
| final long endByte = DataSizeRoundingUtil.divideBy8Ceil(endBit); | ||
|
|
||
| if (startByte != endByte) { | ||
| return buf.slice(startByte, endByte - startByte); | ||
| } else { | ||
| ArrowBuf bufCopy = allocator.buffer(buf.capacity()); | ||
| bufCopy.setZero(0, buf.capacity()); | ||
| for (int i = 0; i < bufCopy.capacity() * 8; i++) { | ||
| int bitIndex = (int) (i + arrowArrayOffset); | ||
| if (bitIndex < buf.capacity() * 8) { | ||
| if (BitVectorHelper.get(buf, bitIndex) == 1) { | ||
| BitVectorHelper.setBit(bufCopy, i); | ||
| } else { | ||
| BitVectorHelper.unsetBit(bufCopy, i); | ||
| } | ||
| } else { | ||
| BitVectorHelper.unsetBit(bufCopy, i); | ||
| } | ||
| } | ||
| imported.add(bufCopy); | ||
| return bufCopy; | ||
| } | ||
| } | ||
|
|
||
| private ArrowBuf importFixedBytes(ArrowType type, int index, long bytesPerSlot) { | ||
| final long capacity = bytesPerSlot * fieldNode.getLength(); | ||
| return importBuffer(type, index, capacity); | ||
| } | ||
|
|
||
| private ArrowBuf importFixedBytesWithOffset(ArrowType type, int index, long bytesPerSlot) { | ||
| final long capacity = bytesPerSlot * (fieldNode.getLength() + arrowArrayOffset); | ||
| ArrowBuf buf = importBuffer(type, index, capacity); | ||
| return buf.slice(arrowArrayOffset * bytesPerSlot, fieldNode.getLength() * bytesPerSlot); | ||
| } | ||
|
|
||
| private ArrowBuf importOffsets(ArrowType type, long bytesPerSlot) { | ||
| final long capacity = bytesPerSlot * (fieldNode.getLength() + 1); | ||
| return importBuffer(type, 1, capacity); | ||
| final long capacity = bytesPerSlot * (fieldNode.getLength() + arrowArrayOffset + 1); | ||
vibhatha marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| ArrowBuf offsets = importBuffer(type, 1, capacity); | ||
| return offsets.slice( | ||
| arrowArrayOffset * bytesPerSlot, (long) (fieldNode.getLength() + 1) * bytesPerSlot); | ||
| } | ||
|
|
||
| private ArrowBuf importData(ArrowType type, long capacity) { | ||
|
|
@@ -137,6 +190,20 @@ private ArrowBuf maybeImportBitmap(ArrowType type) { | |
| return importFixedBits(type, 0, /*bitsPerSlot=*/ 1); | ||
| } | ||
|
|
||
| private ArrowBuf maybeImportBitmapWithOffset(ArrowType type) { | ||
| // TODO: merge with maybeImportBitmap | ||
| checkState( | ||
| buffers.length > 0, | ||
| "Expected at least %s buffers for type %s, but found %s", | ||
| 1, | ||
| type, | ||
| buffers.length); | ||
| if (buffers[0] == NULL) { | ||
| return null; | ||
| } | ||
| return importFixedBitsWithOffset(type, 0, /*bitsPerSlot=*/ 1); | ||
| } | ||
|
|
||
| @Override | ||
| public List<ArrowBuf> visit(ArrowType.Null type) { | ||
| checkState( | ||
|
|
@@ -155,18 +222,19 @@ public List<ArrowBuf> visit(ArrowType.Struct type) { | |
|
|
||
| @Override | ||
| public List<ArrowBuf> visit(ArrowType.List type) { | ||
| return Arrays.asList(maybeImportBitmap(type), importOffsets(type, ListVector.OFFSET_WIDTH)); | ||
| return Arrays.asList( | ||
| maybeImportBitmapWithOffset(type), importOffsets(type, ListVector.OFFSET_WIDTH)); | ||
| } | ||
|
|
||
| @Override | ||
| public List<ArrowBuf> visit(ArrowType.LargeList type) { | ||
| return Arrays.asList( | ||
| maybeImportBitmap(type), importOffsets(type, LargeListVector.OFFSET_WIDTH)); | ||
| maybeImportBitmapWithOffset(type), importOffsets(type, LargeListVector.OFFSET_WIDTH)); | ||
| } | ||
|
|
||
| @Override | ||
| public List<ArrowBuf> visit(ArrowType.FixedSizeList type) { | ||
| return Collections.singletonList(maybeImportBitmap(type)); | ||
| return Collections.singletonList(maybeImportBitmapWithOffset(type)); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -190,7 +258,8 @@ public List<ArrowBuf> visit(ArrowType.Map type) { | |
|
|
||
| @Override | ||
| public List<ArrowBuf> visit(ArrowType.Int type) { | ||
| return Arrays.asList(maybeImportBitmap(type), importFixedBits(type, 1, type.getBitWidth())); | ||
| return Arrays.asList( | ||
| maybeImportBitmapWithOffset(type), importFixedBitsWithOffset(type, 1, type.getBitWidth())); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -212,19 +281,16 @@ public List<ArrowBuf> visit(ArrowType.FloatingPoint type) { | |
|
|
||
| @Override | ||
| public List<ArrowBuf> visit(ArrowType.Utf8 type) { | ||
| try (ArrowBuf offsets = importOffsets(type, VarCharVector.OFFSET_WIDTH)) { | ||
| final int start = offsets.getInt(0); | ||
| final int end = offsets.getInt(fieldNode.getLength() * (long) VarCharVector.OFFSET_WIDTH); | ||
| checkState( | ||
| end >= start, | ||
| "Offset buffer for type %s is malformed: start: %s, end: %s", | ||
| type, | ||
| start, | ||
| end); | ||
| final int len = end - start; | ||
|
||
| offsets.getReferenceManager().retain(); | ||
| return Arrays.asList(maybeImportBitmap(type), offsets, importData(type, len)); | ||
| } | ||
| ArrowBuf offsets = importOffsets(type, VarCharVector.OFFSET_WIDTH); | ||
| final int start = offsets.getInt(0); | ||
| final int end = offsets.getInt((fieldNode.getLength()) * (long) VarCharVector.OFFSET_WIDTH); | ||
| checkState( | ||
| end >= start, | ||
| "Offset buffer for type %s is malformed: start: %s, end: %s", | ||
| type, | ||
| start, | ||
| end); | ||
| return Arrays.asList(maybeImportBitmapWithOffset(type), offsets, importData(type, end)); | ||
| } | ||
|
|
||
| private List<ArrowBuf> visitVariableWidthView(ArrowType type) { | ||
|
|
@@ -238,8 +304,8 @@ private List<ArrowBuf> visitVariableWidthView(ArrowType type) { | |
| importBuffer(type, variadicSizeBufferIndex, variadicSizeBufferCapacity); | ||
|
|
||
| ArrowBuf view = | ||
| importFixedBytes(type, viewBufferIndex, BaseVariableWidthViewVector.ELEMENT_SIZE); | ||
| buffers.add(maybeImportBitmap(type)); | ||
| importFixedBytesWithOffset(type, viewBufferIndex, BaseVariableWidthViewVector.ELEMENT_SIZE); | ||
| buffers.add(maybeImportBitmapWithOffset(type)); | ||
| buffers.add(view); | ||
|
|
||
| // 0th buffer is validity buffer | ||
|
|
@@ -280,19 +346,16 @@ public List<ArrowBuf> visit(ArrowType.LargeUtf8 type) { | |
|
|
||
| @Override | ||
| public List<ArrowBuf> visit(ArrowType.Binary type) { | ||
| try (ArrowBuf offsets = importOffsets(type, VarBinaryVector.OFFSET_WIDTH)) { | ||
| final int start = offsets.getInt(0); | ||
| final int end = offsets.getInt(fieldNode.getLength() * (long) VarBinaryVector.OFFSET_WIDTH); | ||
| checkState( | ||
| end >= start, | ||
| "Offset buffer for type %s is malformed: start: %s, end: %s", | ||
| type, | ||
| start, | ||
| end); | ||
| final int len = end - start; | ||
| offsets.getReferenceManager().retain(); | ||
| return Arrays.asList(maybeImportBitmap(type), offsets, importData(type, len)); | ||
| } | ||
| ArrowBuf offsets = importOffsets(type, VarBinaryVector.OFFSET_WIDTH); | ||
| final int start = offsets.getInt(0); | ||
| final int end = offsets.getInt(fieldNode.getLength() * (long) VarBinaryVector.OFFSET_WIDTH); | ||
| checkState( | ||
| end >= start, | ||
| "Offset buffer for type %s is malformed: start: %s, end: %s", | ||
| type, | ||
| start, | ||
| end); | ||
| return Arrays.asList(maybeImportBitmapWithOffset(type), offsets, importData(type, end)); | ||
| } | ||
|
|
||
| @Override | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Did you mean to include this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@lidavidm I was testing enabling Java logging via this PR, and this should be removed. Also, I haven't yet completed the required offset changes for all vector types.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you move this to draft if it's not ready for review?