Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -211,23 +211,25 @@ public void splitAndTransfer(int startIndex, int length) {
startIndex,
length,
valueCount);
final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH);
final int sliceLength =
offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint;
to.clear();
to.offsetBuffer = to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH);
/* splitAndTransfer offset buffer */
for (int i = 0; i < length + 1; i++) {
final int relativeOffset =
offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint;
to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset);
if (length > 0) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we just return early if length == 0?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can do, though my personal preference would be to avoid multiple returns. The current implementation is in keeping with how splitAndTransfer is implemented for other complex types (e.g. ListVector).

final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH);
final int sliceLength =
offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint;
to.offsetBuffer = to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH);
/* splitAndTransfer offset buffer */
for (int i = 0; i < length + 1; i++) {
final int relativeOffset =
offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint;
to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset);
}
/* splitAndTransfer validity buffer */
splitAndTransferValidityBuffer(startIndex, length, to);
/* splitAndTransfer data buffer */
dataTransferPair.splitAndTransfer(startPoint, sliceLength);
to.lastSet = length - 1;
to.setValueCount(length);
}
/* splitAndTransfer validity buffer */
splitAndTransferValidityBuffer(startIndex, length, to);
/* splitAndTransfer data buffer */
dataTransferPair.splitAndTransfer(startPoint, sliceLength);
to.lastSet = length - 1;
to.setValueCount(length);
}

/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,17 @@
*/
package org.apache.arrow.vector;

import static java.util.Arrays.asList;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
Expand All @@ -36,6 +39,7 @@
import org.apache.arrow.vector.complex.UnionVector;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.util.TransferPair;
import org.junit.jupiter.api.AfterEach;
Expand Down Expand Up @@ -223,6 +227,40 @@ public void testWithZeroVector() {
// no allocations to clear for ZeroVector
}

/**
 * Checks splitAndTransfer on a ListVector whose MapVector child has had its offset buffer
 * capacity set to zero: the transfer must complete and the target must report the same value
 * count as the source.
 */
@Test
public void testListVectorWithEmptyMapVector() {
// Schema under test:
// List<element: Map(false)<entries: Struct<key: Utf8 not null, value: Utf8> not null>>
int valueCount = 1;
// key/value children of the map's "entries" struct
List<Field> children = new ArrayList<>();
children.add(new Field("key", FieldType.notNullable(new ArrowType.Utf8()), null));
children.add(new Field("value", FieldType.nullable(new ArrowType.Utf8()), null));
Field structField =
new Field("entries", FieldType.notNullable(ArrowType.Struct.INSTANCE), children);

Field mapField =
new Field("element", FieldType.notNullable(new ArrowType.Map(false)), asList(structField));

Field listField = new Field("list", FieldType.nullable(new ArrowType.List()), asList(mapField));

// build the source vector from the schema; it holds valueCount list slots but no map data
ListVector fromListVector = (ListVector) listField.createVector(allocator);
fromListVector.allocateNew();
fromListVector.setValueCount(valueCount);

// child vector is empty
MapVector dataVector = (MapVector) fromListVector.getDataVector();
dataVector.allocateNew();
// unset capacity to mimic observed failure mode
dataVector.getOffsetBuffer().capacity(0);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, a list vector is supposed to have 1 offset when it's empty. But I suppose other parts of the library have this issue/"optimization"


// transfer the full range [0, valueCount) into a target created from the source's allocator
TransferPair transferPair = fromListVector.getTransferPair(fromListVector.getAllocator());
transferPair.splitAndTransfer(0, valueCount);
ListVector toListVector = (ListVector) transferPair.getTo();

// the target should report as many values as were requested in the transfer
assertEquals(valueCount, toListVector.getValueCount());
// clear the source vector
fromListVector.clear();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: use try-with-resources

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that there're several closeable resources here and toListVector cannot be constructed inside the same try as fromListVector. This is just to say that there'd be some ugly nesting if using try.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right. Sure.

// clear the transfer target
toListVector.clear();
}

@Test /* VarCharVector */
public void test() throws Exception {
try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) {
Expand Down