From 035e886c6f683ecadb200808487f184dde403e4f Mon Sep 17 00:00:00 2001 From: Steven Phillips Date: Thu, 29 Jun 2017 20:05:17 -0700 Subject: [PATCH 1/2] Use buffer slice for splitAndTransfer in List and Union vectors --- .../main/codegen/templates/UnionVector.java | 6 ++-- .../arrow/vector/complex/ListVector.java | 30 ++++++++++++++----- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java index aa9d34d6e26..eabe42a7c4c 100644 --- a/java/vector/src/main/codegen/templates/UnionVector.java +++ b/java/vector/src/main/codegen/templates/UnionVector.java @@ -321,10 +321,8 @@ public void transfer() { @Override public void splitAndTransfer(int startIndex, int length) { - to.allocateNew(); - for (int i = 0; i < length; i++) { - to.copyFromSafe(startIndex + i, i, org.apache.arrow.vector.complex.UnionVector.this); - } + internalMapVectorTransferPair.splitAndTransfer(startIndex, length); + typeVectorTransferPair.splitAndTransfer(startIndex, length); to.getMutator().setValueCount(length); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 63572945660..fdeac397165 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -38,6 +38,7 @@ import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.UInt4Vector; import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.complex.impl.ComplexCopier; import org.apache.arrow.vector.complex.impl.UnionListReader; @@ -179,7 +180,11 @@ public TransferPair makeTransferPair(ValueVector target) { private class TransferImpl implements TransferPair { ListVector to; - TransferPair pairs[] = new TransferPair[3]; + TransferPair bitsTransferPair; + TransferPair offsetsTransferPair; + TransferPair dataTransferPair; + + TransferPair[] pairs; public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { this(new ListVector(name, allocator, fieldType, callBack)); @@ -188,12 +193,13 @@ public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { public TransferImpl(ListVector to) { this.to = to; to.addOrGetVector(vector.getField().getFieldType()); - pairs[0] = offsets.makeTransferPair(to.offsets); - pairs[1] = bits.makeTransferPair(to.bits); + offsetsTransferPair = offsets.makeTransferPair(to.offsets); + bitsTransferPair = bits.makeTransferPair(to.bits); if (to.getDataVector() instanceof ZeroVector) { to.addOrGetVector(vector.getField().getFieldType()); } - pairs[2] = getDataVector().makeTransferPair(to.getDataVector()); + dataTransferPair = getDataVector().makeTransferPair(to.getDataVector()); + pairs = new TransferPair[] { bitsTransferPair, offsetsTransferPair, dataTransferPair }; } @Override @@ -206,10 +212,20 @@ public void transfer() { @Override public void splitAndTransfer(int startIndex, int length) { - to.allocateNew(); - for (int i = 0; i < length; i++) { - copyValueSafe(startIndex + i, i); + UInt4Vector.Accessor offsetVectorAccessor = ListVector.this.offsets.getAccessor(); + final int startPoint = offsetVectorAccessor.get(startIndex); + final int sliceLength = offsetVectorAccessor.get(startIndex + length) - startPoint; + to.clear(); + to.offsets.allocateNew(length + 1); + offsetVectorAccessor = ListVector.this.offsets.getAccessor(); + final UInt4Vector.Mutator targetOffsetVectorMutator = to.offsets.getMutator(); + for (int i = 0; i < length + 1; i++) { + targetOffsetVectorMutator.set(i, offsetVectorAccessor.get(startIndex + i) - startPoint); } + bitsTransferPair.splitAndTransfer(startIndex, length); + dataTransferPair.splitAndTransfer(startPoint, sliceLength); + to.lastSet = length; + to.mutator.setValueCount(length); } @Override From 3d89c999c8b1580a0bb2b5b039de2bf2c5e7c6e6 Mon Sep 17 00:00:00 2001 From: siddharth Date: Thu, 27 Jul 2017 15:12:58 -0700 Subject: [PATCH 2/2] ARROW-1192: splitAndTransfer changes for ListVector,UnionVector and corresponding unit tests Change-Id: I47037365f1424662ffb27e1e8478fc1bd45a45b2 --- .../apache/arrow/vector/TestListVector.java | 199 ++++++++++++++++++ .../apache/arrow/vector/TestUnionVector.java | 181 ++++++++++++++++ 2 files changed, 380 insertions(+) diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 29ea7628f45..a1762c466ce 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -19,6 +19,8 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertFalse; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.complex.ListVector; @@ -30,6 +32,7 @@ import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.Types.*; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.TransferPair; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -234,4 +237,200 @@ public void testSetLastSetUsage() throws Exception { assertNull(actual); } } + + @Test + public void testSplitAndTransfer() throws Exception { + try (ListVector listVector = ListVector.empty("sourceVector", allocator)) { + + /* Explicitly add the dataVector */ + MinorType type = MinorType.BIGINT; + listVector.addOrGetVector(FieldType.nullable(type.getType())); + + UnionListWriter listWriter = listVector.getWriter(); + + /* allocate memory */ + listWriter.allocate(); + + /* populate data */ + listWriter.setPosition(0); + listWriter.startList(); + listWriter.bigInt().writeBigInt(10); + listWriter.bigInt().writeBigInt(11); + listWriter.bigInt().writeBigInt(12); + listWriter.endList(); + + listWriter.setPosition(1); + listWriter.startList(); + listWriter.bigInt().writeBigInt(13); + listWriter.bigInt().writeBigInt(14); + listWriter.endList(); + + listWriter.setPosition(2); + listWriter.startList(); + listWriter.bigInt().writeBigInt(15); + listWriter.bigInt().writeBigInt(16); + listWriter.bigInt().writeBigInt(17); + listWriter.bigInt().writeBigInt(18); + listWriter.endList(); + + listWriter.setPosition(3); + listWriter.startList(); + listWriter.bigInt().writeBigInt(19); + listWriter.endList(); + + listWriter.setPosition(4); + listWriter.startList(); + listWriter.bigInt().writeBigInt(20); + listWriter.bigInt().writeBigInt(21); + listWriter.bigInt().writeBigInt(22); + listWriter.bigInt().writeBigInt(23); + listWriter.endList(); + + listVector.getMutator().setValueCount(5); + + assertEquals(5, listVector.getMutator().getLastSet()); + + /* get offsetVector */ + UInt4Vector offsetVector = (UInt4Vector)listVector.getOffsetVector(); + + /* get dataVector */ + NullableBigIntVector dataVector = (NullableBigIntVector)listVector.getDataVector(); + + /* check the vector output */ + final UInt4Vector.Accessor offsetAccessor = offsetVector.getAccessor(); + final ValueVector.Accessor valueAccessor = dataVector.getAccessor(); + + int index = 0; + int offset = 0; + Object actual = null; + + /* index 0 */ + assertFalse(listVector.getAccessor().isNull(index)); + offset = offsetAccessor.get(index); + assertEquals(Integer.toString(0), Integer.toString(offset)); + + actual = valueAccessor.getObject(offset); + assertEquals(new Long(10), (Long)actual); + offset++; + actual = valueAccessor.getObject(offset); + assertEquals(new Long(11), (Long)actual); + offset++; + actual = valueAccessor.getObject(offset); + assertEquals(new Long(12), (Long)actual); + + /* index 1 */ + index++; + assertFalse(listVector.getAccessor().isNull(index)); + offset = offsetAccessor.get(index); + assertEquals(Integer.toString(3), Integer.toString(offset)); + + actual = valueAccessor.getObject(offset); + assertEquals(new Long(13), (Long)actual); + offset++; + actual = valueAccessor.getObject(offset); + assertEquals(new Long(14), (Long)actual); + + /* index 2 */ + index++; + assertFalse(listVector.getAccessor().isNull(index)); + offset = offsetAccessor.get(index); + assertEquals(Integer.toString(5), Integer.toString(offset)); + + actual = valueAccessor.getObject(offset); + assertEquals(new Long(15), (Long)actual); + offset++; + actual = valueAccessor.getObject(offset); + assertEquals(new Long(16), (Long)actual); + offset++; + actual = valueAccessor.getObject(offset); + assertEquals(new Long(17), (Long)actual); + offset++; + actual = valueAccessor.getObject(offset); + assertEquals(new Long(18), (Long)actual); + + /* index 3 */ + index++; + assertFalse(listVector.getAccessor().isNull(index)); + offset = offsetAccessor.get(index); + assertEquals(Integer.toString(9), Integer.toString(offset)); + + actual = valueAccessor.getObject(offset); + assertEquals(new Long(19), (Long)actual); + + /* index 4 */ + index++; + assertFalse(listVector.getAccessor().isNull(index)); + offset = offsetAccessor.get(index); + assertEquals(Integer.toString(10), Integer.toString(offset)); + + actual = valueAccessor.getObject(offset); + assertEquals(new Long(20), (Long)actual); + offset++; + actual = valueAccessor.getObject(offset); + assertEquals(new Long(21), (Long)actual); + offset++; + actual = valueAccessor.getObject(offset); + assertEquals(new Long(22), (Long)actual); + offset++; + actual = valueAccessor.getObject(offset); + assertEquals(new Long(23), (Long)actual); + + /* index 5 */ + index++; + assertTrue(listVector.getAccessor().isNull(index)); + offset = offsetAccessor.get(index); + assertEquals(Integer.toString(14), Integer.toString(offset)); + + /* do split and transfer */ + try (ListVector toVector = ListVector.empty("toVector", allocator)) { + + TransferPair transferPair = listVector.makeTransferPair(toVector); + + int[][] transferLengths = { {0, 2}, + {3, 1}, + {4, 1} + }; + + for (final int[] transferLength : transferLengths) { + int start = transferLength[0]; + int splitLength = transferLength[1]; + + int dataLength1 = 0; + int dataLength2 = 0; + + int offset1 = 0; + int offset2 = 0; + + transferPair.splitAndTransfer(start, splitLength); + + /* get offsetVector of toVector */ + UInt4Vector offsetVector1 = (UInt4Vector)toVector.getOffsetVector(); + UInt4Vector.Accessor offsetAccessor1 = offsetVector1.getAccessor(); + + /* get dataVector of toVector */ + NullableBigIntVector dataVector1 = (NullableBigIntVector)toVector.getDataVector(); + NullableBigIntVector.Accessor valueAccessor1 = dataVector1.getAccessor(); + + for(int i = 0; i < splitLength; i++) { + dataLength1 = offsetAccessor.get(start + i + 1) - offsetAccessor.get(start + i); + dataLength2 = offsetAccessor1.get(i + 1) - offsetAccessor1.get(i); + + assertEquals("Different data lengths at index: " + i + " and start: " + start, + dataLength1, dataLength2); + + offset1 = offsetAccessor.get(start + i); + offset2 = offsetAccessor1.get(i); + + for(int j = 0; j < dataLength1; j++) { + assertEquals("Different data at indexes: " + offset1 + " and " + offset2, + valueAccessor.getObject(offset1), valueAccessor1.getObject(offset2)); + + offset1++; + offset2++; + } + } + } + } + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java index a5b90ee90b8..a5159242d76 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java @@ -24,6 +24,7 @@ import org.apache.arrow.vector.holders.NullableBitHolder; import org.apache.arrow.vector.holders.NullableIntHolder; import org.apache.arrow.vector.holders.NullableUInt4Holder; +import org.apache.arrow.vector.holders.NullableFloat4Holder; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.util.TransferPair; @@ -117,6 +118,179 @@ public void testTransfer() throws Exception { } } + @Test + public void testSplitAndTransfer() throws Exception { + try (UnionVector sourceVector = new UnionVector(EMPTY_SCHEMA_PATH, allocator, null)) { + final UnionVector.Mutator sourceMutator = sourceVector.getMutator(); + final UnionVector.Accessor sourceAccessor = sourceVector.getAccessor(); + + sourceVector.allocateNew(); + + /* populate the UnionVector */ + sourceMutator.setType(0, MinorType.INT); + sourceMutator.setSafe(0, newIntHolder(5)); + sourceMutator.setType(1, MinorType.INT); + sourceMutator.setSafe(1, newIntHolder(10)); + sourceMutator.setType(2, MinorType.INT); + sourceMutator.setSafe(2, newIntHolder(15)); + sourceMutator.setType(3, MinorType.INT); + sourceMutator.setSafe(3, newIntHolder(20)); + sourceMutator.setType(4, MinorType.INT); + sourceMutator.setSafe(4, newIntHolder(25)); + sourceMutator.setType(5, MinorType.INT); + sourceMutator.setSafe(5, newIntHolder(30)); + sourceMutator.setType(6, MinorType.INT); + sourceMutator.setSafe(6, newIntHolder(35)); + sourceMutator.setType(7, MinorType.INT); + sourceMutator.setSafe(7, newIntHolder(40)); + sourceMutator.setType(8, MinorType.INT); + sourceMutator.setSafe(8, newIntHolder(45)); + sourceMutator.setType(9, MinorType.INT); + sourceMutator.setSafe(9, newIntHolder(50)); + sourceMutator.setValueCount(10); + + /* check the vector output */ + assertEquals(10, sourceAccessor.getValueCount()); + assertEquals(false, sourceAccessor.isNull(0)); + assertEquals(5, sourceAccessor.getObject(0)); + assertEquals(false, sourceAccessor.isNull(1)); + assertEquals(10, sourceAccessor.getObject(1)); + assertEquals(false, sourceAccessor.isNull(2)); + assertEquals(15, sourceAccessor.getObject(2)); + assertEquals(false, sourceAccessor.isNull(3)); + assertEquals(20, sourceAccessor.getObject(3)); + assertEquals(false, sourceAccessor.isNull(4)); + assertEquals(25, sourceAccessor.getObject(4)); + assertEquals(false, sourceAccessor.isNull(5)); + assertEquals(30, sourceAccessor.getObject(5)); + assertEquals(false, sourceAccessor.isNull(6)); + assertEquals(35, sourceAccessor.getObject(6)); + assertEquals(false, sourceAccessor.isNull(7)); + assertEquals(40, sourceAccessor.getObject(7)); + assertEquals(false, sourceAccessor.isNull(8)); + assertEquals(45, sourceAccessor.getObject(8)); + assertEquals(false, sourceAccessor.isNull(9)); + assertEquals(50, sourceAccessor.getObject(9)); + + try(UnionVector toVector = new UnionVector(EMPTY_SCHEMA_PATH, allocator, null)) { + + final TransferPair transferPair = sourceVector.makeTransferPair(toVector); + final UnionVector.Accessor toAccessor = toVector.getAccessor(); + + final int[][] transferLengths = { {0, 3}, + {3, 1}, + {4, 2}, + {6, 1}, + {7, 1}, + {8, 2} + }; + + for (final int[] transferLength : transferLengths) { + final int start = transferLength[0]; + final int length = transferLength[1]; + + transferPair.splitAndTransfer(start, length); + + /* check the toVector output after doing the splitAndTransfer */ + for (int i = 0; i < length; i++) { + assertEquals("Different data at indexes: " + (start + i) + "and " + i, sourceAccessor.getObject(start + i), + toAccessor.getObject(i)); + } + } + } + } + } + + @Test + public void testSplitAndTransferWithMixedVectors() throws Exception { + try (UnionVector sourceVector = new UnionVector(EMPTY_SCHEMA_PATH, allocator, null)) { + final UnionVector.Mutator sourceMutator = sourceVector.getMutator(); + final UnionVector.Accessor sourceAccessor = sourceVector.getAccessor(); + + sourceVector.allocateNew(); + + /* populate the UnionVector */ + sourceMutator.setType(0, MinorType.INT); + sourceMutator.setSafe(0, newIntHolder(5)); + + sourceMutator.setType(1, MinorType.FLOAT4); + sourceMutator.setSafe(1, newFloat4Holder(5.5f)); + + sourceMutator.setType(2, MinorType.INT); + sourceMutator.setSafe(2, newIntHolder(10)); + + sourceMutator.setType(3, MinorType.FLOAT4); + sourceMutator.setSafe(3, newFloat4Holder(10.5f)); + + sourceMutator.setType(4, MinorType.INT); + sourceMutator.setSafe(4, newIntHolder(15)); + + sourceMutator.setType(5, MinorType.FLOAT4); + sourceMutator.setSafe(5, newFloat4Holder(15.5f)); + + sourceMutator.setType(6, MinorType.INT); + sourceMutator.setSafe(6, newIntHolder(20)); + + sourceMutator.setType(7, MinorType.FLOAT4); + sourceMutator.setSafe(7, newFloat4Holder(20.5f)); + + sourceMutator.setType(8, MinorType.INT); + sourceMutator.setSafe(8, newIntHolder(30)); + + sourceMutator.setType(9, MinorType.FLOAT4); + sourceMutator.setSafe(9, newFloat4Holder(30.5f)); + sourceMutator.setValueCount(10); + + /* check the vector output */ + assertEquals(10, sourceAccessor.getValueCount()); + assertEquals(false, sourceAccessor.isNull(0)); + assertEquals(5, sourceAccessor.getObject(0)); + assertEquals(false, sourceAccessor.isNull(1)); + assertEquals(5.5f, sourceAccessor.getObject(1)); + assertEquals(false, sourceAccessor.isNull(2)); + assertEquals(10, sourceAccessor.getObject(2)); + assertEquals(false, sourceAccessor.isNull(3)); + assertEquals(10.5f, sourceAccessor.getObject(3)); + assertEquals(false, sourceAccessor.isNull(4)); + assertEquals(15, sourceAccessor.getObject(4)); + assertEquals(false, sourceAccessor.isNull(5)); + assertEquals(15.5f, sourceAccessor.getObject(5)); + assertEquals(false, sourceAccessor.isNull(6)); + assertEquals(20, sourceAccessor.getObject(6)); + assertEquals(false, sourceAccessor.isNull(7)); + assertEquals(20.5f, sourceAccessor.getObject(7)); + assertEquals(false, sourceAccessor.isNull(8)); + assertEquals(30, sourceAccessor.getObject(8)); + assertEquals(false, sourceAccessor.isNull(9)); + assertEquals(30.5f, sourceAccessor.getObject(9)); + + try(UnionVector toVector = new UnionVector(EMPTY_SCHEMA_PATH, allocator, null)) { + + final TransferPair transferPair = sourceVector.makeTransferPair(toVector); + final UnionVector.Accessor toAccessor = toVector.getAccessor(); + + final int[][] transferLengths = { {0, 2}, + {2, 1}, + {3, 2}, + {5, 3}, + {8, 2} + }; + + for (final int[] transferLength : transferLengths) { + final int start = transferLength[0]; + final int length = transferLength[1]; + + transferPair.splitAndTransfer(start, length); + + /* check the toVector output after doing the splitAndTransfer */ + for (int i = 0; i < length; i++) { + assertEquals("Different values at index: " + i, sourceAccessor.getObject(start + i), toAccessor.getObject(i)); + } + } + } + } + } + private static NullableIntHolder newIntHolder(int value) { final NullableIntHolder holder = new NullableIntHolder(); holder.isSet = 1; @@ -130,4 +304,11 @@ private static NullableBitHolder newBitHolder(boolean value) { holder.value = value ? 1 : 0; return holder; } + + private static NullableFloat4Holder newFloat4Holder(float value) { + final NullableFloat4Holder holder = new NullableFloat4Holder(); + holder.isSet = 1; + holder.value = value; + return holder; + } }