From 7c13ede14b89a1270838c41609e48c1f37409755 Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Wed, 27 Nov 2019 21:16:19 +0800 Subject: [PATCH 1/5] [ARROW-7073][Java] Support concating vectors values in batch --- .../main/codegen/templates/UnionVector.java | 12 +- .../arrow/vector/util/VectorAppender.java | 320 ++++++++++++++++ .../arrow/vector/util/TestVectorAppender.java | 357 ++++++++++++++++++ 3 files changed, 685 insertions(+), 4 deletions(-) create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java create mode 100644 java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java index d760fe53fc6..8bf5d8a9b20 100644 --- a/java/vector/src/main/codegen/templates/UnionVector.java +++ b/java/vector/src/main/codegen/templates/UnionVector.java @@ -553,9 +553,13 @@ public Iterator iterator() { return vectors.iterator(); } - public ValueVector getVector(int index) { - int type = typeBuffer.getByte(index * TYPE_WIDTH); - switch (MinorType.values()[type]) { + public ValueVector getVector(int index) { + int type = typeBuffer.getByte(index * TYPE_WIDTH); + return getVectorByType(type); + } + + public ValueVector getVectorByType(int typeId) { + switch (MinorType.values()[typeId]) { case NULL: return null; <#list vv.types as type> @@ -574,7 +578,7 @@ public ValueVector getVector(int index) { case LIST: return getList(); default: - throw new UnsupportedOperationException("Cannot support type: " + MinorType.values()[type]); + throw new UnsupportedOperationException("Cannot support type: " + MinorType.values()[typeId]); } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java new file mode 100644 index 00000000000..6301cefe245 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java @@ -0,0 +1,320 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.util; + +import java.util.HashSet; + +import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.BaseFixedWidthVector; +import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BitVectorHelper; +import org.apache.arrow.vector.NullVector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.compare.TypeEqualsVisitor; +import org.apache.arrow.vector.compare.VectorVisitor; +import org.apache.arrow.vector.complex.FixedSizeListVector; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.NonNullableStructVector; +import org.apache.arrow.vector.complex.UnionVector; + +import io.netty.util.internal.PlatformDependent; + +/** + * Utility to append two vectors together. + */ +public class VectorAppender implements VectorVisitor { + + /** + * The targetVector to be appended. + */ + private final ValueVector targetVector; + + private final TypeEqualsVisitor typeVisitor; + + /** + * Constructs a new targetVector appender, with the given targetVector. + * @param targetVector the targetVector to be appended. + */ + public VectorAppender(ValueVector targetVector) { + this.targetVector = targetVector; + typeVisitor = new TypeEqualsVisitor(targetVector); + } + + @Override + public ValueVector visit(BaseFixedWidthVector deltaVector, Void value) { + Preconditions.checkArgument(targetVector.getField().getType().equals(deltaVector.getField().getType()), + "The targetVector to append must have the same type as the targetVector being appended"); + + int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount(); + + // make sure there is enough capacity + while (targetVector.getValueCapacity() < newValueCount) { + targetVector.reAlloc(); + } + + // append validity buffer + BitVectorHelper.concatBits( + targetVector.getValidityBuffer(), targetVector.getValueCount(), + deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer()); + + // append data buffer + PlatformDependent.copyMemory(deltaVector.getDataBuffer().memoryAddress(), + targetVector.getDataBuffer().memoryAddress() + deltaVector.getTypeWidth() * targetVector.getValueCount(), + deltaVector.getTypeWidth() * targetVector.getValueCount()); + targetVector.setValueCount(newValueCount); + return targetVector; + } + + @Override + public ValueVector visit(BaseVariableWidthVector deltaVector, Void value) { + Preconditions.checkArgument(targetVector.getField().getType().equals(deltaVector.getField().getType()), + "The targetVector to append must have the same type as the targetVector being appended"); + + int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount(); + + int targetDataSize = targetVector.getOffsetBuffer().getInt( + targetVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH); + int deltaDataSize = deltaVector.getOffsetBuffer().getInt( + deltaVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH); + int newValueCapacity = targetDataSize + deltaDataSize; + + // make sure there is enough capacity + while (targetVector.getValueCapacity() < newValueCount) { + targetVector.reAlloc(); + } + while (targetVector.getDataBuffer().capacity() < newValueCapacity) { + ((BaseVariableWidthVector) targetVector).reallocDataBuffer(); + } + + // append validity buffer + BitVectorHelper.concatBits( + targetVector.getValidityBuffer(), targetVector.getValueCount(), + deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer()); + + // append data buffer + PlatformDependent.copyMemory(deltaVector.getDataBuffer().memoryAddress(), + targetVector.getDataBuffer().memoryAddress() + targetDataSize, deltaDataSize); + + // copy offset buffer + PlatformDependent.copyMemory( + deltaVector.getOffsetBuffer().memoryAddress() + BaseVariableWidthVector.OFFSET_WIDTH, + targetVector.getOffsetBuffer().memoryAddress() + (targetVector.getValueCount() + 1) * + BaseVariableWidthVector.OFFSET_WIDTH, + deltaVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH); + + // increase each offset from the second buffer + for (int i = 0; i < deltaVector.getValueCount(); i++) { + int oldOffset = targetVector.getOffsetBuffer().getInt((targetVector.getValueCount() + 1 + i) * + BaseVariableWidthVector.OFFSET_WIDTH); + targetVector.getOffsetBuffer().setInt( + (targetVector.getValueCount() + 1 + i) * + BaseVariableWidthVector.OFFSET_WIDTH, oldOffset + targetDataSize); + } + ((BaseVariableWidthVector) targetVector).setLastSet(newValueCount - 1); + targetVector.setValueCount(newValueCount); + return targetVector; + } + + @Override + public ValueVector visit(ListVector deltaVector, Void value) { + Preconditions.checkArgument(typeVisitor.equals(deltaVector), + "The targetVector to append must have the same type as the targetVector being appended"); + + int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount(); + + int targetListSize = targetVector.getOffsetBuffer().getInt( + targetVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH); + int deltaListSize = deltaVector.getOffsetBuffer().getInt( + deltaVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH); + + ListVector targetListVector = (ListVector) targetVector; + + // make sure the underlying vector has value count set + targetListVector.getDataVector().setValueCount(targetListSize); + deltaVector.getDataVector().setValueCount(deltaListSize); + + // make sure there is enough capacity + while (targetVector.getValueCapacity() < newValueCount) { + targetVector.reAlloc(); + } + + // append validity buffer + BitVectorHelper.concatBits( + targetVector.getValidityBuffer(), targetVector.getValueCount(), + deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer()); + + // append offset buffer + PlatformDependent.copyMemory(deltaVector.getOffsetBuffer().memoryAddress() + ListVector.OFFSET_WIDTH, + targetVector.getOffsetBuffer().memoryAddress() + (targetVector.getValueCount() + 1) * + ListVector.OFFSET_WIDTH, + deltaVector.getValueCount() * ListVector.OFFSET_WIDTH); + + // increase each offset from the second buffer + for (int i = 0; i < deltaVector.getValueCount(); i++) { + int oldOffset = + targetVector.getOffsetBuffer().getInt((targetVector.getValueCount() + 1 + i) * ListVector.OFFSET_WIDTH); + targetVector.getOffsetBuffer().setInt((targetVector.getValueCount() + 1 + i) * ListVector.OFFSET_WIDTH, + oldOffset + targetListSize); + } + targetListVector.setLastSet(newValueCount - 1); + + // append underlying vectors + VectorAppender innerAppender = new VectorAppender(targetListVector.getDataVector()); + deltaVector.getDataVector().accept(innerAppender, null); + + targetVector.setValueCount(newValueCount); + return targetVector; + } + + @Override + public ValueVector visit(FixedSizeListVector deltaVector, Void value) { + Preconditions.checkArgument(typeVisitor.equals(deltaVector), + "The vector to append must have the same type as the targetVector being appended"); + + FixedSizeListVector targetListVector = (FixedSizeListVector) targetVector; + + Preconditions.checkArgument(targetListVector.getListSize() == deltaVector.getListSize(), + "FixedSizeListVector must have the same list size to append"); + + int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount(); + + int targetListSize = targetListVector.getValueCount() * targetListVector.getListSize(); + int deltaListSize = deltaVector.getValueCount() * deltaVector.getValueCount(); + + // make sure the underlying vector has value count set + targetListVector.getDataVector().setValueCount(targetListSize); + deltaVector.getDataVector().setValueCount(deltaListSize); + + // make sure there is enough capacity + while (targetVector.getValueCapacity() < newValueCount) { + targetVector.reAlloc(); + } + + // append validity buffer + BitVectorHelper.concatBits( + targetVector.getValidityBuffer(), targetVector.getValueCount(), + deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer()); + + // append underlying vectors + VectorAppender innerAppender = new VectorAppender(targetListVector.getDataVector()); + deltaVector.getDataVector().accept(innerAppender, null); + + targetVector.setValueCount(newValueCount); + return targetVector; + } + + @Override + public ValueVector visit(NonNullableStructVector deltaVector, Void value) { + Preconditions.checkArgument(typeVisitor.equals(deltaVector), + "The vector to append must have the same type as the targetVector being appended"); + + NonNullableStructVector targetStructVector = (NonNullableStructVector) targetVector; + int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount(); + + // make sure there is enough capacity + while (targetVector.getValueCapacity() < newValueCount) { + targetVector.reAlloc(); + } + + // append validity buffer + BitVectorHelper.concatBits( + targetVector.getValidityBuffer(), targetVector.getValueCount(), + deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer()); + + // append child vectors + for (int i = 0; i < targetStructVector.getChildrenFromFields().size(); i++) { + ValueVector targetChild = targetStructVector.getVectorById(i); + ValueVector deltaChild = deltaVector.getVectorById(i); + + targetChild.setValueCount(targetStructVector.getValueCount()); + deltaChild.setValueCount(deltaVector.getValueCount()); + + VectorAppender innerAppender = new VectorAppender(targetChild); + deltaChild.accept(innerAppender, null); + } + + targetVector.setValueCount(newValueCount); + return targetVector; + } + + @Override + public ValueVector visit(UnionVector deltaVector, Void value) { + // we only make sure that both vectors are union vectors. + Preconditions.checkArgument(targetVector.getMinorType() == deltaVector.getMinorType(), + "The vector to append must have the same type as the targetVector being appended"); + + UnionVector targetUnionVector = (UnionVector) targetVector; + int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount(); + + // make sure there is enough capacity + while (targetUnionVector.getValueCapacity() < newValueCount) { + targetUnionVector.reAlloc(); + } + + // append type buffers + PlatformDependent.copyMemory(deltaVector.getValidityBufferAddress(), + targetUnionVector.getValidityBufferAddress() + targetVector.getValueCount(), + deltaVector.getValueCount()); + + // build the hash set for all types + HashSet targetTypes = new HashSet<>(); + for (int i = 0; i < targetUnionVector.getValueCount(); i++) { + targetTypes.add((int) targetUnionVector.getValidityBuffer().getByte(i)); + } + HashSet deltaTypes = new HashSet<>(); + for (int i = 0; i < deltaVector.getValueCount(); i++) { + deltaTypes.add((int) deltaVector.getValidityBuffer().getByte(i)); + } + + // append child vectors + for (int i = 0; i < Byte.MAX_VALUE; i++) { + if (targetTypes.contains(i) || deltaTypes.contains(i)) { + ValueVector targetChild = targetUnionVector.getVectorByType(i); + if (!targetTypes.contains(i)) { + // if the vector type does not exist in the target, it must be newly created + // and we must make sure it has enough capacity. + while (targetChild.getValueCapacity() < newValueCount) { + targetChild.reAlloc(); + } + } + + if (deltaTypes.contains(i)) { + // append child vectors + ValueVector deltaChild = deltaVector.getVectorByType(i); + + targetChild.setValueCount(targetUnionVector.getValueCount()); + deltaChild.setValueCount(deltaVector.getValueCount()); + + VectorAppender innerAppender = new VectorAppender(targetChild); + deltaChild.accept(innerAppender, null); + } + targetChild.setValueCount(newValueCount); + } + } + + targetVector.setValueCount(newValueCount); + return targetVector; + } + + @Override + public ValueVector visit(NullVector deltaVector, Void value) { + Preconditions.checkArgument(targetVector.getField().getType().equals(deltaVector.getField().getType()), + "The targetVector to append must have the same type as the targetVector being appended"); + return targetVector; + } +} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java new file mode 100644 index 00000000000..d485dbfde60 --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java @@ -0,0 +1,357 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.util; + +import static junit.framework.TestCase.assertEquals; +import static junit.framework.TestCase.assertTrue; +import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.util.List; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BitVectorHelper; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.complex.FixedSizeListVector; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.StructVector; +import org.apache.arrow.vector.complex.UnionVector; +import org.apache.arrow.vector.holders.NullableBigIntHolder; +import org.apache.arrow.vector.holders.NullableFloat4Holder; +import org.apache.arrow.vector.holders.NullableIntHolder; +import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Test cases for {@link VectorAppender}. + */ +public class TestVectorAppender { + + private BufferAllocator allocator; + + @Before + public void prepare() { + allocator = new RootAllocator(1024 * 1024); + } + + @After + public void shutdown() { + allocator.close(); + } + + @Test + public void testAppendFixedWidthVector() { + final int length1 = 10; + final int length2 = 5; + try (IntVector target = new IntVector("", allocator); + IntVector delta = new IntVector("", allocator)) { + + target.allocateNew(length1); + delta.allocateNew(length2); + + for (int i = 0; i < length1; i++) { + target.set(i, i); + } + for (int i = 0; i < length2; i++) { + delta.set(i, i + length1); + } + target.setValueCount(length1); + delta.setValueCount(length2); + + VectorAppender appender = new VectorAppender(target); + delta.accept(appender, null); + + assertEquals(length1 + length2, target.getValueCount()); + for (int i = 0; i < target.getValueCount(); i++) { + assertEquals(i, target.get(i)); + } + } + } + + @Test + public void testAppendVariableWidthVector() { + final int length1 = 10; + final int length2 = 5; + try (VarCharVector target = new VarCharVector("", allocator); + VarCharVector delta = new VarCharVector("", allocator)) { + + target.allocateNew(5, length1); + delta.allocateNew(5, length2); + + for (int i = 0; i < length1; i++) { + target.setSafe(i, ("a" + i).getBytes()); + } + for (int i = 0; i < length2; i++) { + delta.setSafe(i, ("a" + (i + length1)).getBytes()); + } + target.setValueCount(length1); + delta.setValueCount(length2); + + VectorAppender appender = new VectorAppender(target); + delta.accept(appender, null); + + assertEquals(length1 + length2, target.getValueCount()); + for (int i = 0; i < target.getValueCount(); i++) { + assertEquals("a" + i, new String(target.get(i))); + } + } + } + + private ListVector createListVector(int start, int end, int step) { + final int listLength = (end - start) / step; + + ListVector listVector = ListVector.empty("list vector", allocator); + + Types.MinorType type = Types.MinorType.INT; + listVector.addOrGetVector(FieldType.nullable(type.getType())); + + listVector.allocateNew(); + + IntVector dataVector = (IntVector) listVector.getDataVector(); + + // set underlying vectors + for (int i = 0; i < end - start; i++) { + dataVector.set(i, i + start); + } + dataVector.setValueCount(end - start); + + // set offset buffer + for (int i = 0; i < listLength; i++) { + BitVectorHelper.setBit(listVector.getValidityBuffer(), i); + listVector.getOffsetBuffer().setInt(i * OFFSET_WIDTH, i * step); + listVector.getOffsetBuffer().setInt((i + 1) * OFFSET_WIDTH, (i + 1) * step); + } + listVector.setLastSet(listLength - 1); + listVector.setValueCount(listLength); + + return listVector; + } + + @Test + public void testAppendListVector() { + try (ListVector target = createListVector(0, 10, 2); + ListVector delta = createListVector(10, 20, 5)) { + + VectorAppender appender = new VectorAppender(target); + delta.accept(appender, null); + + assertEquals(7, target.getValueCount()); + + int curValue = 0; + for (int i = 0; i < 5; i++) { + List list = (List) target.getObject(i); + assertEquals(2, list.size()); + for (int j = 0; j < list.size(); j++) { + assertEquals(curValue++, list.get(j).intValue()); + } + } + + for (int i = 5; i < 7; i++) { + List list = (List) target.getObject(i); + assertEquals(5, list.size()); + for (int j = 0; j < list.size(); j++) { + assertEquals(curValue++, list.get(j).intValue()); + } + } + } + } + + private FixedSizeListVector createFixedSizeListVector(int start, int end, int step) { + final int listLength = (end - start) / step; + + FixedSizeListVector listVector = FixedSizeListVector.empty("fixed size list vector", step, allocator); + + Types.MinorType type = Types.MinorType.INT; + listVector.addOrGetVector(FieldType.nullable(type.getType())); + + listVector.allocateNew(); + + IntVector dataVector = (IntVector) listVector.getDataVector(); + + // set underlying vectors + for (int i = 0; i < end - start; i++) { + dataVector.set(i, i + start); + } + dataVector.setValueCount(end - start); + + listVector.setValueCount(listLength); + + return listVector; + } + + @Test + public void testAppendFixedSizeListVector() { + try (ListVector target = createListVector(0, 10, 2); + ListVector delta = createListVector(10, 20, 5)) { + + VectorAppender appender = new VectorAppender(target); + delta.accept(appender, null); + + assertEquals(7, target.getValueCount()); + int curValue = 0; + for (int i = 0; i < 5; i++) { + List list = (List) target.getObject(i); + assertEquals(2, list.size()); + for (int j = 0; j < list.size(); j++) { + assertEquals(curValue++, list.get(j).intValue()); + } + } + + for (int i = 5; i < 7; i++) { + List list = (List) target.getObject(i); + assertEquals(5, list.size()); + for (int j = 0; j < list.size(); j++) { + assertEquals(curValue++, list.get(j).intValue()); + } + } + } + } + + private StructVector createStructVector(int start, int end) { + final StructVector vector = StructVector.empty("vector", allocator); + IntVector child1 = vector.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + VarCharVector child2 = vector.addOrGet("f1", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class); + + child1.allocateNew(); + child2.allocateNew(); + + for (int i = 0; i < end - start; i++) { + child1.setSafe(i, start + i); + child2.setSafe(i, ("a" + (start + i)).getBytes()); + } + child1.setValueCount(end - start); + child2.setLastSet(end - start - 1); + child2.setValueCount(end - start); + + vector.setValueCount(end - start); + return vector; + } + + @Test + public void testAppendStructVector() { + final int length1 = 10; + final int length2 = 5; + try (final StructVector target = createStructVector(0, length1); + final StructVector delta = createStructVector(length1, length1 + length2)) { + + VectorAppender appender = new VectorAppender(target); + delta.accept(appender, null); + + assertEquals(length1 + length2, target.getValueCount()); + IntVector child1 = (IntVector) target.getVectorById(0); + VarCharVector child2 = (VarCharVector) target.getVectorById(1); + + for (int i = 0; i < target.getValueCount(); i++) { + assertEquals(i, child1.get(i)); + assertEquals("a" + i, new String(child2.get(i))); + } + } + } + + private UnionVector createComplexUnionVector(int start, int end) { + final UnionVector vector = UnionVector.empty("vector", allocator); + + final NullableIntHolder intHolder = new NullableIntHolder(); + intHolder.isSet = 1; + + final NullableBigIntHolder longHolder = new NullableBigIntHolder(); + longHolder.isSet = 1; + + for (int i = 0; i < end - start; i++) { + vector.setType(i * 2, Types.MinorType.INT); + intHolder.value = i + start; + vector.setSafe(i * 2, intHolder); + + vector.setType(i * 2 + 1, Types.MinorType.BIGINT); + longHolder.value = i + start; + vector.setSafe(i * 2 + 1, longHolder); + } + + vector.setValueCount((end - start) * 2); + + return vector; + } + + private UnionVector createSingleUnionVector(int start, int end) { + final UnionVector vector = UnionVector.empty("vector", allocator); + + final NullableFloat4Holder floatHolder = new NullableFloat4Holder(); + floatHolder.isSet = 1; + + for (int i = 0; i < end - start; i++) { + vector.setType(i, Types.MinorType.FLOAT4); + floatHolder.value = i + start; + vector.setSafe(i, floatHolder); + } + + vector.setValueCount(end - start); + return vector; + } + + @Test + public void testAppendUnionVector() { + final int length1 = 10; + final int length2 = 5; + + try (final UnionVector target = createComplexUnionVector(0, length1); + final UnionVector delta = createSingleUnionVector(length1, length1 + length2)) { + VectorAppender appender = new VectorAppender(target); + delta.accept(appender, null); + + assertEquals(length1 * 2 + length2, target.getValueCount()); + + for (int i = 0; i < length1; i++) { + Object intObj = target.getObject(i * 2); + assertTrue(intObj instanceof Integer); + assertEquals(i, ((Integer) intObj).intValue()); + + Object longObj = target.getObject(i * 2 + 1); + assertTrue(longObj instanceof Long); + assertEquals(i, ((Long) longObj).longValue()); + } + + for (int i = 0; i < length2; i++) { + Object floatObj = target.getObject(length1 * 2 + i); + assertTrue(floatObj instanceof Float); + assertEquals(i + length1, ((Float) floatObj).intValue()); + } + } + } + + @Test + public void testAppendVectorNegative() { + final int vectorLength = 10; + try (IntVector target = new IntVector("", allocator); + VarCharVector delta = new VarCharVector("", allocator)) { + + target.allocateNew(vectorLength); + delta.allocateNew(vectorLength); + + VectorAppender appender = new VectorAppender(target); + + assertThrows(IllegalArgumentException.class, + () -> delta.accept(appender, null)); + } + } +} From c89211abcd0e0f4b2a6dc757a7a757390b2d1a31 Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Mon, 13 Jan 2020 11:08:08 +0800 Subject: [PATCH 2/5] [ARROW-7073][Java] Rewrite tests with vector populator and provide static utility --- .../arrow/vector/util/VectorAppender.java | 8 +- .../vector/util/VectorBatchAppender.java | 39 +++ .../testing/ValueVectorDataPopulator.java | 63 +++- .../arrow/vector/util/TestVectorAppender.java | 271 ++++++++---------- .../vector/util/TestVectorBatchAppender.java | 72 +++++ 5 files changed, 294 insertions(+), 159 deletions(-) create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/util/VectorBatchAppender.java create mode 100644 java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java index 6301cefe245..3e8f04fa18f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java @@ -37,7 +37,7 @@ /** * Utility to append two vectors together. */ -public class VectorAppender implements VectorVisitor { +class VectorAppender implements VectorVisitor { /** * The targetVector to be appended. @@ -50,9 +50,9 @@ public class VectorAppender implements VectorVisitor { * Constructs a new targetVector appender, with the given targetVector. * @param targetVector the targetVector to be appended. */ - public VectorAppender(ValueVector targetVector) { + VectorAppender(ValueVector targetVector) { this.targetVector = targetVector; - typeVisitor = new TypeEqualsVisitor(targetVector); + typeVisitor = new TypeEqualsVisitor(targetVector, false, true); } @Override @@ -194,7 +194,7 @@ public ValueVector visit(FixedSizeListVector deltaVector, Void value) { int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount(); int targetListSize = targetListVector.getValueCount() * targetListVector.getListSize(); - int deltaListSize = deltaVector.getValueCount() * deltaVector.getValueCount(); + int deltaListSize = deltaVector.getValueCount() * deltaVector.getListSize(); // make sure the underlying vector has value count set targetListVector.getDataVector().setValueCount(targetListSize); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorBatchAppender.java b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorBatchAppender.java new file mode 100644 index 00000000000..570783d1070 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorBatchAppender.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.util; + +import org.apache.arrow.vector.ValueVector; + +/** + * Utility to add vector values in batch. + */ +public class VectorBatchAppender { + + /** + * Add value vectors in batch. + * @param targetVector the target vector. + * @param vectorsToAppend the vectors to append. + * @param the vector type. + */ + public static void batchAppend(V targetVector, V... vectorsToAppend) { + VectorAppender appender = new VectorAppender(targetVector); + for (V delta : vectorsToAppend) { + delta.accept(appender, null); + } + } +} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java index ab3f81a6446..8c7bfab7411 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java @@ -17,10 +17,14 @@ package org.apache.arrow.vector.testing; +import static org.junit.Assert.assertEquals; + import java.nio.charset.StandardCharsets; +import java.util.List; import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.DateDayVector; import org.apache.arrow.vector.DateMilliVector; import org.apache.arrow.vector.DecimalVector; @@ -51,7 +55,12 @@ import org.apache.arrow.vector.UInt8Vector; import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.complex.BaseRepeatedValueVector; +import org.apache.arrow.vector.complex.FixedSizeListVector; +import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.holders.IntervalDayHolder; +import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.types.pojo.FieldType; /** * Utility for populating {@link org.apache.arrow.vector.ValueVector}. @@ -533,10 +542,62 @@ public static void setVector(VarCharVector vector, String... values) { vector.allocateNewSafe(); for (int i = 0; i < length; i++) { if (values[i] != null) { - vector.set(i, values[i].getBytes(StandardCharsets.UTF_8)); + vector.setSafe(i, values[i].getBytes(StandardCharsets.UTF_8)); } } vector.setValueCount(length); } + /** + * Populate values for {@link ListVector}. + */ + public static void setVector(ListVector vector, List... values) { + Types.MinorType type = Types.MinorType.INT; + vector.addOrGetVector(FieldType.nullable(type.getType())); + + IntVector dataVector = (IntVector) vector.getDataVector(); + dataVector.allocateNew(); + + // set underlying vectors + int curPos = 0; + vector.getOffsetBuffer().setInt(0, curPos); + for (int i = 0; i < values.length; i++) { + BitVectorHelper.setBit(vector.getValidityBuffer(), i); + for (int value : values[i]) { + dataVector.set(curPos, value); + curPos += 1; + } + vector.getOffsetBuffer().setInt((i + 1) * BaseRepeatedValueVector.OFFSET_WIDTH, curPos); + } + dataVector.setValueCount(curPos); + vector.setLastSet(values.length - 1); + vector.setValueCount(values.length); + } + + /** + * Populate values for {@link FixedSizeListVector}. + */ + public static void setVector(FixedSizeListVector vector, List... values) { + for (int i = 0; i < values.length; i++) { + assertEquals(vector.getListSize(), values[i].size()); + } + + Types.MinorType type = Types.MinorType.INT; + vector.addOrGetVector(FieldType.nullable(type.getType())); + + IntVector dataVector = (IntVector) vector.getDataVector(); + dataVector.allocateNew(); + + // set underlying vectors + int curPos = 0; + for (int i = 0; i < values.length; i++) { + BitVectorHelper.setBit(vector.getValidityBuffer(), i); + for (int value : values[i]) { + dataVector.set(curPos, value); + curPos += 1; + } + } + dataVector.setValueCount(curPos); + vector.setValueCount(values.length); + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java index d485dbfde60..86c72f3a872 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java @@ -19,23 +19,22 @@ import static junit.framework.TestCase.assertEquals; import static junit.framework.TestCase.assertTrue; -import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH; import static org.junit.jupiter.api.Assertions.assertThrows; +import java.util.Arrays; import java.util.List; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BitVectorHelper; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.holders.NullableBigIntHolder; -import org.apache.arrow.vector.holders.NullableFloat4Holder; -import org.apache.arrow.vector.holders.NullableIntHolder; +import org.apache.arrow.vector.testing.ValueVectorDataPopulator; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; @@ -70,14 +69,8 @@ public void testAppendFixedWidthVector() { target.allocateNew(length1); delta.allocateNew(length2); - for (int i = 0; i < length1; i++) { - target.set(i, i); - } - for (int i = 0; i < length2; i++) { - delta.set(i, i + length1); - } - target.setValueCount(length1); - delta.setValueCount(length2); + ValueVectorDataPopulator.setVector(target, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9); + ValueVectorDataPopulator.setVector(delta, 10, 11, 12, 13, 14); VectorAppender appender = new VectorAppender(target); delta.accept(appender, null); @@ -99,14 +92,8 @@ public void testAppendVariableWidthVector() { target.allocateNew(5, length1); delta.allocateNew(5, length2); - for (int i = 0; i < length1; i++) { - target.setSafe(i, ("a" + i).getBytes()); - } - for (int i = 0; i < length2; i++) { - delta.setSafe(i, ("a" + (i + length1)).getBytes()); - } - target.setValueCount(length1); - delta.setValueCount(length2); + ValueVectorDataPopulator.setVector(target, "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9"); + ValueVectorDataPopulator.setVector(delta, "a10", "a11", "a12", "a13", "a14"); VectorAppender appender = new VectorAppender(target); delta.accept(appender, null); @@ -118,40 +105,27 @@ public void testAppendVariableWidthVector() { } } - private ListVector createListVector(int start, int end, int step) { - final int listLength = (end - start) / step; - - ListVector listVector = ListVector.empty("list vector", allocator); - - Types.MinorType type = Types.MinorType.INT; - listVector.addOrGetVector(FieldType.nullable(type.getType())); - - listVector.allocateNew(); - - IntVector dataVector = (IntVector) listVector.getDataVector(); - - // set underlying vectors - for (int i = 0; i < end - start; i++) { - dataVector.set(i, i + start); - } - dataVector.setValueCount(end - start); - - // set offset buffer - for (int i = 0; i < listLength; i++) { - BitVectorHelper.setBit(listVector.getValidityBuffer(), i); - listVector.getOffsetBuffer().setInt(i * OFFSET_WIDTH, i * step); - listVector.getOffsetBuffer().setInt((i + 1) * OFFSET_WIDTH, (i + 1) * step); - } - listVector.setLastSet(listLength - 1); - listVector.setValueCount(listLength); - - return listVector; - } - @Test public void testAppendListVector() { - try (ListVector target = createListVector(0, 10, 2); - ListVector delta = createListVector(10, 20, 5)) { + final int length1 = 5; + final int length2 = 2; + try (ListVector target = ListVector.empty("target", allocator); + ListVector delta = ListVector.empty("delta", allocator)) { + + target.allocateNew(); + ValueVectorDataPopulator.setVector(target, + Arrays.asList(0, 1), + Arrays.asList(2, 3), + Arrays.asList(4, 5), + Arrays.asList(6, 7), + Arrays.asList(8, 9)); + assertEquals(length1, target.getValueCount()); + + delta.allocateNew(); + ValueVectorDataPopulator.setVector(delta, + Arrays.asList(10, 11, 12, 13, 14), + Arrays.asList(15, 16, 17, 18, 19)); + assertEquals(length2, delta.getValueCount()); VectorAppender appender = new VectorAppender(target); delta.accept(appender, null); @@ -159,7 +133,7 @@ public void testAppendListVector() { assertEquals(7, target.getValueCount()); int curValue = 0; - for (int i = 0; i < 5; i++) { + for (int i = 0; i < length1; i++) { List list = (List) target.getObject(i); assertEquals(2, list.size()); for (int j = 0; j < list.size(); j++) { @@ -167,7 +141,7 @@ public void testAppendListVector() { } } - for (int i = 5; i < 7; i++) { + for (int i = length1; i < length1 + length2; i++) { List list = (List) target.getObject(i); assertEquals(5, list.size()); for (int j = 0; j < list.size(); j++) { @@ -177,48 +151,29 @@ public void testAppendListVector() { } } - private FixedSizeListVector createFixedSizeListVector(int start, int end, int step) { - final int listLength = (end - start) / step; - - FixedSizeListVector listVector = FixedSizeListVector.empty("fixed size list vector", step, allocator); - - Types.MinorType type = Types.MinorType.INT; - listVector.addOrGetVector(FieldType.nullable(type.getType())); - - listVector.allocateNew(); - - IntVector dataVector = (IntVector) listVector.getDataVector(); - - // set underlying vectors - for (int i = 0; i < end - start; i++) { - dataVector.set(i, i + start); - } - dataVector.setValueCount(end - start); - - listVector.setValueCount(listLength); - - return listVector; - } - @Test public void testAppendFixedSizeListVector() { - try (ListVector target = createListVector(0, 10, 2); - ListVector delta = createListVector(10, 20, 5)) { + try (FixedSizeListVector target = FixedSizeListVector.empty("target", 5, allocator); + FixedSizeListVector delta = FixedSizeListVector.empty("delta", 5, allocator)) { + + target.allocateNew(); + ValueVectorDataPopulator.setVector(target, + Arrays.asList(0, 1, 2, 3, 4), + Arrays.asList(5, 6, 7, 8, 9)); + assertEquals(2, target.getValueCount()); + + delta.allocateNew(); + ValueVectorDataPopulator.setVector(delta, + Arrays.asList(10, 11, 12, 13, 14), + Arrays.asList(15, 16, 17, 18, 19)); + assertEquals(2, delta.getValueCount()); VectorAppender appender = new VectorAppender(target); delta.accept(appender, null); - assertEquals(7, target.getValueCount()); + assertEquals(4, target.getValueCount()); int curValue = 0; - for (int i = 0; i < 5; i++) { - List list = (List) target.getObject(i); - assertEquals(2, list.size()); - for (int j = 0; j < list.size(); j++) { - assertEquals(curValue++, list.get(j).intValue()); - } - } - - for (int i = 5; i < 7; i++) { + for (int i = 0; i < target.getValueCount(); i++) { List list = (List) target.getObject(i); assertEquals(5, list.size()); for (int j = 0; j < list.size(); j++) { @@ -228,32 +183,28 @@ public void testAppendFixedSizeListVector() { } } - private StructVector createStructVector(int start, int end) { - final StructVector vector = StructVector.empty("vector", allocator); - IntVector child1 = vector.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - VarCharVector child2 = vector.addOrGet("f1", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class); - - child1.allocateNew(); - child2.allocateNew(); - - for (int i = 0; i < end - start; i++) { - child1.setSafe(i, start + i); - child2.setSafe(i, ("a" + (start + i)).getBytes()); - } - child1.setValueCount(end - start); - child2.setLastSet(end - start - 1); - child2.setValueCount(end - start); - - vector.setValueCount(end - start); - return vector; - } - @Test public void testAppendStructVector() { final int length1 = 10; final int length2 = 5; - try (final StructVector target = createStructVector(0, length1); - final StructVector delta = createStructVector(length1, length1 + length2)) { + try (final StructVector target = StructVector.empty("target", allocator); + final StructVector delta = StructVector.empty("delta", allocator)) { + + IntVector targetChild1 = target.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + VarCharVector targetChild2 = target.addOrGet("f1", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class); + targetChild1.allocateNew(); + targetChild2.allocateNew(); + ValueVectorDataPopulator.setVector(targetChild1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9); + ValueVectorDataPopulator.setVector(targetChild2, "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9"); + target.setValueCount(length1); + + IntVector deltaChild1 = delta.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + VarCharVector deltaChild2 = delta.addOrGet("f1", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class); + deltaChild1.allocateNew(); + deltaChild2.allocateNew(); + ValueVectorDataPopulator.setVector(deltaChild1, 10, 11, 12, 13, 14); + ValueVectorDataPopulator.setVector(deltaChild2, "a10", "a11", "a12", "a13", "a14"); + delta.setValueCount(length2); VectorAppender appender = new VectorAppender(target); delta.accept(appender, null); @@ -269,53 +220,65 @@ public void testAppendStructVector() { } } - private UnionVector createComplexUnionVector(int start, int end) { - final UnionVector vector = UnionVector.empty("vector", allocator); - - final NullableIntHolder intHolder = new NullableIntHolder(); - intHolder.isSet = 1; - - final NullableBigIntHolder longHolder = new NullableBigIntHolder(); - longHolder.isSet = 1; - - for (int i = 0; i < end - start; i++) { - vector.setType(i * 2, Types.MinorType.INT); - intHolder.value = i + start; - vector.setSafe(i * 2, intHolder); - - vector.setType(i * 2 + 1, Types.MinorType.BIGINT); - longHolder.value = i + start; - vector.setSafe(i * 2 + 1, longHolder); - } - - vector.setValueCount((end - start) * 2); - - return vector; - } - - private UnionVector createSingleUnionVector(int start, int end) { - final UnionVector vector = UnionVector.empty("vector", allocator); - - final NullableFloat4Holder floatHolder = new NullableFloat4Holder(); - floatHolder.isSet = 1; - - for (int i = 0; i < end - start; i++) { - vector.setType(i, Types.MinorType.FLOAT4); - floatHolder.value = i + start; - vector.setSafe(i, floatHolder); - } - - vector.setValueCount(end - start); - return vector; - } - @Test public void testAppendUnionVector() { final int length1 = 10; final int length2 = 5; - try (final UnionVector target = createComplexUnionVector(0, length1); - final UnionVector delta = createSingleUnionVector(length1, length1 + length2)) { + try (final UnionVector target = UnionVector.empty("target", allocator); + final UnionVector delta = UnionVector.empty("delta", allocator)) { + + // alternating ints and big ints + target.setType(0, Types.MinorType.INT); + target.setType(1, Types.MinorType.BIGINT); + target.setType(2, Types.MinorType.INT); + target.setType(3, Types.MinorType.BIGINT); + target.setType(4, Types.MinorType.INT); + target.setType(5, Types.MinorType.BIGINT); + target.setType(6, Types.MinorType.INT); + target.setType(7, Types.MinorType.BIGINT); + target.setType(8, Types.MinorType.INT); + target.setType(9, Types.MinorType.BIGINT); + target.setType(10, Types.MinorType.INT); + target.setType(11, Types.MinorType.BIGINT); + target.setType(12, Types.MinorType.INT); + target.setType(13, Types.MinorType.BIGINT); + target.setType(14, Types.MinorType.INT); + target.setType(15, Types.MinorType.BIGINT); + target.setType(16, Types.MinorType.INT); + target.setType(17, Types.MinorType.BIGINT); + target.setType(18, Types.MinorType.INT); + target.setType(19, Types.MinorType.BIGINT); + + IntVector targetIntVec = target.getIntVector(); + targetIntVec.allocateNew(); + ValueVectorDataPopulator.setVector( + targetIntVec, + 0, null, 1, null, 2, null, 3, null, 4, null, 5, null, 6, null, 7, null, 8, null, 9, null); + assertEquals(length1 * 2, targetIntVec.getValueCount()); + + BigIntVector targetBigIntVec = target.getBigIntVector(); + targetBigIntVec.allocateNew(); + ValueVectorDataPopulator.setVector( + targetBigIntVec, + null, 0L, null, 1L, null, 2L, null, 3L, null, 4L, null, 5L, null, 6L, null, 7L, null, 8L, null, 9L); + assertEquals(length1 * 2, targetBigIntVec.getValueCount()); + + target.setValueCount(length1 * 2); + + // populate the delta vector + delta.setType(0, Types.MinorType.FLOAT4); + delta.setType(1, Types.MinorType.FLOAT4); + delta.setType(2, Types.MinorType.FLOAT4); + delta.setType(3, Types.MinorType.FLOAT4); + delta.setType(4, Types.MinorType.FLOAT4); + + Float4Vector deltaFloatVector = delta.getFloat4Vector(); + deltaFloatVector.allocateNew(); + ValueVectorDataPopulator.setVector(deltaFloatVector, 10f, 11f, 12f, 13f, 14f); + assertEquals(length2, deltaFloatVector.getValueCount()); + delta.setValueCount(length2); + VectorAppender appender = new VectorAppender(target); delta.accept(appender, null); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java new file mode 100644 index 00000000000..799c25c0ad7 --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.util; + +import static junit.framework.TestCase.assertEquals; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.testing.ValueVectorDataPopulator; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Test cases for {@link VectorBatchAppender}. + */ +public class TestVectorBatchAppender { + + private BufferAllocator allocator; + + @Before + public void prepare() { + allocator = new RootAllocator(1024 * 1024); + } + + @After + public void shutdown() { + allocator.close(); + } + + @Test + public void testBatchAppendIntVector() { + final int length1 = 10; + final int length2 = 5; + final int length3 = 7; + try (IntVector target = new IntVector("", allocator); + IntVector delta1 = new IntVector("", allocator); + IntVector delta2 = new IntVector("", allocator)) { + + target.allocateNew(length1); + delta1.allocateNew(length2); + delta2.allocateNew(length3); + + ValueVectorDataPopulator.setVector(target, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9); + ValueVectorDataPopulator.setVector(delta1, 10, 11, 12, 13, 14); + ValueVectorDataPopulator.setVector(delta2, 15, 16, 17, 18, 19, 20, 21); + + VectorBatchAppender.batchAppend(target, delta1, delta2); + + assertEquals(length1 + length2 + length3, target.getValueCount()); + for (int i = 0; i < target.getValueCount(); i++) { + assertEquals(i, target.get(i)); + } + } + } +} From ad33e234c9a8b09b4cc316708221f4012ae9047b Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Sun, 19 Jan 2020 20:04:12 +0800 Subject: [PATCH 3/5] [ARROW-7073][Java] Rewrite tests with vector populator for result verification --- .../arrow/vector/util/TestVectorAppender.java | 89 ++++++++++++------- 1 file changed, 58 insertions(+), 31 deletions(-) diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java index 86c72f3a872..d99331c57a7 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java @@ -29,7 +29,11 @@ import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.compare.Range; +import org.apache.arrow.vector.compare.RangeEqualsVisitor; +import org.apache.arrow.vector.compare.TypeEqualsVisitor; import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.StructVector; @@ -76,8 +80,11 @@ public void testAppendFixedWidthVector() { delta.accept(appender, null); assertEquals(length1 + length2, target.getValueCount()); - for (int i = 0; i < target.getValueCount(); i++) { - assertEquals(i, target.get(i)); + + try (IntVector expected = new IntVector("expected", allocator)) { + expected.allocateNew(); + ValueVectorDataPopulator.setVector(expected, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14); + assertVectorsEqual(expected, target); } } } @@ -98,9 +105,11 @@ public void testAppendVariableWidthVector() { VectorAppender appender = new VectorAppender(target); delta.accept(appender, null); - assertEquals(length1 + length2, target.getValueCount()); - for (int i = 0; i < target.getValueCount(); i++) { - assertEquals("a" + i, new String(target.get(i))); + try (VarCharVector expected = new VarCharVector("expected", allocator)) { + expected.allocateNew(); + ValueVectorDataPopulator.setVector(expected, + "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9", "a10", "a11", "a12", "a13", "a14"); + assertVectorsEqual(expected, target); } } } @@ -132,22 +141,26 @@ public void testAppendListVector() { assertEquals(7, target.getValueCount()); - int curValue = 0; - for (int i = 0; i < length1; i++) { - List list = (List) target.getObject(i); - assertEquals(2, list.size()); - for (int j = 0; j < list.size(); j++) { - assertEquals(curValue++, list.get(j).intValue()); - } - } + List expected = Arrays.asList(0, 1); + assertEquals(expected, target.getObject(0)); - for (int i = length1; i < length1 + length2; i++) { - List list = (List) target.getObject(i); - assertEquals(5, list.size()); - for (int j = 0; j < list.size(); j++) { - assertEquals(curValue++, list.get(j).intValue()); - } - } + expected = Arrays.asList(2, 3); + assertEquals(expected, target.getObject(1)); + + expected = Arrays.asList(4, 5); + assertEquals(expected, target.getObject(2)); + + expected = Arrays.asList(6, 7); + assertEquals(expected, target.getObject(3)); + + expected = Arrays.asList(8, 9); + assertEquals(expected, target.getObject(4)); + + expected = Arrays.asList(10, 11, 12, 13, 14); + assertEquals(expected, target.getObject(5)); + + expected = Arrays.asList(15, 16, 17, 18, 19); + assertEquals(expected, target.getObject(6)); } } @@ -172,14 +185,11 @@ public void testAppendFixedSizeListVector() { delta.accept(appender, null); assertEquals(4, target.getValueCount()); - int curValue = 0; - for (int i = 0; i < target.getValueCount(); i++) { - List list = (List) target.getObject(i); - assertEquals(5, list.size()); - for (int j = 0; j < list.size(); j++) { - assertEquals(curValue++, list.get(j).intValue()); - } - } + + assertEquals(Arrays.asList(0, 1, 2, 3, 4), target.getObject(0)); + assertEquals(Arrays.asList(5, 6, 7, 8, 9), target.getObject(1)); + assertEquals(Arrays.asList(10, 11, 12, 13, 14), target.getObject(2)); + assertEquals(Arrays.asList(15, 16, 17, 18, 19), target.getObject(3)); } } @@ -213,9 +223,17 @@ public void testAppendStructVector() { IntVector child1 = (IntVector) target.getVectorById(0); VarCharVector child2 = (VarCharVector) target.getVectorById(1); - for (int i = 0; i < target.getValueCount(); i++) { - assertEquals(i, child1.get(i)); - assertEquals("a" + i, new String(child2.get(i))); + try (IntVector expected1 = new IntVector("expected1", allocator); + VarCharVector expected2 = new VarCharVector("expected2", allocator)) { + expected1.allocateNew(); + expected2.allocateNew(); + + ValueVectorDataPopulator.setVector(expected1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14); + ValueVectorDataPopulator.setVector(expected2, + "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9", "a10", "a11", "a12", "a13", "a14"); + + assertVectorsEqual(expected1, target.getChild("f0")); + assertVectorsEqual(expected2, target.getChild("f1")); } } } @@ -317,4 +335,13 @@ public void testAppendVectorNegative() { () -> delta.accept(appender, null)); } } + + public static void assertVectorsEqual(ValueVector vector1, ValueVector vector2) { + assertEquals(vector1.getValueCount(), vector2.getValueCount()); + + TypeEqualsVisitor typeEqualsVisitor = new TypeEqualsVisitor(vector1, false, false); + RangeEqualsVisitor equalsVisitor = + new RangeEqualsVisitor(vector1, vector2, (v1, v2) -> typeEqualsVisitor.equals(vector2)); + assertTrue(equalsVisitor.rangeEquals(new Range(0, 0, vector1.getValueCount()))); + } } From ee49dc6787ead692ae5bbc109596dbdfe5718d0d Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Tue, 21 Jan 2020 16:37:43 +0800 Subject: [PATCH 4/5] [ARROW-7073][Java] Add tests with null values --- .../arrow/vector/util/VectorAppender.java | 2 +- .../testing/ValueVectorDataPopulator.java | 28 +++++++++++------ .../arrow/vector/util/TestVectorAppender.java | 31 +++++++++---------- 3 files changed, 35 insertions(+), 26 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java index 3e8f04fa18f..a9c4802de25 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java @@ -75,7 +75,7 @@ public ValueVector visit(BaseFixedWidthVector deltaVector, Void value) { // append data buffer PlatformDependent.copyMemory(deltaVector.getDataBuffer().memoryAddress(), targetVector.getDataBuffer().memoryAddress() + deltaVector.getTypeWidth() * targetVector.getValueCount(), - deltaVector.getTypeWidth() * targetVector.getValueCount()); + deltaVector.getTypeWidth() * deltaVector.getValueCount()); targetVector.setValueCount(newValueCount); return targetVector; } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java index 8c7bfab7411..b50a3441322 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java @@ -562,10 +562,14 @@ public static void setVector(ListVector vector, List... values) { int curPos = 0; vector.getOffsetBuffer().setInt(0, curPos); for (int i = 0; i < values.length; i++) { - BitVectorHelper.setBit(vector.getValidityBuffer(), i); - for (int value : values[i]) { - dataVector.set(curPos, value); - curPos += 1; + if (values[i] == null) { + BitVectorHelper.unsetBit(vector.getValidityBuffer(), i); + } else { + BitVectorHelper.setBit(vector.getValidityBuffer(), i); + for (int value : values[i]) { + dataVector.set(curPos, value); + curPos += 1; + } } vector.getOffsetBuffer().setInt((i + 1) * BaseRepeatedValueVector.OFFSET_WIDTH, curPos); } @@ -579,7 +583,9 @@ public static void setVector(ListVector vector, List... values) { */ public static void setVector(FixedSizeListVector vector, List... values) { for (int i = 0; i < values.length; i++) { - assertEquals(vector.getListSize(), values[i].size()); + if (values[i] != null) { + assertEquals(vector.getListSize(), values[i].size()); + } } Types.MinorType type = Types.MinorType.INT; @@ -591,10 +597,14 @@ public static void setVector(FixedSizeListVector vector, List... values // set underlying vectors int curPos = 0; for (int i = 0; i < values.length; i++) { - BitVectorHelper.setBit(vector.getValidityBuffer(), i); - for (int value : values[i]) { - dataVector.set(curPos, value); - curPos += 1; + if (values[i] == null) { + BitVectorHelper.unsetBit(vector.getValidityBuffer(), i); + } else { + BitVectorHelper.setBit(vector.getValidityBuffer(), i); + for (int value : values[i]) { + dataVector.set(curPos, value); + curPos += 1; + } } } dataVector.setValueCount(curPos); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java index d99331c57a7..83fc24fe90a 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java @@ -73,8 +73,8 @@ public void testAppendFixedWidthVector() { target.allocateNew(length1); delta.allocateNew(length2); - ValueVectorDataPopulator.setVector(target, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9); - ValueVectorDataPopulator.setVector(delta, 10, 11, 12, 13, 14); + ValueVectorDataPopulator.setVector(target, 0, 1, 2, 3, 4, 5, 6, null, 8, 9); + ValueVectorDataPopulator.setVector(delta, null, 11, 12, 13, 14); VectorAppender appender = new VectorAppender(target); delta.accept(appender, null); @@ -83,7 +83,7 @@ public void testAppendFixedWidthVector() { try (IntVector expected = new IntVector("expected", allocator)) { expected.allocateNew(); - ValueVectorDataPopulator.setVector(expected, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14); + ValueVectorDataPopulator.setVector(expected, 0, 1, 2, 3, 4, 5, 6, null, 8, 9, null, 11, 12, 13, 14); assertVectorsEqual(expected, target); } } @@ -99,8 +99,8 @@ public void testAppendVariableWidthVector() { target.allocateNew(5, length1); delta.allocateNew(5, length2); - ValueVectorDataPopulator.setVector(target, "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9"); - ValueVectorDataPopulator.setVector(delta, "a10", "a11", "a12", "a13", "a14"); + ValueVectorDataPopulator.setVector(target, "a0", "a1", "a2", "a3", null, "a5", "a6", "a7", "a8", "a9"); + ValueVectorDataPopulator.setVector(delta, "a10", "a11", "a12", "a13", null); VectorAppender appender = new VectorAppender(target); delta.accept(appender, null); @@ -108,7 +108,7 @@ public void testAppendVariableWidthVector() { try (VarCharVector expected = new VarCharVector("expected", allocator)) { expected.allocateNew(); ValueVectorDataPopulator.setVector(expected, - "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9", "a10", "a11", "a12", "a13", "a14"); + "a0", "a1", "a2", "a3", null, "a5", "a6", "a7", "a8", "a9", "a10", "a11", "a12", "a13", null); assertVectorsEqual(expected, target); } } @@ -125,7 +125,7 @@ public void testAppendListVector() { ValueVectorDataPopulator.setVector(target, Arrays.asList(0, 1), Arrays.asList(2, 3), - Arrays.asList(4, 5), + null, Arrays.asList(6, 7), Arrays.asList(8, 9)); assertEquals(length1, target.getValueCount()); @@ -147,8 +147,7 @@ public void testAppendListVector() { expected = Arrays.asList(2, 3); assertEquals(expected, target.getObject(1)); - expected = Arrays.asList(4, 5); - assertEquals(expected, target.getObject(2)); + assertTrue(target.isNull(2)); expected = Arrays.asList(6, 7); assertEquals(expected, target.getObject(3)); @@ -172,7 +171,7 @@ public void testAppendFixedSizeListVector() { target.allocateNew(); ValueVectorDataPopulator.setVector(target, Arrays.asList(0, 1, 2, 3, 4), - Arrays.asList(5, 6, 7, 8, 9)); + null); assertEquals(2, target.getValueCount()); delta.allocateNew(); @@ -187,7 +186,7 @@ public void testAppendFixedSizeListVector() { assertEquals(4, target.getValueCount()); assertEquals(Arrays.asList(0, 1, 2, 3, 4), target.getObject(0)); - assertEquals(Arrays.asList(5, 6, 7, 8, 9), target.getObject(1)); + assertTrue(target.isNull(1)); assertEquals(Arrays.asList(10, 11, 12, 13, 14), target.getObject(2)); assertEquals(Arrays.asList(15, 16, 17, 18, 19), target.getObject(3)); } @@ -204,15 +203,15 @@ public void testAppendStructVector() { VarCharVector targetChild2 = target.addOrGet("f1", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class); targetChild1.allocateNew(); targetChild2.allocateNew(); - ValueVectorDataPopulator.setVector(targetChild1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9); - ValueVectorDataPopulator.setVector(targetChild2, "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9"); + ValueVectorDataPopulator.setVector(targetChild1, 0, 1, 2, 3, 4, null, 6, 7, 8, 9); + ValueVectorDataPopulator.setVector(targetChild2, "a0", "a1", "a2", "a3", "a4", "a5", "a6", null, "a8", "a9"); target.setValueCount(length1); IntVector deltaChild1 = delta.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); VarCharVector deltaChild2 = delta.addOrGet("f1", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class); deltaChild1.allocateNew(); deltaChild2.allocateNew(); - ValueVectorDataPopulator.setVector(deltaChild1, 10, 11, 12, 13, 14); + ValueVectorDataPopulator.setVector(deltaChild1, 10, 11, 12, null, 14); ValueVectorDataPopulator.setVector(deltaChild2, "a10", "a11", "a12", "a13", "a14"); delta.setValueCount(length2); @@ -228,9 +227,9 @@ public void testAppendStructVector() { expected1.allocateNew(); expected2.allocateNew(); - ValueVectorDataPopulator.setVector(expected1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14); + ValueVectorDataPopulator.setVector(expected1, 0, 1, 2, 3, 4, null, 6, 7, 8, 9, 10, 11, 12, null, 14); ValueVectorDataPopulator.setVector(expected2, - "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9", "a10", "a11", "a12", "a13", "a14"); + "a0", "a1", "a2", "a3", "a4", "a5", "a6", null, "a8", "a9", "a10", "a11", "a12", "a13", "a14"); assertVectorsEqual(expected1, target.getChild("f0")); assertVectorsEqual(expected2, target.getChild("f1")); From 94b407c859fa84bd01733c97d616eb83977a4b32 Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Tue, 28 Jan 2020 16:40:15 +0800 Subject: [PATCH 5/5] [ARROW-7073][Java] Support dense union vector --- .../java/org/apache/arrow/vector/util/VectorAppender.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java index a9c4802de25..8dac7676858 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java @@ -27,6 +27,7 @@ import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.compare.TypeEqualsVisitor; import org.apache.arrow.vector.compare.VectorVisitor; +import org.apache.arrow.vector.complex.DenseUnionVector; import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.NonNullableStructVector; @@ -311,6 +312,11 @@ public ValueVector visit(UnionVector deltaVector, Void value) { return targetVector; } + @Override + public ValueVector visit(DenseUnionVector left, Void value) { + throw new UnsupportedOperationException(); + } + @Override public ValueVector visit(NullVector deltaVector, Void value) { Preconditions.checkArgument(targetVector.getField().getType().equals(deltaVector.getField().getType()),