From a22d58abbba23bc4383f9198a1156cf429c8c5bd Mon Sep 17 00:00:00 2001 From: tianchen Date: Mon, 14 Oct 2019 15:32:51 +0800 Subject: [PATCH 1/4] ARROW-6871: [Java] Enhance TransferPair related parameters check and tests --- .../sort/FixedWidthInPlaceVectorSorter.java | 1 + .../vector/util/TransferPairBenchmarks.java | 129 +++++++++++++ .../main/codegen/templates/UnionVector.java | 7 +- .../arrow/vector/BaseFixedWidthVector.java | 9 +- .../arrow/vector/BaseVariableWidthVector.java | 7 +- .../org/apache/arrow/vector/BitVector.java | 5 +- .../vector/complex/FixedSizeListVector.java | 5 +- .../arrow/vector/complex/ListVector.java | 7 +- .../complex/NonNullableStructVector.java | 2 + .../arrow/vector/complex/StructVector.java | 5 +- .../arrow/vector/TestSplitAndTransfer.java | 172 ++++++++++++++++-- 11 files changed, 331 insertions(+), 18 deletions(-) create mode 100644 java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java index bac25cf8165..c5ecd8acb9b 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java @@ -46,6 +46,7 @@ public void sortInPlace(V vec, VectorValueComparator comparator) { this.comparator = comparator; this.pivotBuffer = (V) vec.getField().createVector(vec.getAllocator()); this.pivotBuffer.allocateNew(1); + this.pivotBuffer.setValueCount(1); comparator.attachVectors(vec, pivotBuffer); quickSort(); diff --git a/java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java b/java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java new file mode 100644 index 00000000000..b2194a5b66e --- /dev/null +++ b/java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.util; + +import java.nio.charset.StandardCharsets; +import java.util.concurrent.TimeUnit; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.VarCharVector; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +/** + * Benchmarks for {@link TransferPair}. + */ +@State(Scope.Benchmark) +public class TransferPairBenchmarks { + + private static final int VECTOR_LENGTH = 1024; + + private static final int ALLOCATOR_CAPACITY = 1024 * 1024; + + private BufferAllocator allocator; + + private IntVector intVector; + + private VarCharVector varCharVector; + + /** + * Setup benchmarks. + */ + @Setup + public void prepare() { + allocator = new RootAllocator(ALLOCATOR_CAPACITY); + intVector = new IntVector("intVector", allocator); + varCharVector = new VarCharVector("varcharVector", allocator); + + intVector.allocateNew(VECTOR_LENGTH); + varCharVector.allocateNew(VECTOR_LENGTH); + + for (int i = 0; i < VECTOR_LENGTH; i++) { + if (i % 3 == 0) { + intVector.setNull(i); + varCharVector.setNull(i); + } else { + intVector.setSafe(i, i * i); + varCharVector.setSafe(i, ("teststring" + i).getBytes(StandardCharsets.UTF_8)); + } + } + intVector.setValueCount(VECTOR_LENGTH); + varCharVector.setValueCount(VECTOR_LENGTH); + } + + /** + * Tear down benchmarks. + */ + @TearDown + public void tearDown() { + intVector.close(); + varCharVector.close();; + allocator.close(); + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public int copyValueSafeForIntVector() { + IntVector toVector = new IntVector("intVector", allocator); + toVector.setValueCount(VECTOR_LENGTH); + TransferPair transferPair = intVector.makeTransferPair(toVector); + for (int i = 0; i < VECTOR_LENGTH; i++) { + transferPair.copyValueSafe(i, i); + } + toVector.close(); + return 0; + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public int copyValueSafeForVarcharVector() { + VarCharVector toVector = new VarCharVector("varcharVector", allocator); + toVector.setValueCount(VECTOR_LENGTH); + TransferPair transferPair = varCharVector.makeTransferPair(toVector); + for (int i = 0; i < VECTOR_LENGTH; i++) { + transferPair.copyValueSafe(i, i); + } + toVector.close(); + return 0; + } + + public static void main(String [] args) throws RunnerException { + Options opt = new OptionsBuilder() + .include(TransferPairBenchmarks.class.getSimpleName()) + .forks(1) + .build(); + + new Runner(opt).run(); + } +} + diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java index db3c8a89f5e..93b8579ca45 100644 --- a/java/vector/src/main/codegen/templates/UnionVector.java +++ b/java/vector/src/main/codegen/templates/UnionVector.java @@ -408,6 +408,8 @@ public TransferPair makeTransferPair(ValueVector target) { @Override public void copyFrom(int inIndex, int outIndex, ValueVector from) { Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); + Preconditions.checkArgument(inIndex >= 0 && inIndex < from.getValueCount(), + "Invalid inIndex: %s", inIndex); UnionVector fromCast = (UnionVector) from; fromCast.getReader().setPosition(inIndex); getWriter().setPosition(outIndex); @@ -469,7 +471,10 @@ public void transfer() { @Override public void splitAndTransfer(int startIndex, int length) { - Preconditions.checkArgument(startIndex + length <= valueCount); + Preconditions.checkArgument(startIndex >= 0 && startIndex < valueCount, + "Invalid startIndex: %s", startIndex); + Preconditions.checkArgument(startIndex + length <= valueCount, + "Invalid length: %s", length); to.clear(); internalStructVectorTransferPair.splitAndTransfer(startIndex, length); final int startPoint = startIndex * TYPE_WIDTH; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java index ed91d6e659b..fa9e12a1887 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java @@ -584,7 +584,10 @@ public void transferTo(BaseFixedWidthVector target) { */ public void splitAndTransferTo(int startIndex, int length, BaseFixedWidthVector target) { - Preconditions.checkArgument(startIndex + length <= valueCount); + Preconditions.checkArgument(startIndex >= 0 && startIndex < valueCount, + "Invalid startIndex: %s", startIndex); + Preconditions.checkArgument(startIndex + length <= valueCount, + "Invalid length: %s", length); compareTypes(target, "splitAndTransferTo"); target.clear(); splitAndTransferValidityBuffer(startIndex, length, target); @@ -830,6 +833,8 @@ protected void handleSafe(int index) { @Override public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); + Preconditions.checkArgument(fromIndex >= 0 && fromIndex < from.getValueCount(), + "Invalid fromIndex: %s", fromIndex); if (from.isNull(fromIndex)) { BitVectorHelper.unsetBit(this.getValidityBuffer(), thisIndex); } else { @@ -851,6 +856,8 @@ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { @Override public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); + Preconditions.checkArgument(fromIndex >= 0 && fromIndex < from.getValueCount(), + "Invalid fromIndex: %s", fromIndex); handleSafe(thisIndex); copyFrom(fromIndex, thisIndex, from); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java index 7e276445643..ccd14c96905 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java @@ -714,6 +714,10 @@ public void transferTo(BaseVariableWidthVector target) { */ public void splitAndTransferTo(int startIndex, int length, BaseVariableWidthVector target) { + Preconditions.checkArgument(startIndex >= 0 && startIndex < valueCount, + "Invalid startIndex: %s", startIndex); + Preconditions.checkArgument(startIndex + length <= valueCount, + "Invalid length: %s", length); compareTypes(target, "splitAndTransferTo"); target.clear(); splitAndTransferValidityBuffer(startIndex, length, target); @@ -750,7 +754,6 @@ private void splitAndTransferOffsetBuffer(int startIndex, int length, BaseVariab */ private void splitAndTransferValidityBuffer(int startIndex, int length, BaseVariableWidthVector target) { - Preconditions.checkArgument(startIndex + length <= valueCount); int firstByteSource = BitVectorHelper.byteIndex(startIndex); int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); int byteSizeTarget = getValidityBufferSizeFromCount(length); @@ -1336,6 +1339,8 @@ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { @Override public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); + Preconditions.checkArgument(fromIndex >= 0 && fromIndex < from.getValueCount(), + "Invalid fromIndex: %s", fromIndex); if (from.isNull(fromIndex)) { handleSafe(thisIndex, 0); fillHoles(thisIndex); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java index 2fa5fdef4ef..b41c62677fc 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java @@ -158,6 +158,10 @@ public int getBufferSize() { * @param target destination vector */ public void splitAndTransferTo(int startIndex, int length, BaseFixedWidthVector target) { + Preconditions.checkArgument(startIndex >= 0 && startIndex < valueCount, + "Invalid startIndex: %s", startIndex); + Preconditions.checkArgument(startIndex + length <= valueCount, + "Invalid length: %s", length); compareTypes(target, "splitAndTransferTo"); target.clear(); target.validityBuffer = splitAndTransferBuffer(startIndex, length, target, @@ -174,7 +178,6 @@ private ArrowBuf splitAndTransferBuffer( BaseFixedWidthVector target, ArrowBuf sourceBuffer, ArrowBuf destBuffer) { - assert startIndex + length <= valueCount; int firstByteSource = BitVectorHelper.byteIndex(startIndex); int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); int byteSizeTarget = getValidityBufferSizeFromCount(length); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java index 653719feafe..8fa43fb06b7 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java @@ -590,7 +590,10 @@ public void transfer() { @Override public void splitAndTransfer(int startIndex, int length) { - Preconditions.checkArgument(startIndex + length <= valueCount); + Preconditions.checkArgument(startIndex >= 0 && startIndex < valueCount, + "Invalid startIndex: %s", startIndex); + Preconditions.checkArgument(startIndex + length <= valueCount, + "Invalid length: %s", length); final int startPoint = listSize * startIndex; final int sliceLength = listSize * length; to.clear(); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 59481d39399..03ab4900df9 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -364,6 +364,8 @@ public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { @Override public void copyFrom(int inIndex, int outIndex, ValueVector from) { Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); + Preconditions.checkArgument(inIndex >= 0 && inIndex < from.getValueCount(), + "Invalid inIndex: %s", inIndex); FieldReader in = from.getReader(); in.setPosition(inIndex); FieldWriter out = getWriter(); @@ -493,7 +495,10 @@ public void transfer() { */ @Override public void splitAndTransfer(int startIndex, int length) { - Preconditions.checkArgument(startIndex + length <= valueCount); + Preconditions.checkArgument(startIndex >= 0 && startIndex < valueCount, + "Invalid startIndex: %s", startIndex); + Preconditions.checkArgument(startIndex + length <= valueCount, + "Invalid length: %s", length); final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH); final int sliceLength = offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint; to.clear(); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java index fbd51235892..f739fcde928 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java @@ -99,6 +99,8 @@ public FieldReader getReader() { @Override public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); + Preconditions.checkArgument(fromIndex >= 0 && fromIndex < from.getValueCount(), + "Invalid fromIndex: %s", fromIndex); if (ephPair == null || ephPair.from != from) { ephPair = (StructTransferPair) from.makeTransferPair(this); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java index ec922a032a9..7b22835963a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java @@ -195,7 +195,10 @@ public void copyValueSafe(int fromIndex, int toIndex) { @Override public void splitAndTransfer(int startIndex, int length) { - Preconditions.checkArgument(startIndex + length <= valueCount); + Preconditions.checkArgument(startIndex >= 0 && startIndex < valueCount, + "Invalid startIndex: %s", startIndex); + Preconditions.checkArgument(startIndex + length <= valueCount, + "Invalid length: %s", length); target.clear(); splitAndTransferValidityBuffer(startIndex, length, target); super.splitAndTransfer(startIndex, length); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java index 6405e256b22..1e8728ece30 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java @@ -18,6 +18,7 @@ package org.apache.arrow.vector; import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -41,6 +42,17 @@ public void init() { public void terminate() throws Exception { allocator.close(); } + + private void populateVarcharVector(final VarCharVector vector, int valueCount, String[] compareArray) { + for (int i = 0; i < valueCount; i += 3) { + final String s = String.format("%010d", i); + vector.set(i, s.getBytes()); + if (compareArray != null) { + compareArray[i] = s; + } + } + vector.setValueCount(valueCount); + } @Test /* VarCharVector */ public void test() throws Exception { @@ -50,12 +62,7 @@ public void test() throws Exception { final int valueCount = 500; final String[] compareArray = new String[valueCount]; - for (int i = 0; i < valueCount; i += 3) { - final String s = String.format("%010d", i); - varCharVector.set(i, s.getBytes()); - compareArray[i] = s; - } - varCharVector.setValueCount(valueCount); + populateVarcharVector(varCharVector, valueCount, compareArray); final TransferPair tp = varCharVector.getTransferPair(allocator); final VarCharVector newVarCharVector = (VarCharVector) tp.getTo(); @@ -89,11 +96,7 @@ public void testMemoryConstrainedTransfer() { final int valueCount = 1000; - for (int i = 0; i < valueCount; i += 3) { - final String s = String.format("%010d", i); - varCharVector.set(i, s.getBytes()); - } - varCharVector.setValueCount(valueCount); + populateVarcharVector(varCharVector, valueCount, null); final TransferPair tp = varCharVector.getTransferPair(allocator); final VarCharVector newVarCharVector = (VarCharVector) tp.getTo(); @@ -107,4 +110,151 @@ public void testMemoryConstrainedTransfer() { } } } + + @Test + public void testTransfer() { + try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) { + varCharVector.allocateNew(10000, 1000); + + final int valueCount = 500; + final String[] compareArray = new String[valueCount]; + populateVarcharVector(varCharVector, valueCount, compareArray); + + final TransferPair tp = varCharVector.getTransferPair(allocator); + final VarCharVector newVarCharVector = (VarCharVector) tp.getTo(); + tp.transfer(); + + assertEquals(0, varCharVector.valueCount); + assertEquals(valueCount, newVarCharVector.valueCount); + + for (int i = 0; i < valueCount; i++) { + final boolean expectedSet = (i % 3) == 0; + if (expectedSet) { + final byte[] expectedValue = compareArray[i].getBytes(); + assertFalse(newVarCharVector.isNull(i)); + assertArrayEquals(expectedValue, newVarCharVector.get(i)); + } else { + assertTrue(newVarCharVector.isNull(i)); + } + } + + newVarCharVector.clear(); + } + } + + @Test + public void testCopyValueSafe() { + try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator); + final VarCharVector newVarCharVector = new VarCharVector("newvector", allocator)) { + varCharVector.allocateNew(10000, 1000); + + final int valueCount = 500; + populateVarcharVector(varCharVector, valueCount, null); + + final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector); + + // new vector memory is not pre-allocated, we expect copyValueSafe work fine. + for (int i = 0; i < valueCount; i++) { + tp.copyValueSafe(i, i); + } + newVarCharVector.setValueCount(valueCount); + + for (int i = 0; i < valueCount; i++) { + final boolean expectedSet = (i % 3) == 0; + if (expectedSet) { + assertFalse(varCharVector.isNull(i)); + assertFalse(newVarCharVector.isNull(i)); + assertArrayEquals(varCharVector.get(i), newVarCharVector.get(i)); + } else { + assertTrue(newVarCharVector.isNull(i)); + } + } + } + } + + @Test(expected = IllegalArgumentException.class) + public void testInvalidCopyValueSafe() { + try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator); + final VarCharVector newVarCharVector = new VarCharVector("newvector", allocator)) { + varCharVector.allocateNew(10000, 1000); + + final int valueCount = 500; + populateVarcharVector(varCharVector, valueCount, null); + + final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector); + + // fromIndex is invalid. + tp.copyValueSafe(valueCount, 0); + } + } + + @Test + public void testSplitAndTransferNon() { + try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) { + + varCharVector.allocateNew(10000, 1000); + final int valueCount = 500; + populateVarcharVector(varCharVector, valueCount, null); + + final TransferPair tp = varCharVector.getTransferPair(allocator); + VarCharVector newVarCharVector = (VarCharVector) tp.getTo(); + + tp.splitAndTransfer(0, 0); + assertEquals(0, newVarCharVector.getValueCount()); + + newVarCharVector.clear(); + } + } + + @Test + public void testSplitAndTransferAll() { + try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) { + + varCharVector.allocateNew(10000, 1000); + final int valueCount = 500; + populateVarcharVector(varCharVector, valueCount, null); + + final TransferPair tp = varCharVector.getTransferPair(allocator); + VarCharVector newVarCharVector = (VarCharVector) tp.getTo(); + + tp.splitAndTransfer(0, valueCount); + assertEquals(valueCount, newVarCharVector.getValueCount()); + + newVarCharVector.clear(); + } + } + + @Test(expected = IllegalArgumentException.class) + public void testInvalidStartIndex() { + try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator); + final VarCharVector newVarCharVector = new VarCharVector("newvector", allocator)) { + + varCharVector.allocateNew(10000, 1000); + final int valueCount = 500; + populateVarcharVector(varCharVector, valueCount, null); + + final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector); + + tp.splitAndTransfer(valueCount, 10); + + newVarCharVector.clear(); + } + } + + @Test(expected = IllegalArgumentException.class) + public void testInvalidLength() { + try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator); + final VarCharVector newVarCharVector = new VarCharVector("newvector", allocator)) { + + varCharVector.allocateNew(10000, 1000); + final int valueCount = 500; + populateVarcharVector(varCharVector, valueCount, null); + + final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector); + + tp.splitAndTransfer(0, valueCount * 2); + + newVarCharVector.clear(); + } + } } From 01f9a48f25f523d48cff97a5a92d85f36015482d Mon Sep 17 00:00:00 2001 From: tianchen Date: Mon, 25 Nov 2019 11:11:54 +0800 Subject: [PATCH 2/4] revert changes in copyFrom --- .../vector/util/TransferPairBenchmarks.java | 129 ------------------ .../main/codegen/templates/UnionVector.java | 2 - .../arrow/vector/BaseFixedWidthVector.java | 4 - .../arrow/vector/BaseVariableWidthVector.java | 2 - .../arrow/vector/complex/ListVector.java | 2 - .../complex/NonNullableStructVector.java | 2 - .../arrow/vector/TestSplitAndTransfer.java | 33 ++--- 7 files changed, 13 insertions(+), 161 deletions(-) delete mode 100644 java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java diff --git a/java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java b/java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java deleted file mode 100644 index b2194a5b66e..00000000000 --- a/java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.arrow.vector.util; - -import java.nio.charset.StandardCharsets; -import java.util.concurrent.TimeUnit; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarCharVector; - -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -/** - * Benchmarks for {@link TransferPair}. - */ -@State(Scope.Benchmark) -public class TransferPairBenchmarks { - - private static final int VECTOR_LENGTH = 1024; - - private static final int ALLOCATOR_CAPACITY = 1024 * 1024; - - private BufferAllocator allocator; - - private IntVector intVector; - - private VarCharVector varCharVector; - - /** - * Setup benchmarks. - */ - @Setup - public void prepare() { - allocator = new RootAllocator(ALLOCATOR_CAPACITY); - intVector = new IntVector("intVector", allocator); - varCharVector = new VarCharVector("varcharVector", allocator); - - intVector.allocateNew(VECTOR_LENGTH); - varCharVector.allocateNew(VECTOR_LENGTH); - - for (int i = 0; i < VECTOR_LENGTH; i++) { - if (i % 3 == 0) { - intVector.setNull(i); - varCharVector.setNull(i); - } else { - intVector.setSafe(i, i * i); - varCharVector.setSafe(i, ("teststring" + i).getBytes(StandardCharsets.UTF_8)); - } - } - intVector.setValueCount(VECTOR_LENGTH); - varCharVector.setValueCount(VECTOR_LENGTH); - } - - /** - * Tear down benchmarks. - */ - @TearDown - public void tearDown() { - intVector.close(); - varCharVector.close();; - allocator.close(); - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MICROSECONDS) - public int copyValueSafeForIntVector() { - IntVector toVector = new IntVector("intVector", allocator); - toVector.setValueCount(VECTOR_LENGTH); - TransferPair transferPair = intVector.makeTransferPair(toVector); - for (int i = 0; i < VECTOR_LENGTH; i++) { - transferPair.copyValueSafe(i, i); - } - toVector.close(); - return 0; - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MICROSECONDS) - public int copyValueSafeForVarcharVector() { - VarCharVector toVector = new VarCharVector("varcharVector", allocator); - toVector.setValueCount(VECTOR_LENGTH); - TransferPair transferPair = varCharVector.makeTransferPair(toVector); - for (int i = 0; i < VECTOR_LENGTH; i++) { - transferPair.copyValueSafe(i, i); - } - toVector.close(); - return 0; - } - - public static void main(String [] args) throws RunnerException { - Options opt = new OptionsBuilder() - .include(TransferPairBenchmarks.class.getSimpleName()) - .forks(1) - .build(); - - new Runner(opt).run(); - } -} - diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java index 93b8579ca45..98d372aa97f 100644 --- a/java/vector/src/main/codegen/templates/UnionVector.java +++ b/java/vector/src/main/codegen/templates/UnionVector.java @@ -408,8 +408,6 @@ public TransferPair makeTransferPair(ValueVector target) { @Override public void copyFrom(int inIndex, int outIndex, ValueVector from) { Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); - Preconditions.checkArgument(inIndex >= 0 && inIndex < from.getValueCount(), - "Invalid inIndex: %s", inIndex); UnionVector fromCast = (UnionVector) from; fromCast.getReader().setPosition(inIndex); getWriter().setPosition(outIndex); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java index fa9e12a1887..a4e94bcac09 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java @@ -833,8 +833,6 @@ protected void handleSafe(int index) { @Override public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); - Preconditions.checkArgument(fromIndex >= 0 && fromIndex < from.getValueCount(), - "Invalid fromIndex: %s", fromIndex); if (from.isNull(fromIndex)) { BitVectorHelper.unsetBit(this.getValidityBuffer(), thisIndex); } else { @@ -856,8 +854,6 @@ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { @Override public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); - Preconditions.checkArgument(fromIndex >= 0 && fromIndex < from.getValueCount(), - "Invalid fromIndex: %s", fromIndex); handleSafe(thisIndex); copyFrom(fromIndex, thisIndex, from); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java index ccd14c96905..89395ef7e22 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java @@ -1339,8 +1339,6 @@ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { @Override public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); - Preconditions.checkArgument(fromIndex >= 0 && fromIndex < from.getValueCount(), - "Invalid fromIndex: %s", fromIndex); if (from.isNull(fromIndex)) { handleSafe(thisIndex, 0); fillHoles(thisIndex); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 03ab4900df9..312a3556082 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -364,8 +364,6 @@ public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { @Override public void copyFrom(int inIndex, int outIndex, ValueVector from) { Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); - Preconditions.checkArgument(inIndex >= 0 && inIndex < from.getValueCount(), - "Invalid inIndex: %s", inIndex); FieldReader in = from.getReader(); in.setPosition(inIndex); FieldWriter out = getWriter(); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java index f739fcde928..fbd51235892 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java @@ -99,8 +99,6 @@ public FieldReader getReader() { @Override public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); - Preconditions.checkArgument(fromIndex >= 0 && fromIndex < from.getValueCount(), - "Invalid fromIndex: %s", fromIndex); if (ephPair == null || ephPair.from != from) { ephPair = (StructTransferPair) from.makeTransferPair(this); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java index 1e8728ece30..753ab871076 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java @@ -29,6 +29,7 @@ import org.junit.After; import org.junit.Before; import org.junit.Test; +import org.junit.jupiter.api.Assertions; public class TestSplitAndTransfer { private BufferAllocator allocator; @@ -172,22 +173,6 @@ public void testCopyValueSafe() { } } - @Test(expected = IllegalArgumentException.class) - public void testInvalidCopyValueSafe() { - try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator); - final VarCharVector newVarCharVector = new VarCharVector("newvector", allocator)) { - varCharVector.allocateNew(10000, 1000); - - final int valueCount = 500; - populateVarcharVector(varCharVector, valueCount, null); - - final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector); - - // fromIndex is invalid. - tp.copyValueSafe(valueCount, 0); - } - } - @Test public void testSplitAndTransferNon() { try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) { @@ -224,7 +209,7 @@ public void testSplitAndTransferAll() { } } - @Test(expected = IllegalArgumentException.class) + @Test public void testInvalidStartIndex() { try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator); final VarCharVector newVarCharVector = new VarCharVector("newvector", allocator)) { @@ -235,13 +220,17 @@ public void testInvalidStartIndex() { final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector); - tp.splitAndTransfer(valueCount, 10); + IllegalArgumentException e = Assertions.assertThrows( + IllegalArgumentException.class, + () -> tp.splitAndTransfer(valueCount, 10)); + + assertEquals("Invalid startIndex: 500", e.getMessage()); newVarCharVector.clear(); } } - @Test(expected = IllegalArgumentException.class) + @Test public void testInvalidLength() { try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator); final VarCharVector newVarCharVector = new VarCharVector("newvector", allocator)) { @@ -252,7 +241,11 @@ public void testInvalidLength() { final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector); - tp.splitAndTransfer(0, valueCount * 2); + IllegalArgumentException e = Assertions.assertThrows( + IllegalArgumentException.class, + () -> tp.splitAndTransfer(0, valueCount * 2)); + + assertEquals("Invalid length: 1000", e.getMessage()); newVarCharVector.clear(); } From 0d3c7eab3344bcca36f42759959f3b410c9d28e6 Mon Sep 17 00:00:00 2001 From: tianchen Date: Fri, 17 Jan 2020 17:05:19 +0800 Subject: [PATCH 3/4] add benchmark --- .../vector/util/TransferPairBenchmarks.java | 124 ++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java diff --git a/java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java b/java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java new file mode 100644 index 00000000000..f932a26de77 --- /dev/null +++ b/java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.util; + +import java.nio.charset.StandardCharsets; +import java.util.concurrent.TimeUnit; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.VarCharVector; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +/** + * Benchmarks for {@link TransferPair}. + */ +@State(Scope.Benchmark) +public class TransferPairBenchmarks { + + private static final int VECTOR_LENGTH = 1024; + + private static final int ALLOCATOR_CAPACITY = 1024 * 1024; + + private BufferAllocator allocator; + + private IntVector intVector; + + private VarCharVector varCharVector; + + /** + * Setup benchmarks. + */ + @Setup + public void prepare() { + allocator = new RootAllocator(ALLOCATOR_CAPACITY); + intVector = new IntVector("intVector", allocator); + varCharVector = new VarCharVector("varcharVector", allocator); + + intVector.allocateNew(VECTOR_LENGTH); + varCharVector.allocateNew(VECTOR_LENGTH); + + for (int i = 0; i < VECTOR_LENGTH; i++) { + if (i % 3 == 0) { + intVector.setNull(i); + varCharVector.setNull(i); + } else { + intVector.setSafe(i, i * i); + varCharVector.setSafe(i, ("teststring" + i).getBytes(StandardCharsets.UTF_8)); + } + } + intVector.setValueCount(VECTOR_LENGTH); + varCharVector.setValueCount(VECTOR_LENGTH); + } + + /** + * Tear down benchmarks. + */ + @TearDown + public void tearDown() { + intVector.close(); + varCharVector.close();; + allocator.close(); + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public int splitAndTransferIntVector() { + IntVector toVector = new IntVector("intVector", allocator); + toVector.setValueCount(VECTOR_LENGTH); + TransferPair transferPair = intVector.makeTransferPair(toVector); + transferPair.splitAndTransfer(0, VECTOR_LENGTH); + toVector.close(); + return 0; + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public int splitAndTransferVarcharVector() { + VarCharVector toVector = new VarCharVector("varcharVector", allocator); + toVector.setValueCount(VECTOR_LENGTH); + TransferPair transferPair = varCharVector.makeTransferPair(toVector); + transferPair.splitAndTransfer(0, VECTOR_LENGTH); + toVector.close(); + return 0; + } + + public static void main(String [] args) throws RunnerException { + Options opt = new OptionsBuilder() + .include(TransferPairBenchmarks.class.getSimpleName()) + .forks(1) + .build(); + + new Runner(opt).run(); + } +} From f3b897ddd01abbc333e68829bcc3f075b6d32f8a Mon Sep 17 00:00:00 2001 From: tianchen Date: Sun, 19 Jan 2020 12:50:40 +0800 Subject: [PATCH 4/4] fix style --- .../org/apache/arrow/vector/util/TransferPairBenchmarks.java | 1 - .../test/java/org/apache/arrow/vector/TestSplitAndTransfer.java | 1 - 2 files changed, 2 deletions(-) diff --git a/java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java b/java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java index f932a26de77..235eca53c84 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java +++ b/java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java @@ -24,7 +24,6 @@ import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.VarCharVector; - import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Mode; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java index 753ab871076..d5e4a00d599 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java @@ -24,7 +24,6 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.util.TransferPair; import org.junit.After; import org.junit.Before;