From 9d98f2a7378f3f523ec3012d1e34797d987fe97e Mon Sep 17 00:00:00 2001 From: "chenweiguo.vc" Date: Mon, 4 Nov 2024 23:30:27 +0800 Subject: [PATCH 1/3] VariableWidthViewVectorBenchmarks --- .../vector/VariableWidthVectorBenchmarks.java | 5 +- .../VariableWidthViewVectorBenchmarks.java | 128 ++++++++++++++++++ 2 files changed, 131 insertions(+), 2 deletions(-) create mode 100644 java/performance/src/main/java/org/apache/arrow/vector/VariableWidthViewVectorBenchmarks.java diff --git a/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java index 0bce6569d26..d1bc0709ee1 100644 --- a/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java @@ -23,6 +23,7 @@ import org.apache.arrow.vector.holders.NullableVarCharHolder; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Level; import org.openjdk.jmh.annotations.Mode; import org.openjdk.jmh.annotations.OutputTimeUnit; import org.openjdk.jmh.annotations.Scope; @@ -53,7 +54,7 @@ public class VariableWidthVectorBenchmarks { private VarCharVector vector; /** Setup benchmarks. */ - @Setup + @Setup(Level.Iteration) public void prepare() { allocator = new RootAllocator(ALLOCATOR_CAPACITY); vector = new VarCharVector("vector", allocator); @@ -63,7 +64,7 @@ public void prepare() { } /** Tear down benchmarks. 
*/ - @TearDown + @TearDown(Level.Iteration) public void tearDown() { arrowBuff.close(); vector.close(); diff --git a/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthViewVectorBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthViewVectorBenchmarks.java new file mode 100644 index 00000000000..cc571c211b5 --- /dev/null +++ b/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthViewVectorBenchmarks.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.vector; + +import java.util.concurrent.TimeUnit; +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.holders.NullableViewVarCharHolder; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +/** Benchmarks for {@link BaseVariableWidthViewVector}. */ +@State(Scope.Benchmark) +public class VariableWidthViewVectorBenchmarks { + // checkstyle:off: MissingJavadocMethod + + private static final int VECTOR_CAPACITY = 16 * 1024; + + private static final int VECTOR_LENGTH = 1024; + + private static final int ALLOCATOR_CAPACITY = 1024 * 1024; + + private static byte[] bytes = VariableWidthVectorBenchmarks.class.getName().getBytes(); + private ArrowBuf arrowBuff; + + private BufferAllocator allocator; + + private ViewVarCharVector vector; + + /** Setup benchmarks. */ + @Setup(Level.Iteration) + public void prepare() { + allocator = new RootAllocator(); + vector = new ViewVarCharVector("vector", allocator); + vector.allocateNew(VECTOR_CAPACITY, VECTOR_LENGTH); + arrowBuff = allocator.buffer(VECTOR_LENGTH); + arrowBuff.setBytes(0, bytes, 0, bytes.length); + } + + /** Tear down benchmarks.
*/ + @TearDown(Level.Iteration) + public void tearDown() { + arrowBuff.close(); + vector.close(); + allocator.close(); + } + + /** + * Test {@link BaseVariableWidthViewVector#getValueCapacity()}. + * + * @return the value capacity; returned only so the JIT cannot eliminate the call as dead code. + */ + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.NANOSECONDS) + public int getValueCapacity() { + return vector.getValueCapacity(); + } + + @Benchmark + @Fork(1) + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + public int setSafeFromArray() { + for (int i = 0; i < 500; ++i) { + vector.setSafe(i * 40, bytes); + } + return vector.getBufferSize(); + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + public int setSafeFromNullableVarcharHolder() { + NullableViewVarCharHolder nvch = new NullableViewVarCharHolder(); + nvch.buffer = arrowBuff; + nvch.start = 0; + nvch.end = bytes.length; + for (int i = 0; i < 50; ++i) { + nvch.isSet = 0; + for (int j = 0; j < 9; ++j) { + int idx = 10 * i + j; + vector.setSafe(idx, nvch); + } + nvch.isSet = 1; + vector.setSafe(10 * (i + 1), nvch); + } + return vector.getBufferSize(); + } + + public static void main(String[] args) throws RunnerException { + Options opt = + new OptionsBuilder() + .include(VariableWidthViewVectorBenchmarks.class.getSimpleName()) + .forks(1) + .build(); + + new Runner(opt).run(); + } + // checkstyle:on: MissingJavadocMethod +} From 76f3fc1f6efe50c4302d15d0e18ee077cc06fd34 Mon Sep 17 00:00:00 2001 From: "chenweiguo.vc" Date: Mon, 4 Nov 2024 23:56:21 +0800 Subject: [PATCH 2/3] VariableWidthViewVectorBenchmarks --- .../arrow/vector/VariableWidthVectorBenchmarks.java | 6 +++++- .../arrow/vector/VariableWidthViewVectorBenchmarks.java | 8 +++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java
b/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java index d1bc0709ee1..3249918b447 100644 --- a/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java @@ -26,6 +26,7 @@ import org.openjdk.jmh.annotations.Level; import org.openjdk.jmh.annotations.Mode; import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; @@ -53,6 +54,9 @@ public class VariableWidthVectorBenchmarks { private VarCharVector vector; + @Param({"1", "2", "10", "40"}) + private int step; + /** Setup benchmarks. */ @Setup(Level.Iteration) public void prepare() { @@ -88,7 +92,7 @@ public int getValueCapacity() { @OutputTimeUnit(TimeUnit.MILLISECONDS) public int setSafeFromArray() { for (int i = 0; i < 500; ++i) { - vector.setSafe(i * 40, bytes); + vector.setSafe(i * step, bytes); } return vector.getBufferSize(); } diff --git a/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthViewVectorBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthViewVectorBenchmarks.java index cc571c211b5..9a04f868e03 100644 --- a/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthViewVectorBenchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthViewVectorBenchmarks.java @@ -23,10 +23,10 @@ import org.apache.arrow.vector.holders.NullableViewVarCharHolder; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Fork; import org.openjdk.jmh.annotations.Level; import org.openjdk.jmh.annotations.Mode; import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; import 
org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; @@ -54,6 +54,9 @@ public class VariableWidthViewVectorBenchmarks { private ViewVarCharVector vector; + @Param({"1", "2", "10", "40"}) + private int step; + /** Setup benchmarks. */ @Setup(Level.Iteration) public void prepare() { @@ -85,12 +88,11 @@ public int getValueCapacity() { } @Benchmark - @Fork(1) @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.MILLISECONDS) public int setSafeFromArray() { for (int i = 0; i < 500; ++i) { - vector.setSafe(i * 40, bytes); + vector.setSafe(i * step, bytes); } return vector.getBufferSize(); } From 30e595411e37b7ca2739c7005442ed7b329eeaea Mon Sep 17 00:00:00 2001 From: "chenweiguo.vc" Date: Tue, 5 Nov 2024 22:33:37 +0800 Subject: [PATCH 3/3] only set zero if necessary --- .../vector/BaseVariableWidthViewVector.java | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java index 15d21827839..1ad2144c549 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java @@ -1367,11 +1367,13 @@ protected ArrowBuf allocateOrGetLastDataBuffer(int length) { protected final void setBytes(int index, byte[] value, int start, int length) { int writePosition = index * ELEMENT_SIZE; - // to clear the memory segment of view being written to - // this is helpful in case of overwriting the value - viewBuffer.setZero(writePosition, ELEMENT_SIZE); - if (length <= INLINE_SIZE) { + // clear the view slot being written to, but only when it already + // holds non-zero bytes (i.e. an earlier value is being overwritten) + if (viewBuffer.getLong(writePosition) != 0 || viewBuffer.getLong(writePosition + 8) != 0) { + viewBuffer.setZero(writePosition, ELEMENT_SIZE); + } + //
allocate inline buffer // set length viewBuffer.setInt(writePosition, length); @@ -1411,11 +1413,13 @@ protected final void setBytes(int index, byte[] value, int start, int length) { protected final void setBytes(int index, ArrowBuf valueBuf, int start, int length) { int writePosition = index * ELEMENT_SIZE; - // to clear the memory segment of view being written to - // this is helpful in case of overwriting the value - viewBuffer.setZero(writePosition, ELEMENT_SIZE); - if (length <= INLINE_SIZE) { + // clear the view slot being written to, but only when it already + // holds non-zero bytes (i.e. an earlier value is being overwritten) + if (viewBuffer.getLong(writePosition) != 0 || viewBuffer.getLong(writePosition + 8) != 0) { + viewBuffer.setZero(writePosition, ELEMENT_SIZE); + } + // allocate inline buffer // set length viewBuffer.setInt(writePosition, length);