From 0c3d15645af0b026da59055964e8616312c6d243 Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Thu, 25 Apr 2019 14:28:54 +0800 Subject: [PATCH 1/7] [ARROW-5209][Java]Provide initial performance benchmarks from SQL workloads --- .../sql/SqlPerormancefTestBase.java | 364 ++++++++++++++++++ .../vector/performance/sql/TpchQ1Test.java | 222 +++++++++++ 2 files changed, 586 insertions(+) create mode 100644 java/vector/src/test/java/org/apache/arrow/vector/performance/sql/SqlPerormancefTestBase.java create mode 100644 java/vector/src/test/java/org/apache/arrow/vector/performance/sql/TpchQ1Test.java diff --git a/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/SqlPerormancefTestBase.java b/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/SqlPerormancefTestBase.java new file mode 100644 index 00000000000..3dece132cd1 --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/SqlPerormancefTestBase.java @@ -0,0 +1,364 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.performance.sql; + +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.DateDayVector; +import org.apache.arrow.vector.FixedWidthVector; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.Float8Vector; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.SmallIntVector; +import org.apache.arrow.vector.TinyIntVector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VariableWidthVector; +import org.apache.arrow.vector.types.FloatingPointPrecision; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; + +import java.util.Random; +import java.util.function.Supplier; + +import static org.apache.arrow.vector.types.DateUnit.DAY; + +/** + * The base class for all SQL performance evaluations. + */ +public abstract class SqlPerormancefTestBase { + + /** + * The total amount of memory used in each evaluation. + */ + public static final long ALLOCATION_CAPACITY = 5 * 1024 * 1024L; + + /** + * The default capacity of each vector. + */ + public static final int DEFAULT_CAPACITY = 1024; + + /** + * The probability that a randomly generated value be null. + */ + public static final double NULL_PROBABILITY = 0.1; + + /** + * The minimum length of a random string (inclusive). + */ + public static final int VAR_LENGTH_DATA_MIN_SIZE = 5; + + /** + * The maximum length of a random string (exclusive). + */ + public static final int VAR_LENGTH_DATA_MAX_SIZE = 20; + + /** + * The initial size reserved for a random string. + */ + public static final int VAR_LENGTH_DATA_INIT_SIZE = 16; + + // /UTILITIES FOR DATA TYPES FROM HERE + + public static final FieldType BOOLEAN_TYPE = new FieldType(true, new ArrowType.Bool(), null); + + public static final FieldType TINY_INT_TYPE = new FieldType(true, new ArrowType.Int(8, true), null); + + public static final FieldType SMALL_INT_TYPE = new FieldType(true, new ArrowType.Int(16, true), null); + + public static final FieldType INT_TYPE = new FieldType(true, new ArrowType.Int(32, true), null); + + public static final FieldType BIG_INT_TYPE = new FieldType(true, new ArrowType.Int(64, true), null); + + public static final FieldType FLOAT_TYPE = new FieldType(true, new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), null); + + public static final FieldType DOUBLE_TYPE = new FieldType(true, new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), null); + + public static final FieldType STRING_TYPE = new FieldType(true, new ArrowType.Utf8(), null); + + public static final FieldType BYTE_ARRAY_TYPE = new FieldType(true, new ArrowType.Binary(), null); + + public static final FieldType DATE_DAY_TYPE = new FieldType(true, new ArrowType.Date(DAY), null); + + /** + * The allocator for all vectors. + */ + protected RootAllocator allocator = new RootAllocator(ALLOCATION_CAPACITY); + + /** + * Random number generator. + * A constant seed is applied so the evaluations can be reproduced with exactly the same data. + */ + private Random random = new Random(0); + + /** + * The number of times each benchmark runs. + */ + protected int numRepeats = 10; + + /** + * The template method for running a benchmark. + * @param name name of the benchmark, used for display. + * @param benchmark the benchmark to run. + * @param warmUp the number of warm-ups to run, if it is a positive integer. + */ + protected void runBenchmark(String name, Supplier benchmark, int warmUp) { + if (warmUp > 0) { + // run warm-ups + for (int i = 0; i < warmUp; i++) { + benchmark.get(); + } + } + + double minDuration = Double.MAX_VALUE; + double maxDuration = Double.MIN_VALUE; + double totalDuration = 0; + + // run benchmark + for (int i = 0; i < numRepeats; i++) { + double duration = benchmark.get().doubleValue() / 1e6; + + totalDuration += duration; + if (duration > maxDuration) { + maxDuration = duration; + } + + if (duration < minDuration) { + minDuration = duration; + } + } + + // display statistics. + System.out.println("Statistics for benchmark " + name + ":\n" + + "Num of repeats = " + numRepeats + "\n" + + "Max duration = " + maxDuration + "ms\n" + + "Min duration = " + minDuration + "ms\n" + + "Avg duration = " + totalDuration / numRepeats + "ms"); + } + + // /UTILITY METHODS FROM HERE + + /** + * Utility method to create vectors. + * @param fields the schema of the vectors. + * @param fillData if it is true, fill the vectors will random data. + * @return the created vectors. + */ + public ValueVector[] createVectors(Field[] fields, boolean fillData) { + ValueVector[] ret = new ValueVector[fields.length]; + for (int i = 0; i < fields.length; i++) { + // create an empty vector + ret[i] = createVector(fields[i]); + + // reserve memory space for the vector + if (ret[i] instanceof FixedWidthVector) { + ((FixedWidthVector) ret[i]).allocateNew(DEFAULT_CAPACITY); + } else if (ret[i] instanceof VariableWidthVector) { + ((VariableWidthVector) ret[i]).allocateNew(VAR_LENGTH_DATA_INIT_SIZE * DEFAULT_CAPACITY, DEFAULT_CAPACITY); + } + + // fill random data, if necessary + if (fillData) { + fillRandomData(ret[i]); + } + } + return ret; + } + + /** + * Create an empty vector. + * @param field The name and data type of the vector to create. + * @return the created vector. + */ + protected ValueVector createVector(Field field) { + ArrowType vectorType = field.getType(); + String name = field.getName(); + + if (vectorType instanceof ArrowType.Bool) { + return new BitVector(name, allocator); + } else if (vectorType instanceof ArrowType.Int) { + ArrowType.Int intType = (ArrowType.Int) vectorType; + switch (intType.getBitWidth()) { + case 8: + return new TinyIntVector(name, allocator); + case 16: + return new SmallIntVector(name, allocator); + case 32: + return new IntVector(name, allocator); + case 64: + return new BigIntVector(name, allocator); + default: + throw new IllegalArgumentException("Unknown width for int type: " + intType.getBitWidth()); + } + } else if (vectorType instanceof ArrowType.FloatingPoint) { + ArrowType.FloatingPoint floatType = (ArrowType.FloatingPoint) vectorType; + switch (floatType.getPrecision()) { + case SINGLE: + return new Float4Vector(name, allocator); + case DOUBLE: + return new Float8Vector(name, allocator); + default: + throw new IllegalArgumentException("Unknown precision for float type: " + floatType.getPrecision()); + } + } else if (vectorType instanceof ArrowType.Binary) { + return new VarBinaryVector(name, allocator); + } else if (vectorType instanceof ArrowType.Utf8) { + return new VarCharVector(name, allocator); + } else if (vectorType instanceof ArrowType.Date && ((ArrowType.Date) vectorType).getUnit() == DAY) { + return new DateDayVector(name, allocator); + } else { + throw new IllegalArgumentException("Unknown arrow type: " + vectorType); + } + } + + /** + * Fill random data to a vector. + * @param vector the vector to fill. + */ + private void fillRandomData(ValueVector vector) { + if (vector instanceof BitVector) { + BitVector vec = (BitVector) vector; + for (int i = 0; i < vector.getValueCapacity(); i++) { + boolean isNull = random.nextDouble() <= NULL_PROBABILITY; + if (isNull) { + vec.setNull(i); + } else { + vec.set(i, random.nextBoolean() ? 1 : 0); + } + } + } else if (vector instanceof TinyIntVector) { + TinyIntVector vec = (TinyIntVector) vector; + for (int i = 0; i < vector.getValueCapacity(); i++) { + boolean isNull = random.nextDouble() <= NULL_PROBABILITY; + if (isNull) { + vec.setNull(i); + } else { + vec.set(i, random.nextInt()); + } + } + } else if (vector instanceof SmallIntVector) { + SmallIntVector vec = (SmallIntVector) vector; + for (int i = 0; i < vector.getValueCapacity(); i++) { + boolean isNull = random.nextDouble() <= NULL_PROBABILITY; + if (isNull) { + vec.setNull(i); + } else { + vec.set(i, random.nextInt()); + } + } + } else if (vector instanceof IntVector) { + IntVector vec = (IntVector) vector; + for (int i = 0; i < vector.getValueCapacity(); i++) { + boolean isNull = random.nextDouble() <= NULL_PROBABILITY; + if (isNull) { + vec.setNull(i); + } else { + vec.set(i, random.nextInt()); + } + } + } else if (vector instanceof BigIntVector) { + BigIntVector vec = (BigIntVector) vector; + for (int i = 0; i < vector.getValueCapacity(); i++) { + boolean isNull = random.nextDouble() <= NULL_PROBABILITY; + if (isNull) { + vec.setNull(i); + } else { + vec.set(i, random.nextLong()); + } + } + } else if (vector instanceof Float4Vector) { + Float4Vector vec = (Float4Vector) vector; + for (int i = 0; i < vector.getValueCapacity(); i++) { + boolean isNull = random.nextDouble() <= NULL_PROBABILITY; + if (isNull) { + vec.setNull(i); + } else { + vec.set(i, random.nextFloat()); + } + } + } else if (vector instanceof Float8Vector) { + Float8Vector vec = (Float8Vector) vector; + for (int i = 0; i < vector.getValueCapacity(); i++) { + boolean isNull = random.nextDouble() <= NULL_PROBABILITY; + if (isNull) { + vec.setNull(i); + } else { + vec.set(i, random.nextDouble()); + } + } + } else if (vector instanceof VarBinaryVector) { + VarBinaryVector vec = (VarBinaryVector) vector; + for (int i = 0; i < vector.getValueCapacity(); i++) { + boolean isNull = random.nextDouble() <= NULL_PROBABILITY; + if (isNull) { + vec.setNull(i); + } else { + int length = getVariableDataLength(); + byte[] buffer = new byte[length]; + random.nextBytes(buffer); + vec.setSafe(i, buffer); + } + } + } else if (vector instanceof VarCharVector) { + VarCharVector vec = (VarCharVector) vector; + for (int i = 0; i < vector.getValueCapacity(); i++) { + boolean isNull = random.nextDouble() <= NULL_PROBABILITY; + if (isNull) { + vec.setNull(i); + } else { + int length = getVariableDataLength(); + byte[] buffer = new byte[length]; + random.nextBytes(buffer); + vec.setSafe(i, buffer); + } + } + } else if (vector instanceof DateDayVector) { + DateDayVector vec = (DateDayVector) vector; + for (int i = 0; i < vector.getValueCapacity(); i++) { + boolean isNull = random.nextDouble() <= NULL_PROBABILITY; + if (isNull) { + vec.setNull(i); + } else { + int day = random.nextInt() & Integer.MAX_VALUE % (2000 * 365); + vec.set(i, day); + } + } + } + else { + throw new IllegalArgumentException("Vector with type " + vector.getClass() + " is not supported."); + } + + } + + /** + * Generate the length of a random byte array. + * @return + */ + private int getVariableDataLength() { + // first, generate a non-negative integer; + int r = random.nextInt() & Integer.MAX_VALUE; + return r % (VAR_LENGTH_DATA_MAX_SIZE - VAR_LENGTH_DATA_MIN_SIZE) + VAR_LENGTH_DATA_MIN_SIZE; + } + + public void close() { + this.allocator.close(); + } +} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/TpchQ1Test.java b/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/TpchQ1Test.java new file mode 100644 index 00000000000..32fe100de96 --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/TpchQ1Test.java @@ -0,0 +1,222 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.performance.sql; + +import org.apache.arrow.vector.DateDayVector; +import org.apache.arrow.vector.Float8Vector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.holders.NullableVarCharHolder; +import org.apache.arrow.vector.types.pojo.Field; +import org.junit.Test; + +/** + * Evaluate the benchmarks for TPC-H Q1. + */ +public class TpchQ1Test extends SqlPerormancefTestBase { + + /** + * Evaluate the workload of an operator that produces new data by projecting and filtering the input data. + * @return the duration of the evaluation, in nano-second. + */ + private long projectAndFilter() { + // create input schema + Field[] inputFields = new Field[]{ + new Field("inCol0", DOUBLE_TYPE, null), + new Field("inCol1", DOUBLE_TYPE, null), + new Field("inCol2", DOUBLE_TYPE, null), + new Field("inCol3", DOUBLE_TYPE, null), + new Field("inCol4", STRING_TYPE, null), + new Field("inCol5", STRING_TYPE, null), + new Field("inCol6", DATE_DAY_TYPE, null), + }; + + // create input data + ValueVector[] inputVectors = createVectors(inputFields, true); + + // create output schema + Field[] outputFields = new Field[] { + new Field("outCol0", STRING_TYPE, null), + new Field("outCol1", STRING_TYPE, null), + new Field("outCol2", DOUBLE_TYPE, null), + new Field("outCol3", DOUBLE_TYPE, null), + new Field("outCol4", DOUBLE_TYPE, null), + new Field("outCol5", DOUBLE_TYPE, null), + new Field("outCol6", DOUBLE_TYPE, null), + }; + + // create output data + ValueVector[] outputVectors = createVectors(outputFields, false); + + // vector references that will be used in the computations + Float8Vector inputCol0 = (Float8Vector) inputVectors[0]; + Float8Vector inputCol1 = (Float8Vector) inputVectors[1]; + Float8Vector inputCol2 = (Float8Vector) inputVectors[2]; + Float8Vector inputCol3 = (Float8Vector) inputVectors[3]; + VarCharVector inputCol4 = (VarCharVector) inputVectors[4]; + VarCharVector inputCol5 = (VarCharVector) inputVectors[5]; + DateDayVector inputCol6 = (DateDayVector) inputVectors[6]; + + VarCharVector outputCol0 = (VarCharVector) outputVectors[0]; + VarCharVector outputCol1 = (VarCharVector) outputVectors[1]; + Float8Vector outputCol2 = (Float8Vector) outputVectors[2]; + Float8Vector outputCol3 = (Float8Vector) outputVectors[3]; + Float8Vector outputCol4 = (Float8Vector) outputVectors[4]; + Float8Vector outputCol5 = (Float8Vector) outputVectors[5]; + Float8Vector outputCol6 = (Float8Vector) outputVectors[6]; + + // do evaluation + long start = System.nanoTime(); + for (int i = 0; i < DEFAULT_CAPACITY; i++) { + boolean isInputCol6Null = inputCol6.isNull(i); + int inputCol6Value = -1; + if (!isInputCol6Null) { + inputCol6Value = inputCol6.get(i); + } + + boolean filterResult = false; + if (!isInputCol6Null) { + filterResult = inputCol6Value <= 10441; + } + + if (filterResult) { + boolean isInputCol5Null = inputCol5.isNull(i); + NullableVarCharHolder inputCol5Value = new NullableVarCharHolder(); + if (!isInputCol5Null) { + inputCol5.get(i, inputCol5Value); + } + boolean isInputCol0Null = inputCol0.isNull(i); + double inputCol0Value = -1.0d; + if (!isInputCol0Null) { + inputCol0Value = inputCol0.get(i); + } + boolean isInputCol2Null = inputCol2.isNull(i); + double inputCol2Value = -1.0d; + if (!isInputCol2Null) { + inputCol2Value = inputCol2.get(i); + } + boolean isInputCol4Null = inputCol4.isNull(i); + NullableVarCharHolder inputCol4Value = new NullableVarCharHolder(); + if (!isInputCol4Null) { + inputCol4.get(i, inputCol4Value); + } + boolean isInputCol1Null = inputCol1.isNull(i); + double inputCol1Value = -1.0d; + if (!isInputCol1Null) { + inputCol1Value = inputCol1.get(i); + } + boolean isInputCol3Null = inputCol3.isNull(i); + double inputCol3Value = -1.0d; + if (!isInputCol3Null) { + inputCol3Value = inputCol3.get(i); + } + + if (isInputCol4Null) { + outputCol0.setNull(i); + } else { + outputCol0.setSafe(i, inputCol4Value); + } + + if (isInputCol5Null) { + outputCol1.setNull(i); + } else { + outputCol1.setSafe(i, inputCol5Value); + } + + if (isInputCol0Null) { + outputCol2.setNull(i); + } else { + outputCol2.set(i, inputCol0Value); + } + + if (isInputCol1Null) { + outputCol3.setNull(i); + } else { + outputCol3.set(i, inputCol1Value); + } + + double minusResult1 = -1.0d; + if (!isInputCol2Null) { + minusResult1 = 1.0d - inputCol2Value; + } + + boolean isInputCol1OrCol2Null = isInputCol1Null || isInputCol2Null; + double multResult1 = -1.0d; + if (!isInputCol1OrCol2Null) { + multResult1 = inputCol1Value * minusResult1; + } + + if (isInputCol1OrCol2Null) { + outputCol4.setNull(i); + } else { + outputCol4.set(i, multResult1); + } + + double minusResult2 = -1.0d; + if (!isInputCol2Null) { + minusResult2 = 1.0d - inputCol2Value; + } + + double multResult2 = -1.0d; + if (!isInputCol1OrCol2Null) { + multResult2 = inputCol1Value * minusResult2; + } + + double addResult = -1.0d; + if (!isInputCol3Null) { + addResult = 1.0d+ inputCol3Value; + } + + boolean isInputCol1OrCol2OrCol3Null = isInputCol1OrCol2Null || isInputCol3Null; + double multResult3 = -1.0d; + if (!isInputCol1OrCol2OrCol3Null) { + multResult3 = multResult2 * addResult; + } + + if (isInputCol1OrCol2OrCol3Null) { + outputCol5.setNull(i); + } else { + outputCol5.set(i, multResult3); + } + + if (isInputCol2Null) { + outputCol6.setNull(i); + } else { + outputCol6.set(i, inputCol2Value); + } + } + } + long end = System.nanoTime(); + + // dispose input/output data + for (int i = 0; i < inputVectors.length; i++) { + inputVectors[i].clear(); + } + + for (int i = 0; i < outputVectors.length; i++) { + outputVectors[i].clear(); + } + + return end - start; + } + + @Test + public void testProjectAndFilter() { + runBenchmark("TPC-H Q1#Project & Filter", () -> projectAndFilter(), 1); + } +} From d1dcff1019b1c6e940888e8785535830dd9d1da3 Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Thu, 25 Apr 2019 14:50:26 +0800 Subject: [PATCH 2/7] [ARROW-5209][Java]Adjust indentions --- .../sql/SqlPerormancefTestBase.java | 568 +++++++++--------- .../vector/performance/sql/TpchQ1Test.java | 373 ++++++------ 2 files changed, 473 insertions(+), 468 deletions(-) diff --git a/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/SqlPerormancefTestBase.java b/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/SqlPerormancefTestBase.java index 3dece132cd1..c998ac0947e 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/SqlPerormancefTestBase.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/SqlPerormancefTestBase.java @@ -46,319 +46,323 @@ */ public abstract class SqlPerormancefTestBase { - /** - * The total amount of memory used in each evaluation. - */ - public static final long ALLOCATION_CAPACITY = 5 * 1024 * 1024L; + /** + * The total amount of memory used in each evaluation. + */ + public static final long ALLOCATION_CAPACITY = 5 * 1024 * 1024L; - /** - * The default capacity of each vector. - */ - public static final int DEFAULT_CAPACITY = 1024; + /** + * The default capacity of each vector. + */ + public static final int DEFAULT_CAPACITY = 1024; - /** - * The probability that a randomly generated value be null. - */ - public static final double NULL_PROBABILITY = 0.1; + /** + * The probability that a randomly generated value be null. + */ + public static final double NULL_PROBABILITY = 0.1; - /** - * The minimum length of a random string (inclusive). - */ - public static final int VAR_LENGTH_DATA_MIN_SIZE = 5; + /** + * The minimum length of a random string (inclusive). + */ + public static final int VAR_LENGTH_DATA_MIN_SIZE = 5; - /** - * The maximum length of a random string (exclusive). - */ - public static final int VAR_LENGTH_DATA_MAX_SIZE = 20; + /** + * The maximum length of a random string (exclusive). + */ + public static final int VAR_LENGTH_DATA_MAX_SIZE = 20; - /** - * The initial size reserved for a random string. - */ - public static final int VAR_LENGTH_DATA_INIT_SIZE = 16; + /** + * The initial size reserved for a random string. + */ + public static final int VAR_LENGTH_DATA_INIT_SIZE = 16; - // /UTILITIES FOR DATA TYPES FROM HERE + // /UTILITIES FOR DATA TYPES FROM HERE - public static final FieldType BOOLEAN_TYPE = new FieldType(true, new ArrowType.Bool(), null); + public static final FieldType BOOLEAN_TYPE = new FieldType(true, new ArrowType.Bool(), null); - public static final FieldType TINY_INT_TYPE = new FieldType(true, new ArrowType.Int(8, true), null); + public static final FieldType TINY_INT_TYPE = new FieldType(true, new ArrowType.Int(8, true), null); - public static final FieldType SMALL_INT_TYPE = new FieldType(true, new ArrowType.Int(16, true), null); + public static final FieldType SMALL_INT_TYPE = new FieldType(true, new ArrowType.Int(16, true), null); - public static final FieldType INT_TYPE = new FieldType(true, new ArrowType.Int(32, true), null); + public static final FieldType INT_TYPE = new FieldType(true, new ArrowType.Int(32, true), null); - public static final FieldType BIG_INT_TYPE = new FieldType(true, new ArrowType.Int(64, true), null); + public static final FieldType BIG_INT_TYPE = new FieldType(true, new ArrowType.Int(64, true), null); - public static final FieldType FLOAT_TYPE = new FieldType(true, new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), null); + public static final FieldType FLOAT_TYPE = new FieldType(true, new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), null); - public static final FieldType DOUBLE_TYPE = new FieldType(true, new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), null); + public static final FieldType DOUBLE_TYPE = new FieldType(true, new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), null); - public static final FieldType STRING_TYPE = new FieldType(true, new ArrowType.Utf8(), null); + public static final FieldType STRING_TYPE = new FieldType(true, new ArrowType.Utf8(), null); - public static final FieldType BYTE_ARRAY_TYPE = new FieldType(true, new ArrowType.Binary(), null); + public static final FieldType BYTE_ARRAY_TYPE = new FieldType(true, new ArrowType.Binary(), null); - public static final FieldType DATE_DAY_TYPE = new FieldType(true, new ArrowType.Date(DAY), null); + public static final FieldType DATE_DAY_TYPE = new FieldType(true, new ArrowType.Date(DAY), null); - /** - * The allocator for all vectors. - */ - protected RootAllocator allocator = new RootAllocator(ALLOCATION_CAPACITY); + /** + * The allocator for all vectors. + */ + protected RootAllocator allocator = new RootAllocator(ALLOCATION_CAPACITY); - /** - * Random number generator. - * A constant seed is applied so the evaluations can be reproduced with exactly the same data. - */ - private Random random = new Random(0); + /** + * Random number generator. + * A constant seed is applied so the evaluations can be reproduced with exactly the same data. + */ + private Random random = new Random(0); - /** - * The number of times each benchmark runs. - */ - protected int numRepeats = 10; + /** + * The number of times each benchmark runs. + */ + protected int numRepeats = 10; - /** - * The template method for running a benchmark. - * @param name name of the benchmark, used for display. - * @param benchmark the benchmark to run. - * @param warmUp the number of warm-ups to run, if it is a positive integer. - */ - protected void runBenchmark(String name, Supplier benchmark, int warmUp) { - if (warmUp > 0) { - // run warm-ups - for (int i = 0; i < warmUp; i++) { - benchmark.get(); - } - } - - double minDuration = Double.MAX_VALUE; - double maxDuration = Double.MIN_VALUE; - double totalDuration = 0; + /** + * The template method for running a benchmark. + * + * @param name name of the benchmark, used for display. + * @param benchmark the benchmark to run. + * @param warmUp the number of warm-ups to run, if it is a positive integer. + */ + protected void runBenchmark(String name, Supplier benchmark, int warmUp) { + if (warmUp > 0) { + // run warm-ups + for (int i = 0; i < warmUp; i++) { + benchmark.get(); + } + } - // run benchmark - for (int i = 0; i < numRepeats; i++) { - double duration = benchmark.get().doubleValue() / 1e6; + double minDuration = Double.MAX_VALUE; + double maxDuration = Double.MIN_VALUE; + double totalDuration = 0; - totalDuration += duration; - if (duration > maxDuration) { - maxDuration = duration; - } + // run benchmark + for (int i = 0; i < numRepeats; i++) { + double duration = benchmark.get().doubleValue() / 1e6; - if (duration < minDuration) { - minDuration = duration; - } - } + totalDuration += duration; + if (duration > maxDuration) { + maxDuration = duration; + } - // display statistics. - System.out.println("Statistics for benchmark " + name + ":\n" + - "Num of repeats = " + numRepeats + "\n" + - "Max duration = " + maxDuration + "ms\n" + - "Min duration = " + minDuration + "ms\n" + - "Avg duration = " + totalDuration / numRepeats + "ms"); + if (duration < minDuration) { + minDuration = duration; + } } - // /UTILITY METHODS FROM HERE - - /** - * Utility method to create vectors. - * @param fields the schema of the vectors. - * @param fillData if it is true, fill the vectors will random data. - * @return the created vectors. - */ - public ValueVector[] createVectors(Field[] fields, boolean fillData) { - ValueVector[] ret = new ValueVector[fields.length]; - for (int i = 0; i < fields.length; i++) { - // create an empty vector - ret[i] = createVector(fields[i]); - - // reserve memory space for the vector - if (ret[i] instanceof FixedWidthVector) { - ((FixedWidthVector) ret[i]).allocateNew(DEFAULT_CAPACITY); - } else if (ret[i] instanceof VariableWidthVector) { - ((VariableWidthVector) ret[i]).allocateNew(VAR_LENGTH_DATA_INIT_SIZE * DEFAULT_CAPACITY, DEFAULT_CAPACITY); - } - - // fill random data, if necessary - if (fillData) { - fillRandomData(ret[i]); - } - } - return ret; + // display statistics. + System.out.println("Statistics for benchmark " + name + ":\n" + + "Num of repeats = " + numRepeats + "\n" + + "Max duration = " + maxDuration + "ms\n" + + "Min duration = " + minDuration + "ms\n" + + "Avg duration = " + totalDuration / numRepeats + "ms"); + } + + // /UTILITY METHODS FROM HERE + + /** + * Utility method to create vectors. + * + * @param fields the schema of the vectors. + * @param fillData if it is true, fill the vectors will random data. + * @return the created vectors. + */ + public ValueVector[] createVectors(Field[] fields, boolean fillData) { + ValueVector[] ret = new ValueVector[fields.length]; + for (int i = 0; i < fields.length; i++) { + // create an empty vector + ret[i] = createVector(fields[i]); + + // reserve memory space for the vector + if (ret[i] instanceof FixedWidthVector) { + ((FixedWidthVector) ret[i]).allocateNew(DEFAULT_CAPACITY); + } else if (ret[i] instanceof VariableWidthVector) { + ((VariableWidthVector) ret[i]).allocateNew(VAR_LENGTH_DATA_INIT_SIZE * DEFAULT_CAPACITY, DEFAULT_CAPACITY); + } + + // fill random data, if necessary + if (fillData) { + fillRandomData(ret[i]); + } } - - /** - * Create an empty vector. - * @param field The name and data type of the vector to create. - * @return the created vector. - */ - protected ValueVector createVector(Field field) { - ArrowType vectorType = field.getType(); - String name = field.getName(); - - if (vectorType instanceof ArrowType.Bool) { - return new BitVector(name, allocator); - } else if (vectorType instanceof ArrowType.Int) { - ArrowType.Int intType = (ArrowType.Int) vectorType; - switch (intType.getBitWidth()) { - case 8: - return new TinyIntVector(name, allocator); - case 16: - return new SmallIntVector(name, allocator); - case 32: - return new IntVector(name, allocator); - case 64: - return new BigIntVector(name, allocator); - default: - throw new IllegalArgumentException("Unknown width for int type: " + intType.getBitWidth()); - } - } else if (vectorType instanceof ArrowType.FloatingPoint) { - ArrowType.FloatingPoint floatType = (ArrowType.FloatingPoint) vectorType; - switch (floatType.getPrecision()) { - case SINGLE: - return new Float4Vector(name, allocator); - case DOUBLE: - return new Float8Vector(name, allocator); - default: - throw new IllegalArgumentException("Unknown precision for float type: " + floatType.getPrecision()); - } - } else if (vectorType instanceof ArrowType.Binary) { - return new VarBinaryVector(name, allocator); - } else if (vectorType instanceof ArrowType.Utf8) { - return new VarCharVector(name, allocator); - } else if (vectorType instanceof ArrowType.Date && ((ArrowType.Date) vectorType).getUnit() == DAY) { - return new DateDayVector(name, allocator); + return ret; + } + + /** + * Create an empty vector. + * + * @param field The name and data type of the vector to create. + * @return the created vector. + */ + protected ValueVector createVector(Field field) { + ArrowType vectorType = field.getType(); + String name = field.getName(); + + if (vectorType instanceof ArrowType.Bool) { + return new BitVector(name, allocator); + } else if (vectorType instanceof ArrowType.Int) { + ArrowType.Int intType = (ArrowType.Int) vectorType; + switch (intType.getBitWidth()) { + case 8: + return new TinyIntVector(name, allocator); + case 16: + return new SmallIntVector(name, allocator); + case 32: + return new IntVector(name, allocator); + case 64: + return new BigIntVector(name, allocator); + default: + throw new IllegalArgumentException("Unknown width for int type: " + intType.getBitWidth()); + } + } else if (vectorType instanceof ArrowType.FloatingPoint) { + ArrowType.FloatingPoint floatType = (ArrowType.FloatingPoint) vectorType; + switch (floatType.getPrecision()) { + case SINGLE: + return new Float4Vector(name, allocator); + case DOUBLE: + return new Float8Vector(name, allocator); + default: + throw new IllegalArgumentException("Unknown precision for float type: " + floatType.getPrecision()); + } + } else if (vectorType instanceof ArrowType.Binary) { + return new VarBinaryVector(name, allocator); + } else if (vectorType instanceof ArrowType.Utf8) { + return new VarCharVector(name, allocator); + } else if (vectorType instanceof ArrowType.Date && ((ArrowType.Date) vectorType).getUnit() == DAY) { + return new DateDayVector(name, allocator); + } else { + throw new IllegalArgumentException("Unknown arrow type: " + vectorType); + } + } + + /** + * Fill random data to a vector. + * + * @param vector the vector to fill. + */ + private void fillRandomData(ValueVector vector) { + if (vector instanceof BitVector) { + BitVector vec = (BitVector) vector; + for (int i = 0; i < vector.getValueCapacity(); i++) { + boolean isNull = random.nextDouble() <= NULL_PROBABILITY; + if (isNull) { + vec.setNull(i); } else { - throw new IllegalArgumentException("Unknown arrow type: " + vectorType); + vec.set(i, random.nextBoolean() ? 1 : 0); } - } - - /** - * Fill random data to a vector. - * @param vector the vector to fill. - */ - private void fillRandomData(ValueVector vector) { - if (vector instanceof BitVector) { - BitVector vec = (BitVector) vector; - for (int i = 0; i < vector.getValueCapacity(); i++) { - boolean isNull = random.nextDouble() <= NULL_PROBABILITY; - if (isNull) { - vec.setNull(i); - } else { - vec.set(i, random.nextBoolean() ? 1 : 0); - } - } - } else if (vector instanceof TinyIntVector) { - TinyIntVector vec = (TinyIntVector) vector; - for (int i = 0; i < vector.getValueCapacity(); i++) { - boolean isNull = random.nextDouble() <= NULL_PROBABILITY; - if (isNull) { - vec.setNull(i); - } else { - vec.set(i, random.nextInt()); - } - } - } else if (vector instanceof SmallIntVector) { - SmallIntVector vec = (SmallIntVector) vector; - for (int i = 0; i < vector.getValueCapacity(); i++) { - boolean isNull = random.nextDouble() <= NULL_PROBABILITY; - if (isNull) { - vec.setNull(i); - } else { - vec.set(i, random.nextInt()); - } - } - } else if (vector instanceof IntVector) { - IntVector vec = (IntVector) vector; - for (int i = 0; i < vector.getValueCapacity(); i++) { - boolean isNull = random.nextDouble() <= NULL_PROBABILITY; - if (isNull) { - vec.setNull(i); - } else { - vec.set(i, random.nextInt()); - } - } - } else if (vector instanceof BigIntVector) { - BigIntVector vec = (BigIntVector) vector; - for (int i = 0; i < vector.getValueCapacity(); i++) { - boolean isNull = random.nextDouble() <= NULL_PROBABILITY; - if (isNull) { - vec.setNull(i); - } else { - vec.set(i, random.nextLong()); - } - } - } else if (vector instanceof Float4Vector) { - Float4Vector vec = (Float4Vector) vector; - for (int i = 0; i < vector.getValueCapacity(); i++) { - boolean isNull = random.nextDouble() <= NULL_PROBABILITY; - if (isNull) { - vec.setNull(i); - } else { - vec.set(i, random.nextFloat()); - } - } - } else if (vector instanceof Float8Vector) { - Float8Vector vec = (Float8Vector) vector; - for (int i = 0; i < vector.getValueCapacity(); i++) { - boolean isNull = random.nextDouble() <= NULL_PROBABILITY; - if (isNull) { - vec.setNull(i); - } else { - vec.set(i, random.nextDouble()); - } - } - } else if (vector instanceof VarBinaryVector) { - VarBinaryVector vec = (VarBinaryVector) vector; - for (int i = 0; i < vector.getValueCapacity(); i++) { - boolean isNull = random.nextDouble() <= NULL_PROBABILITY; - if (isNull) { - vec.setNull(i); - } else { - int length = getVariableDataLength(); - byte[] buffer = new byte[length]; - random.nextBytes(buffer); - vec.setSafe(i, buffer); - } - } - } else if (vector instanceof VarCharVector) { - VarCharVector vec = (VarCharVector) vector; - for (int i = 0; i < vector.getValueCapacity(); i++) { - boolean isNull = random.nextDouble() <= NULL_PROBABILITY; - if (isNull) { - vec.setNull(i); - } else { - int length = getVariableDataLength(); - byte[] buffer = new byte[length]; - random.nextBytes(buffer); - vec.setSafe(i, buffer); - } - } - } else if (vector instanceof DateDayVector) { - DateDayVector vec = (DateDayVector) vector; - for (int i = 0; i < vector.getValueCapacity(); i++) { - boolean isNull = random.nextDouble() <= NULL_PROBABILITY; - if (isNull) { - vec.setNull(i); - } else { - int day = random.nextInt() & Integer.MAX_VALUE % (2000 * 365); - vec.set(i, day); - } - } + } + } else if (vector instanceof TinyIntVector) { + TinyIntVector vec = (TinyIntVector) vector; + for (int i = 0; i < vector.getValueCapacity(); i++) { + boolean isNull = random.nextDouble() <= NULL_PROBABILITY; + if (isNull) { + vec.setNull(i); + } else { + vec.set(i, random.nextInt()); } - else { - throw new IllegalArgumentException("Vector with type " + vector.getClass() + " is not supported."); + } + } else if (vector instanceof SmallIntVector) { + SmallIntVector vec = (SmallIntVector) vector; + for (int i = 0; i < vector.getValueCapacity(); i++) { + boolean isNull = random.nextDouble() <= NULL_PROBABILITY; + if (isNull) { + vec.setNull(i); + } else { + vec.set(i, random.nextInt()); } - - } - - /** - * Generate the length of a random byte array. - * @return - */ - private int getVariableDataLength() { - // first, generate a non-negative integer; - int r = random.nextInt() & Integer.MAX_VALUE; - return r % (VAR_LENGTH_DATA_MAX_SIZE - VAR_LENGTH_DATA_MIN_SIZE) + VAR_LENGTH_DATA_MIN_SIZE; + } + } else if (vector instanceof IntVector) { + IntVector vec = (IntVector) vector; + for (int i = 0; i < vector.getValueCapacity(); i++) { + boolean isNull = random.nextDouble() <= NULL_PROBABILITY; + if (isNull) { + vec.setNull(i); + } else { + vec.set(i, random.nextInt()); + } + } + } else if (vector instanceof BigIntVector) { + BigIntVector vec = (BigIntVector) vector; + for (int i = 0; i < vector.getValueCapacity(); i++) { + boolean isNull = random.nextDouble() <= NULL_PROBABILITY; + if (isNull) { + vec.setNull(i); + } else { + vec.set(i, random.nextLong()); + } + } + } else if (vector instanceof Float4Vector) { + Float4Vector vec = (Float4Vector) vector; + for (int i = 0; i < vector.getValueCapacity(); i++) { + boolean isNull = random.nextDouble() <= NULL_PROBABILITY; + if (isNull) { + vec.setNull(i); + } else { + vec.set(i, random.nextFloat()); + } + } + } else if (vector instanceof Float8Vector) { + Float8Vector vec = (Float8Vector) vector; + for (int i = 0; i < vector.getValueCapacity(); i++) { + boolean isNull = random.nextDouble() <= NULL_PROBABILITY; + if (isNull) { + vec.setNull(i); + } else { + vec.set(i, random.nextDouble()); + } + } + } else if (vector instanceof VarBinaryVector) { + VarBinaryVector vec = (VarBinaryVector) vector; + for (int i = 0; i < vector.getValueCapacity(); i++) { + boolean isNull = random.nextDouble() <= NULL_PROBABILITY; + if (isNull) { + vec.setNull(i); + } else { + int length = getVariableDataLength(); + byte[] buffer = new byte[length]; + random.nextBytes(buffer); + vec.setSafe(i, buffer); + } + } + } else if (vector instanceof VarCharVector) { + VarCharVector vec = (VarCharVector) vector; + for (int i = 0; i < vector.getValueCapacity(); i++) { + boolean isNull = random.nextDouble() <= NULL_PROBABILITY; + if (isNull) { + vec.setNull(i); + } else { + int length = getVariableDataLength(); + byte[] buffer = new byte[length]; + random.nextBytes(buffer); + vec.setSafe(i, buffer); + } + } + } else if (vector instanceof DateDayVector) { + DateDayVector vec = (DateDayVector) vector; + for (int i = 0; i < vector.getValueCapacity(); i++) { + boolean isNull = random.nextDouble() <= NULL_PROBABILITY; + if (isNull) { + vec.setNull(i); + } else { + int day = random.nextInt() & Integer.MAX_VALUE % (2000 * 365); + vec.set(i, day); + } + } + } else { + throw new IllegalArgumentException("Vector with type " + vector.getClass() + " is not supported."); } - public void close() { - this.allocator.close(); - } + } + + /** + * Generate the length of a random byte array. + * + * @return + */ + private int getVariableDataLength() { + // first, generate a non-negative integer; + int r = random.nextInt() & Integer.MAX_VALUE; + return r % (VAR_LENGTH_DATA_MAX_SIZE - VAR_LENGTH_DATA_MIN_SIZE) + VAR_LENGTH_DATA_MIN_SIZE; + } + + public void close() { + this.allocator.close(); + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/TpchQ1Test.java b/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/TpchQ1Test.java index 32fe100de96..ad51338f442 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/TpchQ1Test.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/TpchQ1Test.java @@ -30,193 +30,194 @@ */ public class TpchQ1Test extends SqlPerormancefTestBase { - /** - * Evaluate the workload of an operator that produces new data by projecting and filtering the input data. - * @return the duration of the evaluation, in nano-second. - */ - private long projectAndFilter() { - // create input schema - Field[] inputFields = new Field[]{ - new Field("inCol0", DOUBLE_TYPE, null), - new Field("inCol1", DOUBLE_TYPE, null), - new Field("inCol2", DOUBLE_TYPE, null), - new Field("inCol3", DOUBLE_TYPE, null), - new Field("inCol4", STRING_TYPE, null), - new Field("inCol5", STRING_TYPE, null), - new Field("inCol6", DATE_DAY_TYPE, null), - }; - - // create input data - ValueVector[] inputVectors = createVectors(inputFields, true); - - // create output schema - Field[] outputFields = new Field[] { - new Field("outCol0", STRING_TYPE, null), - new Field("outCol1", STRING_TYPE, null), - new Field("outCol2", DOUBLE_TYPE, null), - new Field("outCol3", DOUBLE_TYPE, null), - new Field("outCol4", DOUBLE_TYPE, null), - new Field("outCol5", DOUBLE_TYPE, null), - new Field("outCol6", DOUBLE_TYPE, null), - }; - - // create output data - ValueVector[] outputVectors = createVectors(outputFields, false); - - // vector references that will be used in the computations - Float8Vector inputCol0 = (Float8Vector) inputVectors[0]; - Float8Vector inputCol1 = (Float8Vector) inputVectors[1]; - Float8Vector inputCol2 = (Float8Vector) inputVectors[2]; - Float8Vector inputCol3 = (Float8Vector) inputVectors[3]; - VarCharVector inputCol4 = (VarCharVector) inputVectors[4]; - VarCharVector inputCol5 = (VarCharVector) inputVectors[5]; - DateDayVector inputCol6 = (DateDayVector) inputVectors[6]; - - VarCharVector outputCol0 = (VarCharVector) outputVectors[0]; - VarCharVector outputCol1 = (VarCharVector) outputVectors[1]; - Float8Vector outputCol2 = (Float8Vector) outputVectors[2]; - Float8Vector outputCol3 = (Float8Vector) outputVectors[3]; - Float8Vector outputCol4 = (Float8Vector) outputVectors[4]; - Float8Vector outputCol5 = (Float8Vector) outputVectors[5]; - Float8Vector outputCol6 = (Float8Vector) outputVectors[6]; - - // do evaluation - long start = System.nanoTime(); - for (int i = 0; i < DEFAULT_CAPACITY; i++) { - boolean isInputCol6Null = inputCol6.isNull(i); - int inputCol6Value = -1; - if (!isInputCol6Null) { - inputCol6Value = inputCol6.get(i); - } - - boolean filterResult = false; - if (!isInputCol6Null) { - filterResult = inputCol6Value <= 10441; - } - - if (filterResult) { - boolean isInputCol5Null = inputCol5.isNull(i); - NullableVarCharHolder inputCol5Value = new NullableVarCharHolder(); - if (!isInputCol5Null) { - inputCol5.get(i, inputCol5Value); - } - boolean isInputCol0Null = inputCol0.isNull(i); - double inputCol0Value = -1.0d; - if (!isInputCol0Null) { - inputCol0Value = inputCol0.get(i); - } - boolean isInputCol2Null = inputCol2.isNull(i); - double inputCol2Value = -1.0d; - if (!isInputCol2Null) { - inputCol2Value = inputCol2.get(i); - } - boolean isInputCol4Null = inputCol4.isNull(i); - NullableVarCharHolder inputCol4Value = new NullableVarCharHolder(); - if (!isInputCol4Null) { - inputCol4.get(i, inputCol4Value); - } - boolean isInputCol1Null = inputCol1.isNull(i); - double inputCol1Value = -1.0d; - if (!isInputCol1Null) { - inputCol1Value = inputCol1.get(i); - } - boolean isInputCol3Null = inputCol3.isNull(i); - double inputCol3Value = -1.0d; - if (!isInputCol3Null) { - inputCol3Value = inputCol3.get(i); - } - - if (isInputCol4Null) { - outputCol0.setNull(i); - } else { - outputCol0.setSafe(i, inputCol4Value); - } - - if (isInputCol5Null) { - outputCol1.setNull(i); - } else { - outputCol1.setSafe(i, inputCol5Value); - } - - if (isInputCol0Null) { - outputCol2.setNull(i); - } else { - outputCol2.set(i, inputCol0Value); - } - - if (isInputCol1Null) { - outputCol3.setNull(i); - } else { - outputCol3.set(i, inputCol1Value); - } - - double minusResult1 = -1.0d; - if (!isInputCol2Null) { - minusResult1 = 1.0d - inputCol2Value; - } - - boolean isInputCol1OrCol2Null = isInputCol1Null || isInputCol2Null; - double multResult1 = -1.0d; - if (!isInputCol1OrCol2Null) { - multResult1 = inputCol1Value * minusResult1; - } - - if (isInputCol1OrCol2Null) { - outputCol4.setNull(i); - } else { - outputCol4.set(i, multResult1); - } - - double minusResult2 = -1.0d; - if (!isInputCol2Null) { - minusResult2 = 1.0d - inputCol2Value; - } - - double multResult2 = -1.0d; - if (!isInputCol1OrCol2Null) { - multResult2 = inputCol1Value * minusResult2; - } - - double addResult = -1.0d; - if (!isInputCol3Null) { - addResult = 1.0d+ inputCol3Value; - } - - boolean isInputCol1OrCol2OrCol3Null = isInputCol1OrCol2Null || isInputCol3Null; - double multResult3 = -1.0d; - if (!isInputCol1OrCol2OrCol3Null) { - multResult3 = multResult2 * addResult; - } - - if (isInputCol1OrCol2OrCol3Null) { - outputCol5.setNull(i); - } else { - outputCol5.set(i, multResult3); - } - - if (isInputCol2Null) { - outputCol6.setNull(i); - } else { - outputCol6.set(i, inputCol2Value); - } - } - } - long end = System.nanoTime(); - - // dispose input/output data - for (int i = 0; i < inputVectors.length; i++) { - inputVectors[i].clear(); - } - - for (int i = 0; i < outputVectors.length; i++) { - outputVectors[i].clear(); - } - - return end - start; + /** + * Evaluate the workload of an operator that produces new data by projecting and filtering the input data. + * + * @return the duration of the evaluation, in nano-second. + */ + private long projectAndFilter() { + // create input schema + Field[] inputFields = new Field[]{ + new Field("inCol0", DOUBLE_TYPE, null), + new Field("inCol1", DOUBLE_TYPE, null), + new Field("inCol2", DOUBLE_TYPE, null), + new Field("inCol3", DOUBLE_TYPE, null), + new Field("inCol4", STRING_TYPE, null), + new Field("inCol5", STRING_TYPE, null), + new Field("inCol6", DATE_DAY_TYPE, null), + }; + + // create input data + ValueVector[] inputVectors = createVectors(inputFields, true); + + // create output schema + Field[] outputFields = new Field[]{ + new Field("outCol0", STRING_TYPE, null), + new Field("outCol1", STRING_TYPE, null), + new Field("outCol2", DOUBLE_TYPE, null), + new Field("outCol3", DOUBLE_TYPE, null), + new Field("outCol4", DOUBLE_TYPE, null), + new Field("outCol5", DOUBLE_TYPE, null), + new Field("outCol6", DOUBLE_TYPE, null), + }; + + // create output data + ValueVector[] outputVectors = createVectors(outputFields, false); + + // vector references that will be used in the computations + Float8Vector inputCol0 = (Float8Vector) inputVectors[0]; + Float8Vector inputCol1 = (Float8Vector) inputVectors[1]; + Float8Vector inputCol2 = (Float8Vector) inputVectors[2]; + Float8Vector inputCol3 = (Float8Vector) inputVectors[3]; + VarCharVector inputCol4 = (VarCharVector) inputVectors[4]; + VarCharVector inputCol5 = (VarCharVector) inputVectors[5]; + DateDayVector inputCol6 = (DateDayVector) inputVectors[6]; + + VarCharVector outputCol0 = (VarCharVector) outputVectors[0]; + VarCharVector outputCol1 = (VarCharVector) outputVectors[1]; + Float8Vector outputCol2 = (Float8Vector) outputVectors[2]; + Float8Vector outputCol3 = (Float8Vector) outputVectors[3]; + Float8Vector outputCol4 = (Float8Vector) outputVectors[4]; + Float8Vector outputCol5 = (Float8Vector) outputVectors[5]; + Float8Vector outputCol6 = (Float8Vector) outputVectors[6]; + + // do evaluation + long start = System.nanoTime(); + for (int i = 0; i < DEFAULT_CAPACITY; i++) { + boolean isInputCol6Null = inputCol6.isNull(i); + int inputCol6Value = -1; + if (!isInputCol6Null) { + inputCol6Value = inputCol6.get(i); + } + + boolean filterResult = false; + if (!isInputCol6Null) { + filterResult = inputCol6Value <= 10441; + } + + if (filterResult) { + boolean isInputCol5Null = inputCol5.isNull(i); + NullableVarCharHolder inputCol5Value = new NullableVarCharHolder(); + if (!isInputCol5Null) { + inputCol5.get(i, inputCol5Value); + } + boolean isInputCol0Null = inputCol0.isNull(i); + double inputCol0Value = -1.0d; + if (!isInputCol0Null) { + inputCol0Value = inputCol0.get(i); + } + boolean isInputCol2Null = inputCol2.isNull(i); + double inputCol2Value = -1.0d; + if (!isInputCol2Null) { + inputCol2Value = inputCol2.get(i); + } + boolean isInputCol4Null = inputCol4.isNull(i); + NullableVarCharHolder inputCol4Value = new NullableVarCharHolder(); + if (!isInputCol4Null) { + inputCol4.get(i, inputCol4Value); + } + boolean isInputCol1Null = inputCol1.isNull(i); + double inputCol1Value = -1.0d; + if (!isInputCol1Null) { + inputCol1Value = inputCol1.get(i); + } + boolean isInputCol3Null = inputCol3.isNull(i); + double inputCol3Value = -1.0d; + if (!isInputCol3Null) { + inputCol3Value = inputCol3.get(i); + } + + if (isInputCol4Null) { + outputCol0.setNull(i); + } else { + outputCol0.setSafe(i, inputCol4Value); + } + + if (isInputCol5Null) { + outputCol1.setNull(i); + } else { + outputCol1.setSafe(i, inputCol5Value); + } + + if (isInputCol0Null) { + outputCol2.setNull(i); + } else { + outputCol2.set(i, inputCol0Value); + } + + if (isInputCol1Null) { + outputCol3.setNull(i); + } else { + outputCol3.set(i, inputCol1Value); + } + + double minusResult1 = -1.0d; + if (!isInputCol2Null) { + minusResult1 = 1.0d - inputCol2Value; + } + + boolean isInputCol1OrCol2Null = isInputCol1Null || isInputCol2Null; + double multResult1 = -1.0d; + if (!isInputCol1OrCol2Null) { + multResult1 = inputCol1Value * minusResult1; + } + + if (isInputCol1OrCol2Null) { + outputCol4.setNull(i); + } else { + outputCol4.set(i, multResult1); + } + + double minusResult2 = -1.0d; + if (!isInputCol2Null) { + minusResult2 = 1.0d - inputCol2Value; + } + + double multResult2 = -1.0d; + if (!isInputCol1OrCol2Null) { + multResult2 = inputCol1Value * minusResult2; + } + + double addResult = -1.0d; + if (!isInputCol3Null) { + addResult = 1.0d + inputCol3Value; + } + + boolean isInputCol1OrCol2OrCol3Null = isInputCol1OrCol2Null || isInputCol3Null; + double multResult3 = -1.0d; + if (!isInputCol1OrCol2OrCol3Null) { + multResult3 = multResult2 * addResult; + } + + if (isInputCol1OrCol2OrCol3Null) { + outputCol5.setNull(i); + } else { + outputCol5.set(i, multResult3); + } + + if (isInputCol2Null) { + outputCol6.setNull(i); + } else { + outputCol6.set(i, inputCol2Value); + } + } } + long end = System.nanoTime(); - @Test - public void testProjectAndFilter() { - runBenchmark("TPC-H Q1#Project & Filter", () -> projectAndFilter(), 1); + // dispose input/output data + for (int i = 0; i < inputVectors.length; i++) { + inputVectors[i].clear(); } + + for (int i = 0; i < outputVectors.length; i++) { + outputVectors[i].clear(); + } + + return end - start; + } + + @Test + public void testProjectAndFilter() { + runBenchmark("TPC-H Q1#Project & Filter", () -> projectAndFilter(), 1); + } } From 7f994d7de75588e8b022b5e18770bb4fd676c931 Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Thu, 25 Apr 2019 16:08:17 +0800 Subject: [PATCH 3/7] [ARROW-5209][Java]Fix style errors --- .../sql/SqlPerormancefTestBase.java | 43 +++++++++++-------- .../vector/performance/sql/TpchQ1Test.java | 28 ++++++------ 2 files changed, 40 insertions(+), 31 deletions(-) diff --git a/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/SqlPerormancefTestBase.java b/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/SqlPerormancefTestBase.java index c998ac0947e..b4d87c4bf5d 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/SqlPerormancefTestBase.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/SqlPerormancefTestBase.java @@ -17,6 +17,11 @@ package org.apache.arrow.vector.performance.sql; +import static org.apache.arrow.vector.types.DateUnit.DAY; + +import java.util.Random; +import java.util.function.Supplier; + import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.BitVector; @@ -36,11 +41,6 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; -import java.util.Random; -import java.util.function.Supplier; - -import static org.apache.arrow.vector.types.DateUnit.DAY; - /** * The base class for all SQL performance evaluations. */ @@ -78,25 +78,35 @@ public abstract class SqlPerormancefTestBase { // /UTILITIES FOR DATA TYPES FROM HERE - public static final FieldType BOOLEAN_TYPE = new FieldType(true, new ArrowType.Bool(), null); + public static final FieldType BOOLEAN_TYPE = + new FieldType(true, new ArrowType.Bool(), null); - public static final FieldType TINY_INT_TYPE = new FieldType(true, new ArrowType.Int(8, true), null); + public static final FieldType TINY_INT_TYPE = + new FieldType(true, new ArrowType.Int(8, true), null); - public static final FieldType SMALL_INT_TYPE = new FieldType(true, new ArrowType.Int(16, true), null); + public static final FieldType SMALL_INT_TYPE = + new FieldType(true, new ArrowType.Int(16, true), null); - public static final FieldType INT_TYPE = new FieldType(true, new ArrowType.Int(32, true), null); + public static final FieldType INT_TYPE = + new FieldType(true, new ArrowType.Int(32, true), null); - public static final FieldType BIG_INT_TYPE = new FieldType(true, new ArrowType.Int(64, true), null); + public static final FieldType BIG_INT_TYPE = + new FieldType(true, new ArrowType.Int(64, true), null); - public static final FieldType FLOAT_TYPE = new FieldType(true, new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), null); + public static final FieldType FLOAT_TYPE = + new FieldType(true, new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), null); - public static final FieldType DOUBLE_TYPE = new FieldType(true, new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), null); + public static final FieldType DOUBLE_TYPE = + new FieldType(true, new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), null); - public static final FieldType STRING_TYPE = new FieldType(true, new ArrowType.Utf8(), null); + public static final FieldType STRING_TYPE = + new FieldType(true, new ArrowType.Utf8(), null); - public static final FieldType BYTE_ARRAY_TYPE = new FieldType(true, new ArrowType.Binary(), null); + public static final FieldType BYTE_ARRAY_TYPE = + new FieldType(true, new ArrowType.Binary(), null); - public static final FieldType DATE_DAY_TYPE = new FieldType(true, new ArrowType.Date(DAY), null); + public static final FieldType DATE_DAY_TYPE = + new FieldType(true, new ArrowType.Date(DAY), null); /** * The allocator for all vectors. @@ -353,8 +363,7 @@ private void fillRandomData(ValueVector vector) { /** * Generate the length of a random byte array. - * - * @return + * @return the length for the byte array. */ private int getVariableDataLength() { // first, generate a non-negative integer; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/TpchQ1Test.java b/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/TpchQ1Test.java index ad51338f442..46d802a83b2 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/TpchQ1Test.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/TpchQ1Test.java @@ -38,13 +38,13 @@ public class TpchQ1Test extends SqlPerormancefTestBase { private long projectAndFilter() { // create input schema Field[] inputFields = new Field[]{ - new Field("inCol0", DOUBLE_TYPE, null), - new Field("inCol1", DOUBLE_TYPE, null), - new Field("inCol2", DOUBLE_TYPE, null), - new Field("inCol3", DOUBLE_TYPE, null), - new Field("inCol4", STRING_TYPE, null), - new Field("inCol5", STRING_TYPE, null), - new Field("inCol6", DATE_DAY_TYPE, null), + new Field("inCol0", DOUBLE_TYPE, null), + new Field("inCol1", DOUBLE_TYPE, null), + new Field("inCol2", DOUBLE_TYPE, null), + new Field("inCol3", DOUBLE_TYPE, null), + new Field("inCol4", STRING_TYPE, null), + new Field("inCol5", STRING_TYPE, null), + new Field("inCol6", DATE_DAY_TYPE, null), }; // create input data @@ -52,13 +52,13 @@ private long projectAndFilter() { // create output schema Field[] outputFields = new Field[]{ - new Field("outCol0", STRING_TYPE, null), - new Field("outCol1", STRING_TYPE, null), - new Field("outCol2", DOUBLE_TYPE, null), - new Field("outCol3", DOUBLE_TYPE, null), - new Field("outCol4", DOUBLE_TYPE, null), - new Field("outCol5", DOUBLE_TYPE, null), - new Field("outCol6", DOUBLE_TYPE, null), + new Field("outCol0", STRING_TYPE, null), + new Field("outCol1", STRING_TYPE, null), + new Field("outCol2", DOUBLE_TYPE, null), + new Field("outCol3", DOUBLE_TYPE, null), + new Field("outCol4", DOUBLE_TYPE, null), + new Field("outCol5", DOUBLE_TYPE, null), + new Field("outCol6", DOUBLE_TYPE, null), }; // create output data From 60d8ef6792fa595386185829049e6d898a2fa12e Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Sun, 28 Apr 2019 14:57:28 +0800 Subject: [PATCH 4/7] [ARROW-5209][Java]Add another benchmark for Q1 --- ...Base.java => SqlPerformancefTestBase.java} | 4 +- .../vector/performance/sql/TpchQ1Test.java | 201 +++++++++++++++++- 2 files changed, 199 insertions(+), 6 deletions(-) rename java/vector/src/test/java/org/apache/arrow/vector/performance/sql/{SqlPerormancefTestBase.java => SqlPerformancefTestBase.java} (99%) diff --git a/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/SqlPerormancefTestBase.java b/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/SqlPerformancefTestBase.java similarity index 99% rename from java/vector/src/test/java/org/apache/arrow/vector/performance/sql/SqlPerormancefTestBase.java rename to java/vector/src/test/java/org/apache/arrow/vector/performance/sql/SqlPerformancefTestBase.java index b4d87c4bf5d..bea169ac6fc 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/SqlPerormancefTestBase.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/SqlPerformancefTestBase.java @@ -44,7 +44,7 @@ /** * The base class for all SQL performance evaluations. */ -public abstract class SqlPerormancefTestBase { +public abstract class SqlPerformancefTestBase { /** * The total amount of memory used in each evaluation. @@ -162,7 +162,7 @@ protected void runBenchmark(String name, Supplier benchmark, int warmUp) { "Num of repeats = " + numRepeats + "\n" + "Max duration = " + maxDuration + "ms\n" + "Min duration = " + minDuration + "ms\n" + - "Avg duration = " + totalDuration / numRepeats + "ms"); + "Avg duration = " + totalDuration / numRepeats + "ms\n"); } // /UTILITY METHODS FROM HERE diff --git a/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/TpchQ1Test.java b/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/TpchQ1Test.java index 46d802a83b2..3ba4de58bd7 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/TpchQ1Test.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/TpchQ1Test.java @@ -17,6 +17,7 @@ package org.apache.arrow.vector.performance.sql; +import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.DateDayVector; import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.ValueVector; @@ -28,14 +29,14 @@ /** * Evaluate the benchmarks for TPC-H Q1. */ -public class TpchQ1Test extends SqlPerormancefTestBase { +public class TpchQ1Test extends SqlPerformancefTestBase { /** * Evaluate the workload of an operator that produces new data by projecting and filtering the input data. * * @return the duration of the evaluation, in nano-second. */ - private long projectAndFilter() { + private long projectAndFilter1() { // create input schema Field[] inputFields = new Field[]{ new Field("inCol0", DOUBLE_TYPE, null), @@ -216,8 +217,200 @@ private long projectAndFilter() { return end - start; } + private long projectAndFilter2() { + // create input schema + Field[] inputFields = new Field[]{ + new Field("inCol0", STRING_TYPE, null), + new Field("inCol1", STRING_TYPE, null), + new Field("inCol2", DOUBLE_TYPE, null), + new Field("inCol3", DOUBLE_TYPE, null), + new Field("inCol4", DOUBLE_TYPE, null), + new Field("inCol5", DOUBLE_TYPE, null), + new Field("inCol6", BIG_INT_TYPE, null), + new Field("inCol7", DOUBLE_TYPE, null), + }; + + // create input data + ValueVector[] inputVectors = createVectors(inputFields, true); + + // create output schema + Field[] outputFields = new Field[]{ + new Field("outCol0", STRING_TYPE, null), + new Field("outCol1", STRING_TYPE, null), + new Field("outCol2", DOUBLE_TYPE, null), + new Field("outCol3", DOUBLE_TYPE, null), + new Field("outCol4", DOUBLE_TYPE, null), + new Field("outCol5", DOUBLE_TYPE, null), + new Field("outCol6", DOUBLE_TYPE, null), + new Field("outCol7", DOUBLE_TYPE, null), + new Field("outCol8", DOUBLE_TYPE, null), + new Field("outCol9", BIG_INT_TYPE, null), + }; + + // create output data + ValueVector[] outputVectors = createVectors(outputFields, false); + + // vector references that will be used in the computations + VarCharVector inputCol0 = (VarCharVector) inputVectors[0]; + VarCharVector inputCol1 = (VarCharVector) inputVectors[1]; + Float8Vector inputCol2 = (Float8Vector) inputVectors[2]; + Float8Vector inputCol3 = (Float8Vector) inputVectors[3]; + Float8Vector inputCol4 = (Float8Vector) inputVectors[4]; + Float8Vector inputCol5 = (Float8Vector) inputVectors[5]; + BigIntVector inputCol6 = (BigIntVector) inputVectors[6]; + Float8Vector inputCol7 = (Float8Vector) inputVectors[7]; + + VarCharVector outputCol0 = (VarCharVector) outputVectors[0]; + VarCharVector outputCol1 = (VarCharVector) outputVectors[1]; + Float8Vector outputCol2 = (Float8Vector) outputVectors[2]; + Float8Vector outputCol3 = (Float8Vector) outputVectors[3]; + Float8Vector outputCol4 = (Float8Vector) outputVectors[4]; + Float8Vector outputCol5 = (Float8Vector) outputVectors[5]; + Float8Vector outputCol6 = (Float8Vector) outputVectors[6]; + Float8Vector outputCol7 = (Float8Vector) outputVectors[7]; + Float8Vector outputCol8 = (Float8Vector) outputVectors[8]; + BigIntVector outputCol9 = (BigIntVector) outputVectors[9]; + + // do evaluation + long start = System.nanoTime(); + for (int i = 0; i < DEFAULT_CAPACITY; i++) { + boolean isInputCol6Null = inputCol6.isNull(i); + long inputCol6Value = -1L; + if (!isInputCol6Null) { + inputCol6Value = inputCol6.get(i); + } + boolean isInputCol5Null = inputCol5.isNull(i); + double inputCol5Value = -1.0d; + if (!isInputCol5Null) { + inputCol5Value = inputCol5.get(i); + } + boolean isInputCol2Null = inputCol2.isNull(i); + double inputCol2Value = -1.0d; + if (!isInputCol2Null) { + inputCol2Value = inputCol2.get(i); + } + boolean isInputCol0Null = inputCol0.isNull(i); + NullableVarCharHolder inputCol0Value = new NullableVarCharHolder(); + if (!isInputCol0Null) { + inputCol0.get(i, inputCol0Value); + } + boolean isInputCol4Null = inputCol4.isNull(i); + double inputCol4Value = -1.0d; + if (!isInputCol4Null) { + inputCol4Value = inputCol4.get(i); + } + boolean isInputCol1Null = inputCol1.isNull(i); + NullableVarCharHolder inputCol1Value = new NullableVarCharHolder(); + if (!isInputCol1Null) { + inputCol1.get(i, inputCol1Value); + } + boolean isInputCol7Null = inputCol7.isNull(i); + double inputCol7Value = -1.0d; + if (!isInputCol7Null) { + inputCol7Value = inputCol7.get(i); + } + boolean isInputCol3Null = inputCol3.isNull(i); + double inputCol3Value = -1.0d; + if (!isInputCol3Null) { + inputCol3Value = inputCol3.get(i); + } + + if (isInputCol0Null) { + outputCol0.setNull(i); + } else { + outputCol0.setSafe(i, inputCol0Value); + } + + if (isInputCol1Null) { + outputCol1.setNull(i); + } else { + outputCol1.setSafe(i, inputCol1Value); + } + + if (isInputCol2Null) { + outputCol2.setNull(i); + } else { + outputCol2.set(i, inputCol2Value); + } + + if (isInputCol3Null) { + outputCol3.setNull(i); + } else { + outputCol3.set(i, inputCol3Value); + } + + if (isInputCol4Null) { + outputCol4.setNull(i); + } else { + outputCol4.set(i, inputCol4Value); + } + + if (isInputCol5Null) { + outputCol5.setNull(i); + } else { + outputCol5.set(i, inputCol5Value); + } + + boolean isInputCol2OrCol6Null = isInputCol2Null || isInputCol6Null; + double divResul1 = -1.0d; + if (!isInputCol2OrCol6Null) { + divResul1 = inputCol2Value / Long.valueOf(inputCol6Value).doubleValue(); + } + + if (isInputCol2OrCol6Null) { + outputCol6.setNull(i); + } else { + outputCol6.set(i, divResul1); + } + + boolean isInputCol3OrCol6Null = isInputCol3Null || isInputCol6Null; + double divResult2 = -1.0d; + if (!isInputCol3OrCol6Null) { + divResult2 = inputCol3Value / Long.valueOf(inputCol6Value).doubleValue(); + } + + if (isInputCol3OrCol6Null) { + outputCol7.setNull(i); + } else { + outputCol7.set(i, divResult2); + } + + boolean isInputCol7OrCol6Null = isInputCol7Null || isInputCol6Null; + double divResult3 = -1.0d; + if (!isInputCol7OrCol6Null) { + divResult3 = inputCol7Value / Long.valueOf(inputCol6Value).doubleValue(); + } + + if (isInputCol7OrCol6Null) { + outputCol8.setNull(i); + } else { + outputCol8.set(i, divResult3); + } + + if (isInputCol6Null) { + outputCol9.setNull(i); + } else { + outputCol9.set(i, inputCol6Value); + } + } + + long end = System.nanoTime(); + + // dispose input/output data + for (int i = 0; i < inputVectors.length; i++) { + inputVectors[i].clear(); + } + + for (int i = 0; i < outputVectors.length; i++) { + outputVectors[i].clear(); + } + + return end - start; + } + @Test - public void testProjectAndFilter() { - runBenchmark("TPC-H Q1#Project & Filter", () -> projectAndFilter(), 1); + public void runBenchmarks() { + runBenchmark("TPC-H Q1#Project & Filter1", () -> projectAndFilter1(), 1); + runBenchmark("TPC-H Q1#Project & Filter2", () -> projectAndFilter2(), 1); } } From a60cf9ca6b36517382fe12e18b484d12ee03d0b5 Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Sun, 5 May 2019 15:17:23 +0800 Subject: [PATCH 5/7] [ARROW-5209][Java]Support benchmarks by JMH --- java/performance/pom.xml | 181 ++++++++++++++++++ .../sql/SqlPerformancefTestHelper.java} | 62 +----- .../performance/sql/TpchQ1Benchmarks.java} | 59 +++--- java/pom.xml | 1 + 4 files changed, 229 insertions(+), 74 deletions(-) create mode 100644 java/performance/pom.xml rename java/{vector/src/test/java/org/apache/arrow/vector/performance/sql/SqlPerformancefTestBase.java => performance/src/main/java/org/apache/arrow/performance/sql/SqlPerformancefTestHelper.java} (86%) rename java/{vector/src/test/java/org/apache/arrow/vector/performance/sql/TpchQ1Test.java => performance/src/main/java/org/apache/arrow/performance/sql/TpchQ1Benchmarks.java} (89%) diff --git a/java/performance/pom.xml b/java/performance/pom.xml new file mode 100644 index 00000000000..8401ccbaf1c --- /dev/null +++ b/java/performance/pom.xml @@ -0,0 +1,181 @@ + + + + 4.0.0 + + arrow-java-root + org.apache.arrow + 0.14.0-SNAPSHOT + + arrow-performance + jar + Arrow Performance Benchmarks + + + + org.openjdk.jmh + jmh-core + ${jmh.version} + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmh.version} + provided + + + org.apache.logging.log4j + log4j-slf4j-impl + 2.1 + + + org.apache.logging.log4j + log4j-core + 2.1 + + + org.apache.arrow + arrow-vector + ${project.version} + + + + + UTF-8 + + + 1.21 + + + 1.8 + + + benchmarks + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.1 + + ${javac.target} + ${javac.target} + ${javac.target} + + + + org.apache.maven.plugins + maven-shade-plugin + 2.2 + + + package + + shade + + + ${uberjar.name} + + + org.openjdk.jmh.Main + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + + + maven-clean-plugin + 2.5 + + + maven-deploy-plugin + 2.8.1 + + + maven-install-plugin + 2.5.1 + + + maven-jar-plugin + 2.4 + + + maven-javadoc-plugin + 2.9.1 + + + maven-resources-plugin + 2.6 + + + maven-site-plugin + 3.3 + + + maven-source-plugin + 2.2.1 + + + maven-surefire-plugin + 2.17 + + + + + + diff --git a/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/SqlPerformancefTestBase.java b/java/performance/src/main/java/org/apache/arrow/performance/sql/SqlPerformancefTestHelper.java similarity index 86% rename from java/vector/src/test/java/org/apache/arrow/vector/performance/sql/SqlPerformancefTestBase.java rename to java/performance/src/main/java/org/apache/arrow/performance/sql/SqlPerformancefTestHelper.java index bea169ac6fc..5da1ea07c7e 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/SqlPerformancefTestBase.java +++ b/java/performance/src/main/java/org/apache/arrow/performance/sql/SqlPerformancefTestHelper.java @@ -15,9 +15,11 @@ * limitations under the License. */ -package org.apache.arrow.vector.performance.sql; +package org.apache.arrow.performance.sql; import static org.apache.arrow.vector.types.DateUnit.DAY; +import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE; +import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE; import java.util.Random; import java.util.function.Supplier; @@ -36,15 +38,17 @@ import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VariableWidthVector; +import org.apache.arrow.vector.types.DateUnit; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.openjdk.jmh.annotations.State; /** * The base class for all SQL performance evaluations. */ -public abstract class SqlPerformancefTestBase { +public class SqlPerformancefTestHelper { /** * The total amount of memory used in each evaluation. @@ -94,10 +98,10 @@ public abstract class SqlPerformancefTestBase { new FieldType(true, new ArrowType.Int(64, true), null); public static final FieldType FLOAT_TYPE = - new FieldType(true, new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), null); + new FieldType(true, new ArrowType.FloatingPoint(SINGLE), null); public static final FieldType DOUBLE_TYPE = - new FieldType(true, new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), null); + new FieldType(true, new ArrowType.FloatingPoint(DOUBLE), null); public static final FieldType STRING_TYPE = new FieldType(true, new ArrowType.Utf8(), null); @@ -106,7 +110,7 @@ public abstract class SqlPerformancefTestBase { new FieldType(true, new ArrowType.Binary(), null); public static final FieldType DATE_DAY_TYPE = - new FieldType(true, new ArrowType.Date(DAY), null); + new FieldType(true, new ArrowType.Date(DateUnit.DAY), null); /** * The allocator for all vectors. @@ -119,52 +123,6 @@ public abstract class SqlPerformancefTestBase { */ private Random random = new Random(0); - /** - * The number of times each benchmark runs. - */ - protected int numRepeats = 10; - - /** - * The template method for running a benchmark. - * - * @param name name of the benchmark, used for display. - * @param benchmark the benchmark to run. - * @param warmUp the number of warm-ups to run, if it is a positive integer. - */ - protected void runBenchmark(String name, Supplier benchmark, int warmUp) { - if (warmUp > 0) { - // run warm-ups - for (int i = 0; i < warmUp; i++) { - benchmark.get(); - } - } - - double minDuration = Double.MAX_VALUE; - double maxDuration = Double.MIN_VALUE; - double totalDuration = 0; - - // run benchmark - for (int i = 0; i < numRepeats; i++) { - double duration = benchmark.get().doubleValue() / 1e6; - - totalDuration += duration; - if (duration > maxDuration) { - maxDuration = duration; - } - - if (duration < minDuration) { - minDuration = duration; - } - } - - // display statistics. - System.out.println("Statistics for benchmark " + name + ":\n" + - "Num of repeats = " + numRepeats + "\n" + - "Max duration = " + maxDuration + "ms\n" + - "Min duration = " + minDuration + "ms\n" + - "Avg duration = " + totalDuration / numRepeats + "ms\n"); - } - // /UTILITY METHODS FROM HERE /** @@ -235,7 +193,7 @@ protected ValueVector createVector(Field field) { return new VarBinaryVector(name, allocator); } else if (vectorType instanceof ArrowType.Utf8) { return new VarCharVector(name, allocator); - } else if (vectorType instanceof ArrowType.Date && ((ArrowType.Date) vectorType).getUnit() == DAY) { + } else if (vectorType instanceof ArrowType.Date && ((ArrowType.Date) vectorType).getUnit() == DateUnit.DAY) { return new DateDayVector(name, allocator); } else { throw new IllegalArgumentException("Unknown arrow type: " + vectorType); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/TpchQ1Test.java b/java/performance/src/main/java/org/apache/arrow/performance/sql/TpchQ1Benchmarks.java similarity index 89% rename from java/vector/src/test/java/org/apache/arrow/vector/performance/sql/TpchQ1Test.java rename to java/performance/src/main/java/org/apache/arrow/performance/sql/TpchQ1Benchmarks.java index 3ba4de58bd7..63d52c5f3cb 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/performance/sql/TpchQ1Test.java +++ b/java/performance/src/main/java/org/apache/arrow/performance/sql/TpchQ1Benchmarks.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.arrow.vector.performance.sql; +package org.apache.arrow.performance.sql; import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.DateDayVector; @@ -24,19 +24,32 @@ import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.holders.NullableVarCharHolder; import org.apache.arrow.vector.types.pojo.Field; -import org.junit.Test; +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import java.util.concurrent.TimeUnit; + +import static org.apache.arrow.performance.sql.SqlPerformancefTestHelper.*; /** * Evaluate the benchmarks for TPC-H Q1. */ -public class TpchQ1Test extends SqlPerformancefTestBase { +public class TpchQ1Benchmarks { /** * Evaluate the workload of an operator that produces new data by projecting and filtering the input data. * * @return the duration of the evaluation, in nano-second. */ - private long projectAndFilter1() { + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void projectAndFilter1() { + SqlPerformancefTestHelper helper = new SqlPerformancefTestHelper(); + // create input schema Field[] inputFields = new Field[]{ new Field("inCol0", DOUBLE_TYPE, null), @@ -49,7 +62,7 @@ private long projectAndFilter1() { }; // create input data - ValueVector[] inputVectors = createVectors(inputFields, true); + ValueVector[] inputVectors = helper.createVectors(inputFields, true); // create output schema Field[] outputFields = new Field[]{ @@ -63,7 +76,7 @@ private long projectAndFilter1() { }; // create output data - ValueVector[] outputVectors = createVectors(outputFields, false); + ValueVector[] outputVectors = helper.createVectors(outputFields, false); // vector references that will be used in the computations Float8Vector inputCol0 = (Float8Vector) inputVectors[0]; @@ -83,8 +96,7 @@ private long projectAndFilter1() { Float8Vector outputCol6 = (Float8Vector) outputVectors[6]; // do evaluation - long start = System.nanoTime(); - for (int i = 0; i < DEFAULT_CAPACITY; i++) { + for (int i = 0; i < helper.DEFAULT_CAPACITY; i++) { boolean isInputCol6Null = inputCol6.isNull(i); int inputCol6Value = -1; if (!isInputCol6Null) { @@ -203,7 +215,6 @@ private long projectAndFilter1() { } } } - long end = System.nanoTime(); // dispose input/output data for (int i = 0; i < inputVectors.length; i++) { @@ -214,10 +225,15 @@ private long projectAndFilter1() { outputVectors[i].clear(); } - return end - start; + helper.close(); } - private long projectAndFilter2() { + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void projectAndFilter2() { + SqlPerformancefTestHelper helper = new SqlPerformancefTestHelper(); + // create input schema Field[] inputFields = new Field[]{ new Field("inCol0", STRING_TYPE, null), @@ -231,7 +247,7 @@ private long projectAndFilter2() { }; // create input data - ValueVector[] inputVectors = createVectors(inputFields, true); + ValueVector[] inputVectors = helper.createVectors(inputFields, true); // create output schema Field[] outputFields = new Field[]{ @@ -248,7 +264,7 @@ private long projectAndFilter2() { }; // create output data - ValueVector[] outputVectors = createVectors(outputFields, false); + ValueVector[] outputVectors = helper.createVectors(outputFields, false); // vector references that will be used in the computations VarCharVector inputCol0 = (VarCharVector) inputVectors[0]; @@ -272,7 +288,6 @@ private long projectAndFilter2() { BigIntVector outputCol9 = (BigIntVector) outputVectors[9]; // do evaluation - long start = System.nanoTime(); for (int i = 0; i < DEFAULT_CAPACITY; i++) { boolean isInputCol6Null = inputCol6.isNull(i); long inputCol6Value = -1L; @@ -394,8 +409,6 @@ private long projectAndFilter2() { } } - long end = System.nanoTime(); - // dispose input/output data for (int i = 0; i < inputVectors.length; i++) { inputVectors[i].clear(); @@ -404,13 +417,15 @@ private long projectAndFilter2() { for (int i = 0; i < outputVectors.length; i++) { outputVectors[i].clear(); } - - return end - start; + helper.close(); } - @Test - public void runBenchmarks() { - runBenchmark("TPC-H Q1#Project & Filter1", () -> projectAndFilter1(), 1); - runBenchmark("TPC-H Q1#Project & Filter2", () -> projectAndFilter2(), 1); + public static void main(String[] args) throws RunnerException { + Options opt = new OptionsBuilder() + .include(TpchQ1Benchmarks.class.getSimpleName()) + .forks(1) + .build(); + + new Runner(opt).run(); } } diff --git a/java/pom.xml b/java/pom.xml index 9e4afcfa546..98e7f2505b7 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -652,6 +652,7 @@ adapter/jdbc plasma flight + performance From 375da92624c0da0b168beff0bb0e2caf2fe4db3e Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Sun, 5 May 2019 15:54:05 +0800 Subject: [PATCH 6/7] [ARROW-5209][Java]Fix style problems --- java/performance/pom.xml | 7 +++++++ .../sql/SqlPerformancefTestHelper.java | 2 -- .../arrow/performance/sql/TpchQ1Benchmarks.java | 17 ++++++++++++----- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/java/performance/pom.xml b/java/performance/pom.xml index 8401ccbaf1c..0d2baf05c04 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -56,17 +56,24 @@ THE POSSIBILITY OF SUCH DAMAGE. org.apache.logging.log4j log4j-slf4j-impl 2.1 + runtime org.apache.logging.log4j log4j-core 2.1 + runtime org.apache.arrow arrow-vector ${project.version} + + org.apache.arrow + arrow-memory + ${project.version} + diff --git a/java/performance/src/main/java/org/apache/arrow/performance/sql/SqlPerformancefTestHelper.java b/java/performance/src/main/java/org/apache/arrow/performance/sql/SqlPerformancefTestHelper.java index 5da1ea07c7e..cae83955b63 100644 --- a/java/performance/src/main/java/org/apache/arrow/performance/sql/SqlPerformancefTestHelper.java +++ b/java/performance/src/main/java/org/apache/arrow/performance/sql/SqlPerformancefTestHelper.java @@ -17,12 +17,10 @@ package org.apache.arrow.performance.sql; -import static org.apache.arrow.vector.types.DateUnit.DAY; import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE; import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE; import java.util.Random; -import java.util.function.Supplier; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BigIntVector; diff --git a/java/performance/src/main/java/org/apache/arrow/performance/sql/TpchQ1Benchmarks.java b/java/performance/src/main/java/org/apache/arrow/performance/sql/TpchQ1Benchmarks.java index 63d52c5f3cb..35bc3e8ba6f 100644 --- a/java/performance/src/main/java/org/apache/arrow/performance/sql/TpchQ1Benchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/performance/sql/TpchQ1Benchmarks.java @@ -17,6 +17,14 @@ package org.apache.arrow.performance.sql; +import static org.apache.arrow.performance.sql.SqlPerformancefTestHelper.BIG_INT_TYPE; +import static org.apache.arrow.performance.sql.SqlPerformancefTestHelper.DATE_DAY_TYPE; +import static org.apache.arrow.performance.sql.SqlPerformancefTestHelper.DEFAULT_CAPACITY; +import static org.apache.arrow.performance.sql.SqlPerformancefTestHelper.DOUBLE_TYPE; +import static org.apache.arrow.performance.sql.SqlPerformancefTestHelper.STRING_TYPE; + +import java.util.concurrent.TimeUnit; + import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.DateDayVector; import org.apache.arrow.vector.Float8Vector; @@ -24,16 +32,15 @@ import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.holders.NullableVarCharHolder; import org.apache.arrow.vector.types.pojo.Field; -import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; import org.openjdk.jmh.runner.Runner; import org.openjdk.jmh.runner.RunnerException; import org.openjdk.jmh.runner.options.Options; import org.openjdk.jmh.runner.options.OptionsBuilder; -import java.util.concurrent.TimeUnit; - -import static org.apache.arrow.performance.sql.SqlPerformancefTestHelper.*; - /** * Evaluate the benchmarks for TPC-H Q1. */ From 1bc9002489b14dff8f46f16b734a63f2d00e0e1d Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Sun, 5 May 2019 16:02:07 +0800 Subject: [PATCH 7/7] [ARROW-5209][Java]Add apache license header --- java/performance/pom.xml | 56 +++++++--------------------------------- 1 file changed, 10 insertions(+), 46 deletions(-) diff --git a/java/performance/pom.xml b/java/performance/pom.xml index 0d2baf05c04..88301c03d72 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -1,34 +1,14 @@ - + 4.0.0 @@ -78,20 +58,8 @@ THE POSSIBILITY OF SUCH DAMAGE. UTF-8 - - 1.21 - - 1.8 - - benchmarks @@ -126,10 +94,6 @@ THE POSSIBILITY OF SUCH DAMAGE. - *:* META-INF/*.SF