From 2b556f6b1924f7f958d33ded36a255422ac09860 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Mon, 12 Jun 2023 16:12:28 +0530 Subject: [PATCH 01/20] Vectorizing earliest for numeric --- .../query/SqlExpressionBenchmark.java | 16 +- .../first/DoubleFirstAggregatorFactory.java | 30 +++ .../first/DoubleFirstVectorAggregator.java | 66 +++++++ .../first/FloatFirstAggregatorFactory.java | 28 +++ .../first/FloatFirstVectorAggregator.java | 66 +++++++ .../first/LongFirstAggregatorFactory.java | 27 +++ .../first/LongFirstVectorAggregator.java | 66 +++++++ .../first/NumericFirstVectorAggregator.java | 177 ++++++++++++++++++ .../DoubleFirstVectorAggregationTest.java | 160 ++++++++++++++++ .../FloatFirstVectorAggregationTest.java | 167 +++++++++++++++++ .../first/LongFirstVectorAggregationTest.java | 160 ++++++++++++++++ .../query/groupby/GroupByQueryRunnerTest.java | 9 - .../timeseries/TimeseriesQueryRunnerTest.java | 6 - .../EarliestLatestAnySqlAggregator.java | 3 +- .../druid/sql/calcite/CalciteQueryTest.java | 78 +++++++- 15 files changed, 1032 insertions(+), 27 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstVectorAggregator.java create mode 100644 processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstVectorAggregator.java create mode 100644 processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstVectorAggregator.java create mode 100644 processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java create mode 100644 processing/src/test/java/org/apache/druid/query/aggregation/first/DoubleFirstVectorAggregationTest.java create mode 100644 processing/src/test/java/org/apache/druid/query/aggregation/first/FloatFirstVectorAggregationTest.java create mode 100644 processing/src/test/java/org/apache/druid/query/aggregation/first/LongFirstVectorAggregationTest.java diff --git 
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java index 1c64d7a749df..018ac6176d96 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java @@ -215,7 +215,15 @@ public String getFormatString() "SELECT LATEST(float3), LATEST(long1), LATEST(double4) FROM foo", // 42,43: filter numeric nulls "SELECT SUM(long5) FROM foo WHERE long5 IS NOT NULL", - "SELECT string2, SUM(long5) FROM foo WHERE long5 IS NOT NULL GROUP BY 1" + "SELECT string2, SUM(long5) FROM foo WHERE long5 IS NOT NULL GROUP BY 1", + // 44: EARLIEST aggregator + "SELECT EARLIEST(long1) FROM foo", + // 45: EARLIEST aggregator double + "SELECT EARLIEST(double4) FROM foo", + // 46: EARLIEST aggregator float + "SELECT EARLIEST(float3) FROM foo", + // 47: EARLIEST aggregator all + "SELECT EARLIEST(float3), EARLIEST(long1), EARLIEST(double4) FROM foo" ); @Param({"5000000"}) @@ -273,7 +281,11 @@ public String getFormatString() "40", "41", "42", - "43" + "43", + "44", + "45", + "46", + "47" }) private String query; diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstAggregatorFactory.java index d575b263f0fb..8711bb76feaa 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstAggregatorFactory.java @@ -29,14 +29,21 @@ import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.AggregatorUtil; import org.apache.druid.query.aggregation.BufferAggregator; +import org.apache.druid.query.aggregation.VectorAggregator; +import 
org.apache.druid.query.aggregation.any.NumericNilVectorAggregator; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.BaseDoubleColumnValueSelector; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.NilColumnValueSelector; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.vector.BaseLongVectorValueSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorValueSelector; import javax.annotation.Nullable; import java.nio.ByteBuffer; @@ -97,6 +104,12 @@ public DoubleFirstAggregatorFactory( this.storeDoubleAsFloat = ColumnHolder.storeDoubleAsFloat(); } + @Override + public boolean canVectorize(ColumnInspector columnInspector) + { + return true; + } + @Override public Aggregator factorize(ColumnSelectorFactory metricFactory) { @@ -125,6 +138,23 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) } } + @Override + public VectorAggregator factorizeVector( + VectorColumnSelectorFactory columnSelectorFactory + ) + { + ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(fieldName); + VectorValueSelector valueSelector = columnSelectorFactory.makeValueSelector(fieldName); + //time is always long + BaseLongVectorValueSelector timeSelector = (BaseLongVectorValueSelector) columnSelectorFactory.makeValueSelector( + timeColumn); + if (capabilities == null || capabilities.isNumeric()) { + return new DoubleFirstVectorAggregator(timeSelector, valueSelector); + } else { + return NumericNilVectorAggregator.doubleNilVectorAggregator(); + } + } + @Override public Comparator 
getComparator() { diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstVectorAggregator.java new file mode 100644 index 000000000000..c05a5c162f5f --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstVectorAggregator.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.first; + +import org.apache.druid.collections.SerializablePair; +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +/** + * Vectorized version of on heap 'earliest' aggregator for column selectors with type DOUBLE.
+ */ +public class DoubleFirstVectorAggregator extends NumericFirstVectorAggregator +{ + double firstValue; + + public DoubleFirstVectorAggregator(VectorValueSelector timeSelector, VectorValueSelector valueSelector) + { + super(timeSelector, valueSelector); + firstValue = 0; + } + + @Override + public void initValue(ByteBuffer buf, int position) + { + buf.putDouble(position, 0); + } + + + @Override + void putValue(ByteBuffer buf, int position, int index) + { + firstValue = valueSelector.getDoubleVector()[index]; + buf.putDouble(position, firstValue); + } + + + /** + * @return A pair of the time and the double value stored at the position in the buffer; the value is null when the stored null marker is set. + */ + @Nullable + @Override + public Object get(ByteBuffer buf, int position) + { + final boolean rhsNull = isValueNull(buf, position); + return new SerializablePair<>(buf.getLong(position), rhsNull ? null : buf.getDouble(position + VALUE_OFFSET)); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstAggregatorFactory.java index be6a0f6aad97..6ee0f596dfa3 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstAggregatorFactory.java @@ -29,14 +29,21 @@ import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.AggregatorUtil; import org.apache.druid.query.aggregation.BufferAggregator; +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.query.aggregation.any.NumericNilVectorAggregator; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.BaseFloatColumnValueSelector; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelectorFactory; import
org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.NilColumnValueSelector; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.vector.BaseLongVectorValueSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorValueSelector; import javax.annotation.Nullable; import java.nio.ByteBuffer; @@ -123,6 +130,27 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) } } + @Override + public VectorAggregator factorizeVector(VectorColumnSelectorFactory columnSelectorFactory) + { + ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(fieldName); + VectorValueSelector valueSelector = columnSelectorFactory.makeValueSelector(fieldName); + //time is always long + BaseLongVectorValueSelector timeSelector = (BaseLongVectorValueSelector) columnSelectorFactory.makeValueSelector( + timeColumn); + if (capabilities == null || capabilities.isNumeric()) { + return new FloatFirstVectorAggregator(timeSelector, valueSelector); + } else { + return NumericNilVectorAggregator.floatNilVectorAggregator(); + } + } + + @Override + public boolean canVectorize(ColumnInspector columnInspector) + { + return true; + } + @Override public Comparator getComparator() { diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstVectorAggregator.java new file mode 100644 index 000000000000..da5edded9488 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstVectorAggregator.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.first; + +import org.apache.druid.collections.SerializablePair; +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +/** + * Vectorized version of on heap 'earliest' aggregator for column selectors with type FLOAT. + */ +public class FloatFirstVectorAggregator extends NumericFirstVectorAggregator +{ + float firstValue; + + public FloatFirstVectorAggregator(VectorValueSelector timeSelector, VectorValueSelector valueSelector) + { + super(timeSelector, valueSelector); + firstValue = 0; + } + + @Override + public void initValue(ByteBuffer buf, int position) + { + buf.putFloat(position, 0); + } + + + @Override + void putValue(ByteBuffer buf, int position, int index) + { + firstValue = valueSelector.getFloatVector()[index]; + buf.putFloat(position, firstValue); + } + + + /** + * @return A pair of the time and the float value stored at the position in the buffer; the value is null when the stored null marker is set. + */ + @Nullable + @Override + public Object get(ByteBuffer buf, int position) + { + final boolean rhsNull = isValueNull(buf, position); + return new SerializablePair<>(buf.getLong(position), rhsNull ?
null : buf.getFloat(position + VALUE_OFFSET)); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstAggregatorFactory.java index 695d01b3a4a7..d572d718f9b5 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstAggregatorFactory.java @@ -29,14 +29,21 @@ import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.AggregatorUtil; import org.apache.druid.query.aggregation.BufferAggregator; +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.query.aggregation.any.NumericNilVectorAggregator; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.BaseLongColumnValueSelector; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.NilColumnValueSelector; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.vector.BaseLongVectorValueSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorValueSelector; import javax.annotation.Nullable; import java.nio.ByteBuffer; @@ -122,6 +129,26 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) } } + @Override + public VectorAggregator factorizeVector(VectorColumnSelectorFactory columnSelectorFactory) + { + ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(fieldName); + 
VectorValueSelector valueSelector = columnSelectorFactory.makeValueSelector(fieldName); + BaseLongVectorValueSelector timeSelector = (BaseLongVectorValueSelector) columnSelectorFactory.makeValueSelector( + timeColumn); + if (capabilities == null || capabilities.isNumeric()) { + return new LongFirstVectorAggregator(timeSelector, valueSelector); + } else { + return NumericNilVectorAggregator.longNilVectorAggregator(); + } + } + + @Override + public boolean canVectorize(ColumnInspector columnInspector) + { + return true; + } + @Override public Comparator getComparator() { diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstVectorAggregator.java new file mode 100644 index 000000000000..521228f2be61 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstVectorAggregator.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.first; + +import org.apache.druid.collections.SerializablePair; +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +/** + * Vectorized version of on heap 'earliest' aggregator for column selectors with type LONG. + */ +public class LongFirstVectorAggregator extends NumericFirstVectorAggregator +{ + long firstValue; + + public LongFirstVectorAggregator(VectorValueSelector timeSelector, VectorValueSelector valueSelector) + { + super(timeSelector, valueSelector); + firstValue = 0; + } + + @Override + public void initValue(ByteBuffer buf, int position) + { + buf.putLong(position, 0); + } + + + @Override + void putValue(ByteBuffer buf, int position, int index) + { + firstValue = valueSelector.getLongVector()[index]; + buf.putLong(position, firstValue); + } + + + /** + * @return A pair of the time and the long value stored at the position in the buffer; the value is null when the stored null marker is set. + */ + @Nullable + @Override + public Object get(ByteBuffer buf, int position) + { + final boolean rhsNull = isValueNull(buf, position); + return new SerializablePair<>(buf.getLong(position), rhsNull ? null : buf.getLong(position + VALUE_OFFSET)); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java new file mode 100644 index 000000000000..8fc2b55525c9 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.first; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +/** + * Class for vectorized version of first/earliest aggregator over numeric types + */ +public abstract class NumericFirstVectorAggregator implements VectorAggregator +{ + static final int NULL_OFFSET = Long.BYTES; + static final int VALUE_OFFSET = NULL_OFFSET + Byte.BYTES; + final VectorValueSelector valueSelector; + private final boolean useDefault = NullHandling.replaceWithDefault(); + private final VectorValueSelector timeSelector; + private long firstTime; + + public NumericFirstVectorAggregator(VectorValueSelector timeSelector, VectorValueSelector valueSelector) + { + this.timeSelector = timeSelector; + this.valueSelector = valueSelector; + firstTime = Long.MAX_VALUE; + } + + @Override + public void init(ByteBuffer buf, int position) + { + buf.putLong(position, Long.MAX_VALUE); + buf.put(position + NULL_OFFSET, useDefault ? 
NullHandling.IS_NOT_NULL_BYTE : NullHandling.IS_NULL_BYTE); + initValue(buf, position + VALUE_OFFSET); + } + + @Override + public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) + { + final long[] timeVector = timeSelector.getLongVector(); + final boolean[] nullValueVector = valueSelector.getNullVector(); + boolean nullAbsent = false; + firstTime = buf.getLong(position); + // check if nullVector is found or not + // the nullVector is null if no null values are found + // set the nullAbsent flag accordingly + if (nullValueVector == null) { + nullAbsent = true; + } + + // assumes the time vector is sorted in ascending order, so the first usable row is the earliest + // when nulls are not replaced with defaults, scan forward to the first non-null row + int index = startRow; + if (!useDefault && !nullAbsent) { + for (int i = startRow; i < endRow; i++) { + if (!nullValueVector[i]) { + index = i; + break; + } + } + } + + // update the buffer only if the candidate row is earlier than the stored time + final long earliestTime = timeVector[index]; + if (earliestTime < firstTime) { + firstTime = earliestTime; + if (useDefault || nullValueVector == null || !nullValueVector[index]) { + updateTimeWithValue(buf, position, firstTime, index); + } else { + updateTimeWithNull(buf, position, firstTime); + } + } + } + + /** + * + * Checks if the aggregated value at a position in the buffer is null or not + * + * @param buf byte buffer storing the byte array representation of the aggregate + * @param position offset within the byte buffer at which the current aggregate value is stored + * @return true if the null marker byte stored for the aggregate marks the value as null + */ + boolean isValueNull(ByteBuffer buf, int position) + { + return buf.get(position + NULL_OFFSET) == NullHandling.IS_NULL_BYTE; + } + + @Override + public void aggregate( + ByteBuffer buf, + int numRows, + int[] positions, + @Nullable int[] rows, + int positionOffset + ) + { + boolean[] nulls = useDefault ?
null : valueSelector.getNullVector(); + long[] timeVector = timeSelector.getLongVector(); + + for (int i = 0; i < numRows; i++) { + int position = positions[i] + positionOffset; + int row = rows == null ? i : rows[i]; + long firstTime = buf.getLong(position); + if (timeVector[row] < firstTime) { + if (useDefault || nulls == null || !nulls[row]) { + updateTimeWithValue(buf, position, timeVector[row], row); + } else { + updateTimeWithNull(buf, position, timeVector[row]); + } + } + } + } + + /** + * Updates the time and the non-null value at the appropriate position in the buffer + * + * @param buf byte buffer storing the byte array representation of the aggregate + * @param position offset within the byte buffer at which the current aggregate value is stored + * @param time the time to be updated in the buffer as the first time + * @param index the index in the value vector of the row holding the first value + */ + void updateTimeWithValue(ByteBuffer buf, int position, long time, int index) + { + buf.putLong(position, time); + buf.put(position + NULL_OFFSET, NullHandling.IS_NOT_NULL_BYTE); + putValue(buf, position + VALUE_OFFSET, index); + } + + /** + * Updates only the time at the appropriate position in the buffer and marks the value as null + * + * @param buf byte buffer storing the byte array representation of the aggregate + * @param position offset within the byte buffer at which the current aggregate value is stored + * @param time the time to be updated in the buffer as the first time + */ + void updateTimeWithNull(ByteBuffer buf, int position, long time) + { + buf.putLong(position, time); + buf.put(position + NULL_OFFSET, NullHandling.IS_NULL_BYTE); + } + + /** + * Abstract function which needs to be overridden by subclasses to set the initial value + */ + abstract void initValue(ByteBuffer buf, int position); + + /** + * Abstract function which needs to be overridden by subclasses to set the + * earliest value in the buffer depending on the datatype + */ + abstract void
putValue(ByteBuffer buf, int position, int index); + + @Override + public void close() + { + // no resources to cleanup + } +} diff --git a/processing/src/test/java/org/apache/druid/query/aggregation/first/DoubleFirstVectorAggregationTest.java b/processing/src/test/java/org/apache/druid/query/aggregation/first/DoubleFirstVectorAggregationTest.java new file mode 100644 index 000000000000..1575b33dcef0 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/aggregation/first/DoubleFirstVectorAggregationTest.java @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.first; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.Pair; +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.segment.vector.BaseLongVectorValueSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorValueSelector; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Answers; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.MockitoJUnitRunner; + +import java.nio.ByteBuffer; +import java.util.concurrent.ThreadLocalRandom; + +@RunWith(MockitoJUnitRunner.class) +public class DoubleFirstVectorAggregationTest extends InitializedNullHandlingTest +{ + private static final double EPSILON = 1e-5; + private static final double[] VALUES = new double[]{7.8d, 11, 23.67, 60}; + private static final boolean[] NULLS = new boolean[]{false, false, true, false}; + private long[] times = {2436, 6879, 7888, 8224}; + + private static final String NAME = "NAME"; + private static final String FIELD_NAME = "FIELD_NAME"; + private static final String TIME_COL = "__time"; + + @Mock + private VectorValueSelector selector; + @Mock + private BaseLongVectorValueSelector timeSelector; + private ByteBuffer buf; + + private DoubleFirstVectorAggregator target; + + private DoubleFirstAggregatorFactory doubleFirstAggregatorFactory; + @Mock(answer = Answers.RETURNS_DEEP_STUBS) + private VectorColumnSelectorFactory selectorFactory; + + @Before + public void setup() + { + byte[] randomBytes = new byte[1024]; + ThreadLocalRandom.current().nextBytes(randomBytes); + buf = ByteBuffer.wrap(randomBytes); + Mockito.doReturn(VALUES).when(selector).getDoubleVector(); + Mockito.doReturn(times).when(timeSelector).getLongVector(); + target = new 
DoubleFirstVectorAggregator(timeSelector, selector); + clearBufferForPositions(0, 0); + + Mockito.doReturn(null).when(selectorFactory).getColumnCapabilities(FIELD_NAME); + Mockito.doReturn(selector).when(selectorFactory).makeValueSelector(FIELD_NAME); + Mockito.doReturn(timeSelector).when(selectorFactory).makeValueSelector(TIME_COL); + doubleFirstAggregatorFactory = new DoubleFirstAggregatorFactory(NAME, FIELD_NAME, TIME_COL); + } + + @Test + public void testFactory() + { + Assert.assertTrue(doubleFirstAggregatorFactory.canVectorize(selectorFactory)); + VectorAggregator vectorAggregator = doubleFirstAggregatorFactory.factorizeVector(selectorFactory); + Assert.assertNotNull(vectorAggregator); + Assert.assertEquals(DoubleFirstVectorAggregator.class, vectorAggregator.getClass()); + } + + @Test + public void initValueShouldInitZero() + { + target.initValue(buf, 0); + double initVal = buf.getDouble(0); + Assert.assertEquals(0, initVal, EPSILON); + } + + @Test + public void aggregate() + { + target.aggregate(buf, 0, 0, VALUES.length); + Pair result = (Pair) target.get(buf, 0); + Assert.assertEquals(times[0], result.lhs.longValue()); + Assert.assertEquals(VALUES[0], result.rhs, EPSILON); + } + + @Test + public void aggregateWithNulls() + { + mockNullsVector(); + target.aggregate(buf, 0, 0, VALUES.length); + Pair result = (Pair) target.get(buf, 0); + Assert.assertEquals(times[0], result.lhs.longValue()); + Assert.assertEquals(VALUES[0], result.rhs, EPSILON); + } + + @Test + public void aggregateBatchWithoutRows() + { + int[] positions = new int[]{0, 43, 70}; + int positionOffset = 2; + clearBufferForPositions(positionOffset, positions); + target.aggregate(buf, 3, positions, null, positionOffset); + for (int i = 0; i < positions.length; i++) { + Pair result = (Pair) target.get(buf, positions[i] + positionOffset); + Assert.assertEquals(times[i], result.lhs.longValue()); + Assert.assertEquals(VALUES[i], result.rhs, EPSILON); + } + } + + @Test + public void 
aggregateBatchWithRows() + { + int[] positions = new int[]{0, 43, 70}; + int[] rows = new int[]{3, 2, 0}; + int positionOffset = 2; + clearBufferForPositions(positionOffset, positions); + target.aggregate(buf, 3, positions, rows, positionOffset); + for (int i = 0; i < positions.length; i++) { + Pair result = (Pair) target.get(buf, positions[i] + positionOffset); + Assert.assertEquals(times[rows[i]], result.lhs.longValue()); + Assert.assertEquals(VALUES[rows[i]], result.rhs, EPSILON); + } + } + + private void clearBufferForPositions(int offset, int... positions) + { + for (int position : positions) { + target.init(buf, offset + position); + } + } + + private void mockNullsVector() + { + if (!NullHandling.replaceWithDefault()) { + Mockito.doReturn(NULLS).when(selector).getNullVector(); + } + } +} diff --git a/processing/src/test/java/org/apache/druid/query/aggregation/first/FloatFirstVectorAggregationTest.java b/processing/src/test/java/org/apache/druid/query/aggregation/first/FloatFirstVectorAggregationTest.java new file mode 100644 index 000000000000..0eb7afe46e65 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/aggregation/first/FloatFirstVectorAggregationTest.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.first; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.Pair; +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.segment.vector.BaseLongVectorValueSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorValueSelector; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Answers; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.MockitoJUnitRunner; + +import java.nio.ByteBuffer; +import java.util.concurrent.ThreadLocalRandom; + +@RunWith(MockitoJUnitRunner.class) +public class FloatFirstVectorAggregationTest extends InitializedNullHandlingTest +{ + private static final double EPSILON = 1e-5; + private static final float[] VALUES = new float[]{7.2f, 15.6f, 2.1f, 150.0f}; + private static final boolean[] NULLS = new boolean[]{true, false, true, false}; + private long[] times = {2436, 6879, 7888, 8224}; + + private static final String NAME = "NAME"; + private static final String FIELD_NAME = "FIELD_NAME"; + private static final String TIME_COL = "__time"; + + @Mock + private VectorValueSelector selector; + @Mock + private BaseLongVectorValueSelector timeSelector; + private ByteBuffer buf; + + private FloatFirstVectorAggregator target; + + private FloatFirstAggregatorFactory floatFirstAggregatorFactory; + @Mock(answer = Answers.RETURNS_DEEP_STUBS) + private VectorColumnSelectorFactory selectorFactory; + + @Before + public void setup() + { + byte[] randomBytes = new byte[1024]; + ThreadLocalRandom.current().nextBytes(randomBytes); + buf = ByteBuffer.wrap(randomBytes); + 
Mockito.doReturn(VALUES).when(selector).getFloatVector(); + Mockito.doReturn(times).when(timeSelector).getLongVector(); + target = new FloatFirstVectorAggregator(timeSelector, selector); + clearBufferForPositions(0, 0); + + Mockito.doReturn(null).when(selectorFactory).getColumnCapabilities(FIELD_NAME); + Mockito.doReturn(selector).when(selectorFactory).makeValueSelector(FIELD_NAME); + Mockito.doReturn(timeSelector).when(selectorFactory).makeValueSelector(TIME_COL); + floatFirstAggregatorFactory = new FloatFirstAggregatorFactory(NAME, FIELD_NAME, TIME_COL); + + } + + @Test + public void testFactory() + { + Assert.assertTrue(floatFirstAggregatorFactory.canVectorize(selectorFactory)); + VectorAggregator vectorAggregator = floatFirstAggregatorFactory.factorizeVector(selectorFactory); + Assert.assertNotNull(vectorAggregator); + Assert.assertEquals(FloatFirstVectorAggregator.class, vectorAggregator.getClass()); + } + + @Test + public void initValueShouldBeZero() + { + target.initValue(buf, 0); + float initVal = buf.getFloat(0); + Assert.assertEquals(0.0f, initVal, EPSILON); + } + + @Test + public void aggregate() + { + target.init(buf, 0); + target.aggregate(buf, 0, 0, VALUES.length); + Pair result = (Pair) target.get(buf, 0); + Assert.assertEquals(times[0], result.lhs.longValue()); + Assert.assertEquals(VALUES[0], result.rhs, EPSILON); + } + + @Test + public void aggregateWithNulls() + { + mockNullsVector(); + target.aggregate(buf, 0, 0, VALUES.length); + Pair result = (Pair) target.get(buf, 0); + if (!NullHandling.replaceWithDefault()) { + Assert.assertEquals(times[1], result.lhs.longValue()); + Assert.assertEquals(VALUES[1], result.rhs, EPSILON); + } else { + Assert.assertEquals(times[0], result.lhs.longValue()); + Assert.assertEquals(VALUES[0], result.rhs, EPSILON); + } + } + + @Test + public void aggregateBatchWithoutRows() + { + int[] positions = new int[]{0, 43, 70}; + int positionOffset = 2; + clearBufferForPositions(positionOffset, positions); + 
target.aggregate(buf, 3, positions, null, positionOffset); + for (int i = 0; i < positions.length; i++) { + Pair result = (Pair) target.get(buf, positions[i] + positionOffset); + Assert.assertEquals(times[i], result.lhs.longValue()); + Assert.assertEquals(VALUES[i], result.rhs, EPSILON); + } + } + + @Test + public void aggregateBatchWithRows() + { + int[] positions = new int[]{0, 43, 70}; + int[] rows = new int[]{3, 2, 0}; + int positionOffset = 2; + clearBufferForPositions(positionOffset, positions); + target.aggregate(buf, 3, positions, rows, positionOffset); + for (int i = 0; i < positions.length; i++) { + Pair result = (Pair) target.get(buf, positions[i] + positionOffset); + Assert.assertEquals(times[rows[i]], result.lhs.longValue()); + Assert.assertEquals(VALUES[rows[i]], result.rhs, EPSILON); + } + } + + private void clearBufferForPositions(int offset, int... positions) + { + for (int position : positions) { + target.init(buf, offset + position); + } + } + + private void mockNullsVector() + { + if (!NullHandling.replaceWithDefault()) { + Mockito.doReturn(NULLS).when(selector).getNullVector(); + } + } +} diff --git a/processing/src/test/java/org/apache/druid/query/aggregation/first/LongFirstVectorAggregationTest.java b/processing/src/test/java/org/apache/druid/query/aggregation/first/LongFirstVectorAggregationTest.java new file mode 100644 index 000000000000..5f2072ef5b25 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/aggregation/first/LongFirstVectorAggregationTest.java @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.first; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.Pair; +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.segment.vector.BaseLongVectorValueSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorValueSelector; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Answers; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.MockitoJUnitRunner; + +import java.nio.ByteBuffer; +import java.util.concurrent.ThreadLocalRandom; + + +@RunWith(MockitoJUnitRunner.class) +public class LongFirstVectorAggregationTest extends InitializedNullHandlingTest +{ + private static final double EPSILON = 1e-5; + private static final long[] VALUES = new long[]{7, 15, 2, 150}; + private static final boolean[] NULLS = new boolean[]{false, false, true, false}; + private static final String NAME = "NAME"; + private static final String FIELD_NAME = "FIELD_NAME"; + private static final String TIME_COL = "__time"; + private long[] times = {2436, 6879, 7888, 8224}; + @Mock + private VectorValueSelector selector; + @Mock + private BaseLongVectorValueSelector timeSelector; + private ByteBuffer buf; + private LongFirstVectorAggregator target; + + private LongFirstAggregatorFactory longFirstAggregatorFactory; + 
@Mock(answer = Answers.RETURNS_DEEP_STUBS) + private VectorColumnSelectorFactory selectorFactory; + + @Before + public void setup() + { + byte[] randomBytes = new byte[1024]; + ThreadLocalRandom.current().nextBytes(randomBytes); + buf = ByteBuffer.wrap(randomBytes); + Mockito.doReturn(VALUES).when(selector).getLongVector(); + Mockito.doReturn(times).when(timeSelector).getLongVector(); + target = new LongFirstVectorAggregator(timeSelector, selector); + clearBufferForPositions(0, 0); + + + Mockito.doReturn(null).when(selectorFactory).getColumnCapabilities(FIELD_NAME); + Mockito.doReturn(selector).when(selectorFactory).makeValueSelector(FIELD_NAME); + Mockito.doReturn(timeSelector).when(selectorFactory).makeValueSelector(TIME_COL); + longFirstAggregatorFactory = new LongFirstAggregatorFactory(NAME, FIELD_NAME, TIME_COL); + + } + + @Test + public void testFactory() + { + Assert.assertTrue(longFirstAggregatorFactory.canVectorize(selectorFactory)); + VectorAggregator vectorAggregator = longFirstAggregatorFactory.factorizeVector(selectorFactory); + Assert.assertNotNull(vectorAggregator); + Assert.assertEquals(LongFirstVectorAggregator.class, vectorAggregator.getClass()); + } + + @Test + public void initValueShouldInitZero() + { + target.initValue(buf, 0); + long initVal = buf.getLong(0); + Assert.assertEquals(0, initVal); + } + + @Test + public void aggregate() + { + target.aggregate(buf, 0, 0, VALUES.length); + Pair result = (Pair) target.get(buf, 0); + Assert.assertEquals(times[0], result.lhs.longValue()); + Assert.assertEquals(VALUES[0], result.rhs, EPSILON); + } + + @Test + public void aggregateWithNulls() + { + mockNullsVector(); + target.aggregate(buf, 0, 0, VALUES.length); + Pair result = (Pair) target.get(buf, 0); + Assert.assertEquals(times[0], result.lhs.longValue()); + Assert.assertEquals(VALUES[0], result.rhs, EPSILON); + } + + @Test + public void aggregateBatchWithoutRows() + { + int[] positions = new int[]{0, 43, 70}; + int positionOffset = 2; + 
clearBufferForPositions(positionOffset, positions); + target.aggregate(buf, 3, positions, null, positionOffset); + for (int i = 0; i < positions.length; i++) { + Pair result = (Pair) target.get(buf, positions[i] + positionOffset); + Assert.assertEquals(times[i], result.lhs.longValue()); + Assert.assertEquals(VALUES[i], result.rhs, EPSILON); + } + } + + @Test + public void aggregateBatchWithRows() + { + int[] positions = new int[]{0, 43, 70}; + int[] rows = new int[]{3, 2, 0}; + int positionOffset = 2; + clearBufferForPositions(positionOffset, positions); + target.aggregate(buf, 3, positions, rows, positionOffset); + for (int i = 0; i < positions.length; i++) { + Pair result = (Pair) target.get(buf, positions[i] + positionOffset); + Assert.assertEquals(times[rows[i]], result.lhs.longValue()); + Assert.assertEquals(VALUES[rows[i]], result.rhs, EPSILON); + } + } + + private void clearBufferForPositions(int offset, int... positions) + { + for (int position : positions) { + target.init(buf, offset + position); + } + } + + private void mockNullsVector() + { + if (!NullHandling.replaceWithDefault()) { + Mockito.doReturn(NULLS).when(selector).getNullVector(); + } + } +} diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java index 70c9373a0e23..4f4ba33cbda2 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java @@ -3530,9 +3530,6 @@ public void testGroupByWithCardinality() @Test public void testGroupByWithFirstLast() { - // Cannot vectorize due to "first", "last" aggregators. 
- cannotVectorize(); - GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.DATA_SOURCE) .setQuerySegmentSpec(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC) @@ -3620,9 +3617,6 @@ public void testGroupByWithFirstLast() @Test public void testGroupByWithNoResult() { - // Cannot vectorize due to first, last aggregators. - cannotVectorize(); - GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.DATA_SOURCE) .setQuerySegmentSpec(QueryRunnerTestHelper.EMPTY_INTERVAL) @@ -7465,9 +7459,6 @@ public void testSubqueryWithHyperUniquesPostAggregator() @Test public void testSubqueryWithFirstLast() { - // Cannot vectorize due to "first", "last" aggregators. - cannotVectorize(); - GroupByQuery subquery = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.DATA_SOURCE) .setQuerySegmentSpec(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC) diff --git a/processing/src/test/java/org/apache/druid/query/timeseries/TimeseriesQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/timeseries/TimeseriesQueryRunnerTest.java index d2755b6579ce..0a980bc4ae13 100644 --- a/processing/src/test/java/org/apache/druid/query/timeseries/TimeseriesQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/timeseries/TimeseriesQueryRunnerTest.java @@ -169,9 +169,6 @@ public TimeseriesQueryRunnerTest( @Test public void testEmptyTimeseries() { - // Cannot vectorize due to "doubleFirst" aggregator. - cannotVectorize(); - TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() .dataSource(QueryRunnerTestHelper.DATA_SOURCE) .granularity(QueryRunnerTestHelper.ALL_GRAN) @@ -1960,9 +1957,6 @@ public void testTimeseriesWithMultiValueFilteringJavascriptAggregatorAndAlsoRegu @Test public void testTimeseriesWithFirstLastAggregator() { - // Cannot vectorize due to "doubleFirst", "doubleLast" aggregators. 
- cannotVectorize(); - TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() .dataSource(QueryRunnerTestHelper.DATA_SOURCE) .granularity(QueryRunnerTestHelper.MONTH_GRAN) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/EarliestLatestAnySqlAggregator.java b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/EarliestLatestAnySqlAggregator.java index 6efc8846e914..f665d0807e76 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/EarliestLatestAnySqlAggregator.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/EarliestLatestAnySqlAggregator.java @@ -198,13 +198,12 @@ public Aggregation toDruidAggregation( final String fieldName = getColumnName(plannerContext, virtualColumnRegistry, args.get(0), rexNodes.get(0)); if (!rowSignature.contains(ColumnHolder.TIME_COLUMN_NAME) && (aggregatorType == AggregatorType.LATEST || aggregatorType == AggregatorType.EARLIEST)) { - plannerContext.setPlanningError("%s() aggregator depends on __time column, the underlying datasource " + throw new ISE("%s() aggregator depends on __time column, the underlying datasource " + "or extern function you are querying doesn't contain __time column, " + "Please use %s_BY() and specify the time column you want to use", aggregatorType.name(), aggregatorType.name() ); - return null; } final AggregatorFactory theAggFactory; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index bfe870116101..cee319a1beba 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -1327,12 +1327,44 @@ public void testStringAnyInSubquery() ); } + @Test + public void testOffHeapEarliestGroupBy() throws Exception + { + testQuery( + "SELECT dim2, EARLIEST(m1) AS val1 FROM foo GROUP BY dim2", + ImmutableList.of( + 
GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0"))) + .setAggregatorSpecs(aggregators( + new FloatFirstAggregatorFactory("a0", "m1", null) + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + NullHandling.sqlCompatible() + ? ImmutableList.of( + new Object[]{null, 2.0f}, + new Object[]{"", 3.0f}, + new Object[]{"a", 1.0f}, + new Object[]{"abc", 5.0f} + ) + : ImmutableList.of( + new Object[]{"", 2.0f}, + new Object[]{"a", 1.0f}, + new Object[]{"abc", 5.0f} + + ) + ); + } + @Test public void testEarliestAggregatorsNumericNulls() { notMsqCompatible(); - // Cannot vectorize EARLIEST aggregator. - skipVectorize(); testQuery( "SELECT EARLIEST(l1), EARLIEST(d1), EARLIEST(f1) FROM druid.numfoo", @@ -1508,8 +1540,7 @@ public void testAnyAggregatorsSkipNullsWithFilter() public void testOrderByEarliestFloat() { notMsqCompatible(); - // Cannot vectorize EARLIEST aggregator. - skipVectorize(); + List expected; if (NullHandling.replaceWithDefault()) { expected = ImmutableList.of( @@ -1556,8 +1587,7 @@ public void testOrderByEarliestFloat() public void testOrderByEarliestDouble() { notMsqCompatible(); - // Cannot vectorize EARLIEST aggregator. - skipVectorize(); + List expected; if (NullHandling.replaceWithDefault()) { expected = ImmutableList.of( @@ -1604,8 +1634,7 @@ public void testOrderByEarliestDouble() public void testOrderByEarliestLong() { notMsqCompatible(); - // Cannot vectorize EARLIEST aggregator. 
- skipVectorize(); + List expected; if (NullHandling.replaceWithDefault()) { expected = ImmutableList.of( @@ -14674,4 +14703,37 @@ public void testFilterWithNVLAndNotIn() ) ); } + + @Test + public void testEarliestVectorAggregators() throws Exception + { + testQuery( + "SELECT " + + "EARLIEST(cnt), EARLIEST(cnt + 1), EARLIEST(m1), EARLIEST(m1+1) " + + "FROM druid.numfoo", + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource(CalciteTests.DATASOURCE3) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.ALL) + .virtualColumns( + expressionVirtualColumn("v0", "(\"cnt\" + 1)", ColumnType.LONG), + expressionVirtualColumn("v1", "(\"m1\" + 1)", ColumnType.FLOAT) + ) + .aggregators( + aggregators( + new LongFirstAggregatorFactory("a0", "cnt", null), + new LongFirstAggregatorFactory("a1", "v0", null), + new FloatFirstAggregatorFactory("a2", "m1", null), + new FloatFirstAggregatorFactory("a3", "v1", null) + ) + ) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{1L, 2L, 1.0f, 2.0f} + ) + ); + } } From 59118ae8853d8729180122047cfe49069cf24026 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Mon, 12 Jun 2023 17:10:20 +0530 Subject: [PATCH 02/20] Vectorizing earliest string aggregator --- .../first/StringFirstAggregatorFactory.java | 26 +++ .../first/StringFirstVectorAggregator.java | 175 ++++++++++++++++++ .../StringFirstVectorAggregatorTest.java | 167 +++++++++++++++++ .../EarliestLatestAnySqlAggregator.java | 3 +- .../druid/sql/calcite/CalciteQueryTest.java | 9 - 5 files changed, 370 insertions(+), 10 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java create mode 100644 processing/src/test/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregatorTest.java diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregatorFactory.java 
b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregatorFactory.java index f7624f4541b3..3cc31b2737ab 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregatorFactory.java @@ -32,12 +32,18 @@ import org.apache.druid.query.aggregation.AggregatorUtil; import org.apache.druid.query.aggregation.BufferAggregator; import org.apache.druid.query.aggregation.SerializablePairLongString; +import org.apache.druid.query.aggregation.VectorAggregator; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.segment.BaseObjectColumnValueSelector; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.NilColumnValueSelector; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.vector.BaseLongVectorValueSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorObjectSelector; import javax.annotation.Nullable; import java.nio.ByteBuffer; @@ -154,6 +160,26 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) } } + @Override + public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFactory) + { + ColumnCapabilities capabilities = selectorFactory.getColumnCapabilities(fieldName); + VectorObjectSelector vSelector = selectorFactory.makeObjectSelector(fieldName); + BaseLongVectorValueSelector timeSelector = (BaseLongVectorValueSelector) selectorFactory.makeValueSelector( + timeColumn); + if (capabilities != null) { + return new StringFirstVectorAggregator(timeSelector, vSelector, maxStringBytes); + } else { + return new StringFirstVectorAggregator(null, 
vSelector, maxStringBytes); + } + } + + @Override + public boolean canVectorize(ColumnInspector columnInspector) + { + return true; + } + @Override public Comparator getComparator() { diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java new file mode 100644 index 000000000000..81486d5bf83b --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.first; + +import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.query.aggregation.SerializablePairLongString; +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.segment.DimensionHandlerUtils; +import org.apache.druid.segment.vector.BaseLongVectorValueSelector; +import org.apache.druid.segment.vector.VectorObjectSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class StringFirstVectorAggregator implements VectorAggregator +{ + private static final SerializablePairLongString INIT = new SerializablePairLongString( + DateTimes.MAX.getMillis(), + null + ); + private final BaseLongVectorValueSelector timeSelector; + private final VectorObjectSelector valueSelector; + private final int maxStringBytes; + protected long firstTime; + + public StringFirstVectorAggregator( + BaseLongVectorValueSelector timeSelector, + VectorObjectSelector valueSelector, + int maxStringBytes + ) { + this.timeSelector = timeSelector; + this.valueSelector = valueSelector; + this.maxStringBytes = maxStringBytes; + } + + @Override + public void init(ByteBuffer buf, int position) + { + StringFirstLastUtils.writePair(buf, position, INIT, maxStringBytes); + } + + @Override + public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) + { + if (timeSelector == null) { + return; + } + long[] times = timeSelector.getLongVector(); + Object[] objectsWhichMightBeStrings = valueSelector.getObjectVector(); + firstTime = buf.getLong(position); + int index; + for(int i=startRow; i<endRow; i++) { + if (times[i] > firstTime) { + break; + } + index = i; + final boolean foldNeeded = StringFirstLastUtils.objectNeedsFoldCheck(objectsWhichMightBeStrings[index]); + if (foldNeeded) { + final SerializablePairLongString inPair = StringFirstLastUtils.readPairFromVectorSelectorsAtIndex( + timeSelector, + valueSelector, + index + ); + if (inPair != null) { + final long firstTime = 
buf.getLong(position); + if (inPair.lhs < firstTime) { + StringFirstLastUtils.writePair( + buf, + position, + new SerializablePairLongString(inPair.lhs, inPair.rhs), + maxStringBytes + ); + } + } + } else { + final long time = times[index]; + if (time < firstTime) { + final String value = DimensionHandlerUtils.convertObjectToString(objectsWhichMightBeStrings[index]); + firstTime = time; + StringFirstLastUtils.writePair( + buf, + position, + new SerializablePairLongString(time, value), + maxStringBytes + ); + } + } + } + + } + + @Override + public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) + { + long[] timeVector = timeSelector.getLongVector(); + Object[] objectsWhichMightBeStrings = valueSelector.getObjectVector(); + + // iterate once over the object vector to find first non null element and + // determine if the type is Pair or not + boolean foldNeeded = false; + for (Object obj : objectsWhichMightBeStrings) { + if (obj == null) { + continue; + } else { + foldNeeded = StringFirstLastUtils.objectNeedsFoldCheck(obj); + break; + } + } + + for (int i = 0; i < numRows; i++) { + int position = positions[i] + positionOffset; + int row = rows == null ? 
i : rows[i]; + long firstTime = buf.getLong(position); + if (timeVector[row] < firstTime) { + if (foldNeeded) { + final SerializablePairLongString inPair = StringFirstLastUtils.readPairFromVectorSelectorsAtIndex( + timeSelector, + valueSelector, + row + ); + if (inPair != null) { + if (inPair.lhs < firstTime) { + StringFirstLastUtils.writePair( + buf, + position, + new SerializablePairLongString(inPair.lhs, inPair.rhs), + maxStringBytes + ); + } + } + } else { + final String value = DimensionHandlerUtils.convertObjectToString(objectsWhichMightBeStrings[row]); + firstTime = timeVector[row]; + StringFirstLastUtils.writePair( + buf, + position, + new SerializablePairLongString(firstTime, value), + maxStringBytes + ); + } + } + } + + } + + @Nullable + @Override + public Object get(ByteBuffer buf, int position) + { + return StringFirstLastUtils.readPair(buf, position); + } + + @Override + public void close() + { + // nothing to close + } +} diff --git a/processing/src/test/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregatorTest.java b/processing/src/test/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregatorTest.java new file mode 100644 index 000000000000..148f4f95937a --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregatorTest.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.first; + +import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.Pair; +import org.apache.druid.query.aggregation.SerializablePairLongString; +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.segment.vector.BaseLongVectorValueSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorObjectSelector; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Answers; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.MockitoJUnitRunner; + +import java.nio.ByteBuffer; +import java.util.concurrent.ThreadLocalRandom; + + +@RunWith(MockitoJUnitRunner.class) +public class StringFirstVectorAggregatorTest extends InitializedNullHandlingTest +{ + private static final double EPSILON = 1e-5; + private static final String[] VALUES = new String[]{"a", "b", null, "c"}; + private static final boolean[] NULLS = new boolean[]{false, false, true, false}; + private static final String NAME = "NAME"; + private static final String FIELD_NAME = "FIELD_NAME"; + private static final String TIME_COL = "__time"; + private long[] times = {2436, 6879, 7888, 8224}; + private long[] timesSame = {2436, 2436}; + private SerializablePairLongString[] pairs = { + new SerializablePairLongString(2345001L, "first"), + new 
SerializablePairLongString(2345100L, "notFirst") + }; + + @Mock + private VectorObjectSelector selector; + @Mock + private VectorObjectSelector selectorForPairs; + @Mock + private BaseLongVectorValueSelector timeSelector; + @Mock + private BaseLongVectorValueSelector timeSelectorForPairs; + private ByteBuffer buf; + private StringFirstVectorAggregator target; + private StringFirstVectorAggregator targetWithPairs; + + private StringFirstAggregatorFactory stringFirstAggregatorFactory; + @Mock(answer = Answers.RETURNS_DEEP_STUBS) + private VectorColumnSelectorFactory selectorFactory; + + @Before + public void setup() + { + byte[] randomBytes = new byte[1024]; + ThreadLocalRandom.current().nextBytes(randomBytes); + buf = ByteBuffer.wrap(randomBytes); + Mockito.doReturn(VALUES).when(selector).getObjectVector(); + Mockito.doReturn(times).when(timeSelector).getLongVector(); + Mockito.doReturn(timesSame).when(timeSelectorForPairs).getLongVector(); + Mockito.doReturn(pairs).when(selectorForPairs).getObjectVector(); + target = new StringFirstVectorAggregator(timeSelector, selector, 10); + targetWithPairs = new StringFirstVectorAggregator(timeSelectorForPairs, selectorForPairs, 10); + clearBufferForPositions(0, 0); + + + Mockito.doReturn(selector).when(selectorFactory).makeObjectSelector(FIELD_NAME); + Mockito.doReturn(timeSelector).when(selectorFactory).makeValueSelector(TIME_COL); + stringFirstAggregatorFactory = new StringFirstAggregatorFactory(NAME, FIELD_NAME, TIME_COL, 10); + + } + + @Test + public void testAggregateWithPairs() + { + targetWithPairs.aggregate(buf, 0, 0, pairs.length); + Pair result = (Pair) targetWithPairs.get(buf, 0); + //Should come 0 as the last value as the left of the pair is greater + Assert.assertEquals(pairs[0].lhs.longValue(), result.lhs.longValue()); + Assert.assertEquals(pairs[0].rhs, result.rhs); + } + + @Test + public void testFactory() + { + Assert.assertTrue(stringFirstAggregatorFactory.canVectorize(selectorFactory)); + VectorAggregator 
vectorAggregator = stringFirstAggregatorFactory.factorizeVector(selectorFactory); + Assert.assertNotNull(vectorAggregator); + Assert.assertEquals(StringFirstVectorAggregator.class, vectorAggregator.getClass()); + } + + @Test + public void initValueShouldBeMaxDate() + { + target.init(buf, 0); + long initVal = buf.getLong(0); + Assert.assertEquals(DateTimes.MAX.getMillis(), initVal); + } + + @Test + public void aggregate() + { + target.aggregate(buf, 0, 0, VALUES.length); + Pair result = (Pair) target.get(buf, 0); + Assert.assertEquals(times[0], result.lhs.longValue()); + Assert.assertEquals(VALUES[0], result.rhs); + } + + @Test + public void aggregateBatchWithoutRows() + { + int[] positions = new int[]{0, 43, 70}; + int positionOffset = 2; + clearBufferForPositions(positionOffset, positions); + target.aggregate(buf, 3, positions, null, positionOffset); + for (int i = 0; i < positions.length; i++) { + Pair result = (Pair) target.get(buf, positions[i] + positionOffset); + Assert.assertEquals(times[i], result.lhs.longValue()); + Assert.assertEquals(VALUES[i], result.rhs); + } + } + + @Test + public void aggregateBatchWithRows() + { + int[] positions = new int[]{0, 43, 70}; + int[] rows = new int[]{3, 2, 0}; + int positionOffset = 2; + clearBufferForPositions(positionOffset, positions); + target.aggregate(buf, 3, positions, rows, positionOffset); + for (int i = 0; i < positions.length; i++) { + Pair result = (Pair) target.get(buf, positions[i] + positionOffset); + Assert.assertEquals(times[rows[i]], result.lhs.longValue()); + Assert.assertEquals(VALUES[rows[i]], result.rhs); + } + } + + private void clearBufferForPositions(int offset, int... 
positions) + { + for (int position : positions) { + target.init(buf, offset + position); + } + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/EarliestLatestAnySqlAggregator.java b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/EarliestLatestAnySqlAggregator.java index f665d0807e76..6efc8846e914 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/EarliestLatestAnySqlAggregator.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/EarliestLatestAnySqlAggregator.java @@ -198,12 +198,13 @@ public Aggregation toDruidAggregation( final String fieldName = getColumnName(plannerContext, virtualColumnRegistry, args.get(0), rexNodes.get(0)); if (!rowSignature.contains(ColumnHolder.TIME_COLUMN_NAME) && (aggregatorType == AggregatorType.LATEST || aggregatorType == AggregatorType.EARLIEST)) { - throw new ISE("%s() aggregator depends on __time column, the underlying datasource " + plannerContext.setPlanningError("%s() aggregator depends on __time column, the underlying datasource " + "or extern function you are querying doesn't contain __time column, " + "Please use %s_BY() and specify the time column you want to use", aggregatorType.name(), aggregatorType.name() ); + return null; } final AggregatorFactory theAggFactory; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index cee319a1beba..2185af62522f 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -638,8 +638,6 @@ public void testGroupBySingleColumnDescendingNoTopN() public void testEarliestAggregators() { notMsqCompatible(); - // Cannot vectorize EARLIEST aggregator. 
- skipVectorize(); testQuery( "SELECT " @@ -1071,8 +1069,6 @@ public void testStringLatestByGroupByWithAlwaysFalseCondition() public void testPrimitiveEarliestInSubquery() { notMsqCompatible(); - // Cannot vectorize EARLIEST aggregator. - skipVectorize(); testQuery( "SELECT SUM(val1), SUM(val2), SUM(val3) FROM (SELECT dim2, EARLIEST(m1) AS val1, EARLIEST(cnt) AS val2, EARLIEST(m2) AS val3 FROM foo GROUP BY dim2)", @@ -1170,9 +1166,6 @@ public void testStringLatestInSubquery() @Test public void testStringEarliestInSubquery() { - // Cannot vectorize EARLIEST aggregator. - skipVectorize(); - testQuery( "SELECT SUM(val) FROM (SELECT dim2, EARLIEST(dim1, 10) AS val FROM foo GROUP BY dim2)", ImmutableList.of( @@ -1424,8 +1417,6 @@ public void testLatestAggregatorsNumericNull() public void testFirstLatestAggregatorsSkipNulls() { notMsqCompatible(); - // Cannot vectorize EARLIEST aggregator. - skipVectorize(); final DimFilter filter; if (useDefault) { From 6c139de4f2afc4f0b6fd8ddbd2dfd100ce0bb3dd Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Mon, 12 Jun 2023 17:15:16 +0530 Subject: [PATCH 03/20] checkstyle fix --- .../query/aggregation/first/StringFirstVectorAggregator.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java index 81486d5bf83b..95ddcf92c77c 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java @@ -44,7 +44,8 @@ public StringFirstVectorAggregator( BaseLongVectorValueSelector timeSelector, VectorObjectSelector valueSelector, int maxStringBytes - ) { + ) + { this.timeSelector = timeSelector; this.valueSelector = valueSelector; this.maxStringBytes = maxStringBytes; @@ -66,7 +67,7 
@@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) Object[] objectsWhichMightBeStrings = valueSelector.getObjectVector(); firstTime = buf.getLong(position); int index; - for(int i=startRow; i firstTime) { break; } From 556b3def6ff563dcb799618b88460f87da79dd52 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Tue, 13 Jun 2023 07:29:36 +0530 Subject: [PATCH 04/20] Removing unnecessary exceptions --- .../java/org/apache/druid/sql/calcite/CalciteQueryTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 2185af62522f..1dc12a86f099 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -1321,7 +1321,7 @@ public void testStringAnyInSubquery() } @Test - public void testOffHeapEarliestGroupBy() throws Exception + public void testOffHeapEarliestGroupBy() { testQuery( "SELECT dim2, EARLIEST(m1) AS val1 FROM foo GROUP BY dim2", @@ -14696,7 +14696,7 @@ public void testFilterWithNVLAndNotIn() } @Test - public void testEarliestVectorAggregators() throws Exception + public void testEarliestVectorAggregators() { testQuery( "SELECT " From cf6fe0f23514bec665f6550190ef3f7979a3e2d9 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Fri, 16 Jun 2023 14:40:01 +0530 Subject: [PATCH 05/20] Ignoring tests in MSQ as earliest is not supported for numeric there --- .../query/SqlExpressionBenchmark.java | 53 +------------------ .../msq/test/CalciteSelectQueryMSQTest.java | 18 +++++++ 2 files changed, 20 insertions(+), 51 deletions(-) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java index 018ac6176d96..b191d41782a1 100644 --- 
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java @@ -221,9 +221,7 @@ public String getFormatString() // 45: EARLIEST aggregator double "SELECT EARLIEST(double4) FROM foo", // 46: EARLIEST aggregator float - "SELECT EARLIEST(float3) FROM foo", - // 47: EARLIEST aggregator all - "SELECT EARLIEST(float3), EARLIEST(long1), EARLIEST(double4) FROM foo" + "SELECT EARLIEST(float3) FROM foo" ); @Param({"5000000"}) @@ -236,56 +234,9 @@ public String getFormatString() private String vectorize; @Param({ - // non-expression reference - "0", - "1", - "2", - "3", - "4", - "5", - "6", - // expressions, etc - "7", - "8", - "9", - "10", - "11", - "12", - "13", - "14", - "15", - "16", - "17", - "18", - "19", - "20", - "21", - "22", - "23", - "24", - "25", - "26", - "27", - "28", - "29", - "30", - "31", - "32", - "33", - "34", - "35", - "36", - "37", - "38", - "39", - "40", - "41", - "42", - "43", "44", "45", - "46", - "47" + "46" }) private String query; diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteSelectQueryMSQTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteSelectQueryMSQTest.java index 904c84063c90..7b1a0fbb07da 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteSelectQueryMSQTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteSelectQueryMSQTest.java @@ -156,6 +156,10 @@ public void testMaxSubqueryRows() } + // The numeric varieties of the EARLIEST and LATEST aggregators do not work properly. + // Attempting to use the numeric varieties of these aggregators lead to an error + // like java.lang.ClassCastException: class java.lang.Double cannot be cast + // to class org.apache.druid.collections.SerializablePair. 
@Ignore @Override public void testQueryWithMoreThanMaxNumericInFilter() @@ -163,6 +167,20 @@ public void testQueryWithMoreThanMaxNumericInFilter() } + // MSQ currently does not su + @Ignore + @Override + public void testEarliestVectorAggregators() + { + + } + @Ignore + @Override + public void testOffHeapEarliestGroupBy() + { + + } + /** * Same query as {@link CalciteQueryTest#testArrayAggQueryOnComplexDatatypes}. ARRAY_AGG is not supported in MSQ currently. * Once support is added, this test can be removed and msqCompatible() can be added to the one in CalciteQueryTest. From cf88e00a0aaf21045d0197790976337af68a1a28 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Fri, 16 Jun 2023 14:44:47 +0530 Subject: [PATCH 06/20] Fixing benchmarks --- .../query/SqlExpressionBenchmark.java | 49 ++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java index b191d41782a1..2de02863267c 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java @@ -234,9 +234,56 @@ public String getFormatString() private String vectorize; @Param({ + // non-expression reference + "0", + "1", + "2", + "3", + "4", + "5", + "6", + // expressions, etc + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", "44", "45", - "46" + "46", + "47" }) private String query; From a9a6fc261c97e1e55f1fcacef79d7d257d23dbd5 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Mon, 19 Jun 2023 10:28:04 +0530 Subject: [PATCH 07/20] Updating tests as MSQ does not 
support earliest for some cases --- .../query/SqlExpressionBenchmark.java | 4 ++-- .../msq/test/CalciteSelectQueryMSQTest.java | 18 ------------------ .../first/DoubleFirstVectorAggregator.java | 3 --- .../first/FloatFirstVectorAggregator.java | 3 --- .../first/LongFirstVectorAggregator.java | 3 --- .../first/NumericFirstVectorAggregator.java | 1 - .../first/StringFirstVectorAggregator.java | 6 +++--- .../druid/sql/calcite/CalciteQueryTest.java | 4 ++++ 8 files changed, 9 insertions(+), 33 deletions(-) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java index 2de02863267c..3276004da6b9 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java @@ -205,7 +205,7 @@ public String getFormatString() "SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long4), 'PT1H', 1), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3", // 37: time shift + expr agg (group by), uniform distribution high cardinality "SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long5), 'PT1H', 1), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3", - // 38: LATEST aggregator + // 38: LATEST aggregator long "SELECT LATEST(long1) FROM foo", // 39: LATEST aggregator double "SELECT LATEST(double4) FROM foo", @@ -216,7 +216,7 @@ public String getFormatString() // 42,43: filter numeric nulls "SELECT SUM(long5) FROM foo WHERE long5 IS NOT NULL", "SELECT string2, SUM(long5) FROM foo WHERE long5 IS NOT NULL GROUP BY 1", - // 44: EARLIEST aggregator + // 44: EARLIEST aggregator long "SELECT EARLIEST(long1) FROM foo", // 45: EARLIEST aggregator double "SELECT EARLIEST(double4) FROM foo", diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteSelectQueryMSQTest.java 
b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteSelectQueryMSQTest.java index 7b1a0fbb07da..904c84063c90 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteSelectQueryMSQTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteSelectQueryMSQTest.java @@ -156,10 +156,6 @@ public void testMaxSubqueryRows() } - // The numeric varieties of the EARLIEST and LATEST aggregators do not work properly. - // Attempting to use the numeric varieties of these aggregators lead to an error - // like java.lang.ClassCastException: class java.lang.Double cannot be cast - // to class org.apache.druid.collections.SerializablePair. @Ignore @Override public void testQueryWithMoreThanMaxNumericInFilter() @@ -167,20 +163,6 @@ public void testQueryWithMoreThanMaxNumericInFilter() } - // MSQ currently does not su - @Ignore - @Override - public void testEarliestVectorAggregators() - { - - } - @Ignore - @Override - public void testOffHeapEarliestGroupBy() - { - - } - /** * Same query as {@link CalciteQueryTest#testArrayAggQueryOnComplexDatatypes}. ARRAY_AGG is not supported in MSQ currently. * Once support is added, this test can be removed and msqCompatible() can be added to the one in CalciteQueryTest. diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstVectorAggregator.java index c05a5c162f5f..3dd2cee95ee1 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstVectorAggregator.java @@ -25,9 +25,6 @@ import javax.annotation.Nullable; import java.nio.ByteBuffer; -/** - * Vectorized version of on heap 'earliest' aggregator for column selectors with type LONG.. 
- */ public class DoubleFirstVectorAggregator extends NumericFirstVectorAggregator { double firstValue; diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstVectorAggregator.java index da5edded9488..a18d5259f9f2 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstVectorAggregator.java @@ -25,9 +25,6 @@ import javax.annotation.Nullable; import java.nio.ByteBuffer; -/** - * Vectorized version of on heap 'earliest' aggregator for column selectors with type LONG.. - */ public class FloatFirstVectorAggregator extends NumericFirstVectorAggregator { float firstValue; diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstVectorAggregator.java index 521228f2be61..b6483508c34a 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstVectorAggregator.java @@ -25,9 +25,6 @@ import javax.annotation.Nullable; import java.nio.ByteBuffer; -/** - * Vectorized version of on heap 'earliest' aggregator for column selectors with type LONG.. 
- */ public class LongFirstVectorAggregator extends NumericFirstVectorAggregator { long firstValue; diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java index 8fc2b55525c9..969a41294c07 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java @@ -79,7 +79,6 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) } } - // find the first non-null value final long earliestTime = timeVector[index]; if (earliestTime < firstTime) { firstTime = earliestTime; diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java index 95ddcf92c77c..7c94c2028878 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java @@ -38,7 +38,7 @@ public class StringFirstVectorAggregator implements VectorAggregator private final BaseLongVectorValueSelector timeSelector; private final VectorObjectSelector valueSelector; private final int maxStringBytes; - protected long firstTime; + //protected long firstTime; public StringFirstVectorAggregator( BaseLongVectorValueSelector timeSelector, @@ -65,7 +65,7 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) } long[] times = timeSelector.getLongVector(); Object[] objectsWhichMightBeStrings = valueSelector.getObjectVector(); - firstTime = buf.getLong(position); + long firstTime = buf.getLong(position); int index; for (int i = startRow; i < endRow; i++) { if (times[i] > 
firstTime) { @@ -80,7 +80,7 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) index ); if (inPair != null) { - final long firstTime = buf.getLong(position); + firstTime = buf.getLong(position); if (inPair.lhs < firstTime) { StringFirstLastUtils.writePair( buf, diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 4daa019c53e8..3e61d2862b97 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -1370,6 +1370,8 @@ public void testStringAnyInSubquery() @Test public void testOffHeapEarliestGroupBy() { + notMsqCompatible(); + testQuery( "SELECT dim2, EARLIEST(m1) AS val1 FROM foo GROUP BY dim2", ImmutableList.of( @@ -14745,6 +14747,8 @@ public void testFilterWithNVLAndNotIn() @Test public void testEarliestVectorAggregators() { + notMsqCompatible(); + testQuery( "SELECT " + "EARLIEST(cnt), EARLIEST(cnt + 1), EARLIEST(m1), EARLIEST(m1+1) " From f78ca05fd6a6cf5eb274a816abb4f4fc5d6a720b Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Fri, 7 Jul 2023 13:08:55 -0700 Subject: [PATCH 08/20] Addressing review comments by adding the following: 1. Checking capabilities first before creating selectors 2. Removing mockito in tests for numeric first aggs 3. 
Removing unnecessary tests --- .../first/DoubleFirstAggregatorFactory.java | 13 +- .../first/DoubleFirstVectorAggregator.java | 6 +- .../first/FloatFirstAggregatorFactory.java | 12 +- .../first/FloatFirstVectorAggregator.java | 6 +- .../first/LongFirstAggregatorFactory.java | 12 +- .../first/LongFirstVectorAggregator.java | 7 +- .../first/NumericFirstVectorAggregator.java | 4 +- .../DoubleFirstVectorAggregationTest.java | 134 +++++++++++++--- .../FloatFirstVectorAggregationTest.java | 147 ++++++++++++++---- .../first/LongFirstVectorAggregationTest.java | 133 ++++++++++++---- .../druid/sql/calcite/CalciteQueryTest.java | 71 --------- 11 files changed, 354 insertions(+), 191 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstAggregatorFactory.java index 8711bb76feaa..406335ad2a65 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstAggregatorFactory.java @@ -41,7 +41,6 @@ import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.vector.BaseLongVectorValueSelector; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorValueSelector; @@ -144,15 +143,13 @@ public VectorAggregator factorizeVector( ) { ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(fieldName); - VectorValueSelector valueSelector = columnSelectorFactory.makeValueSelector(fieldName); - //time is always long - BaseLongVectorValueSelector timeSelector = (BaseLongVectorValueSelector) columnSelectorFactory.makeValueSelector( - timeColumn); - if (capabilities == null || capabilities.isNumeric()) { 
+ if (capabilities.isNumeric()) { + VectorValueSelector valueSelector = columnSelectorFactory.makeValueSelector(fieldName); + VectorValueSelector timeSelector = columnSelectorFactory.makeValueSelector( + timeColumn); return new DoubleFirstVectorAggregator(timeSelector, valueSelector); - } else { - return NumericNilVectorAggregator.doubleNilVectorAggregator(); } + return NumericNilVectorAggregator.doubleNilVectorAggregator(); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstVectorAggregator.java index 3dd2cee95ee1..562f14547a6a 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstVectorAggregator.java @@ -27,12 +27,10 @@ public class DoubleFirstVectorAggregator extends NumericFirstVectorAggregator { - double firstValue; public DoubleFirstVectorAggregator(VectorValueSelector timeSelector, VectorValueSelector valueSelector) { super(timeSelector, valueSelector); - firstValue = 0; } @Override @@ -45,13 +43,13 @@ public void initValue(ByteBuffer buf, int position) @Override void putValue(ByteBuffer buf, int position, int index) { - firstValue = valueSelector.getDoubleVector()[index]; + double firstValue = valueSelector.getDoubleVector()[index]; buf.putDouble(position, firstValue); } /** - * @return The primitive object stored at the position in the buffer. + * @return The object as a pair with the position and the value stored at the position in the buffer. 
*/ @Nullable @Override diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstAggregatorFactory.java index 6ee0f596dfa3..d755f3eca909 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstAggregatorFactory.java @@ -41,7 +41,6 @@ import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.vector.BaseLongVectorValueSelector; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorValueSelector; @@ -134,15 +133,12 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) public VectorAggregator factorizeVector(VectorColumnSelectorFactory columnSelectorFactory) { ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(fieldName); - VectorValueSelector valueSelector = columnSelectorFactory.makeValueSelector(fieldName); - //time is always long - BaseLongVectorValueSelector timeSelector = (BaseLongVectorValueSelector) columnSelectorFactory.makeValueSelector( - timeColumn); - if (capabilities == null || capabilities.isNumeric()) { + if (capabilities.isNumeric()) { + VectorValueSelector valueSelector = columnSelectorFactory.makeValueSelector(fieldName); + VectorValueSelector timeSelector = columnSelectorFactory.makeValueSelector(timeColumn); return new FloatFirstVectorAggregator(timeSelector, valueSelector); - } else { - return NumericNilVectorAggregator.floatNilVectorAggregator(); } + return NumericNilVectorAggregator.floatNilVectorAggregator(); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstVectorAggregator.java 
b/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstVectorAggregator.java index a18d5259f9f2..7c8c4e76f6b4 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstVectorAggregator.java @@ -27,12 +27,10 @@ public class FloatFirstVectorAggregator extends NumericFirstVectorAggregator { - float firstValue; public FloatFirstVectorAggregator(VectorValueSelector timeSelector, VectorValueSelector valueSelector) { super(timeSelector, valueSelector); - firstValue = 0; } @Override @@ -45,13 +43,13 @@ public void initValue(ByteBuffer buf, int position) @Override void putValue(ByteBuffer buf, int position, int index) { - firstValue = valueSelector.getFloatVector()[index]; + float firstValue = valueSelector.getFloatVector()[index]; buf.putFloat(position, firstValue); } /** - * @return The primitive object stored at the position in the buffer. + * @return The object as a pair with the position and the value stored at the position in the buffer. 
*/ @Nullable @Override diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstAggregatorFactory.java index d572d718f9b5..df971387d23a 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstAggregatorFactory.java @@ -41,7 +41,6 @@ import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.vector.BaseLongVectorValueSelector; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorValueSelector; @@ -133,14 +132,13 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) public VectorAggregator factorizeVector(VectorColumnSelectorFactory columnSelectorFactory) { ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(fieldName); - VectorValueSelector valueSelector = columnSelectorFactory.makeValueSelector(fieldName); - BaseLongVectorValueSelector timeSelector = (BaseLongVectorValueSelector) columnSelectorFactory.makeValueSelector( - timeColumn); - if (capabilities == null || capabilities.isNumeric()) { + if (capabilities.isNumeric()) { + VectorValueSelector valueSelector = columnSelectorFactory.makeValueSelector(fieldName); + VectorValueSelector timeSelector = columnSelectorFactory.makeValueSelector( + timeColumn); return new LongFirstVectorAggregator(timeSelector, valueSelector); - } else { - return NumericNilVectorAggregator.longNilVectorAggregator(); } + return NumericNilVectorAggregator.longNilVectorAggregator(); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstVectorAggregator.java 
b/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstVectorAggregator.java index b6483508c34a..769b148b5e49 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstVectorAggregator.java @@ -27,12 +27,9 @@ public class LongFirstVectorAggregator extends NumericFirstVectorAggregator { - long firstValue; - public LongFirstVectorAggregator(VectorValueSelector timeSelector, VectorValueSelector valueSelector) { super(timeSelector, valueSelector); - firstValue = 0; } @Override @@ -45,13 +42,13 @@ public void initValue(ByteBuffer buf, int position) @Override void putValue(ByteBuffer buf, int position, int index) { - firstValue = valueSelector.getLongVector()[index]; + long firstValue = valueSelector.getLongVector()[index]; buf.putLong(position, firstValue); } /** - * @return The primitive object stored at the position in the buffer. + * @return The object as a pair with the position and the value stored at the position in the buffer. 
*/ @Nullable @Override diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java index 969a41294c07..716db29f79ca 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java @@ -145,7 +145,7 @@ void updateTimeWithValue(ByteBuffer buf, int position, long time, int index) } /** - *Updates the time only to the appropriate position in buffer as the value is null + * Updates the time only to the appropriate position in buffer as the value is null * * @param buf byte buffer storing the byte array representation of the aggregate * @param position offset within the byte buffer at which the current aggregate value is stored @@ -163,7 +163,7 @@ void updateTimeWithNull(ByteBuffer buf, int position, long time) abstract void initValue(ByteBuffer buf, int position); /** - *Abstract function which needs to be overridden by subclasses to set the + * Abstract function which needs to be overridden by subclasses to set the * latest value in the buffer depending on the datatype */ abstract void putValue(ByteBuffer buf, int position, int index); diff --git a/processing/src/test/java/org/apache/druid/query/aggregation/first/DoubleFirstVectorAggregationTest.java b/processing/src/test/java/org/apache/druid/query/aggregation/first/DoubleFirstVectorAggregationTest.java index 1575b33dcef0..49c15fa22a51 100644 --- a/processing/src/test/java/org/apache/druid/query/aggregation/first/DoubleFirstVectorAggregationTest.java +++ b/processing/src/test/java/org/apache/druid/query/aggregation/first/DoubleFirstVectorAggregationTest.java @@ -22,23 +22,28 @@ import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.Pair; import 
org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.vector.BaseDoubleVectorValueSelector; import org.apache.druid.segment.vector.BaseLongVectorValueSelector; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.NoFilterVectorOffset; +import org.apache.druid.segment.vector.ReadableVectorInspector; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorObjectSelector; import org.apache.druid.segment.vector.VectorValueSelector; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import org.junit.runner.RunWith; -import org.mockito.Answers; -import org.mockito.Mock; -import org.mockito.Mockito; -import org.mockito.junit.MockitoJUnitRunner; +import javax.annotation.Nullable; import java.nio.ByteBuffer; import java.util.concurrent.ThreadLocalRandom; -@RunWith(MockitoJUnitRunner.class) public class DoubleFirstVectorAggregationTest extends InitializedNullHandlingTest { private static final double EPSILON = 1e-5; @@ -50,16 +55,15 @@ public class DoubleFirstVectorAggregationTest extends InitializedNullHandlingTes private static final String FIELD_NAME = "FIELD_NAME"; private static final String TIME_COL = "__time"; - @Mock private VectorValueSelector selector; - @Mock + private BaseLongVectorValueSelector timeSelector; private ByteBuffer buf; private DoubleFirstVectorAggregator target; private DoubleFirstAggregatorFactory doubleFirstAggregatorFactory; - @Mock(answer = Answers.RETURNS_DEEP_STUBS) + private VectorColumnSelectorFactory 
selectorFactory; @Before @@ -68,14 +72,96 @@ public void setup() byte[] randomBytes = new byte[1024]; ThreadLocalRandom.current().nextBytes(randomBytes); buf = ByteBuffer.wrap(randomBytes); - Mockito.doReturn(VALUES).when(selector).getDoubleVector(); - Mockito.doReturn(times).when(timeSelector).getLongVector(); + timeSelector = new BaseLongVectorValueSelector(new NoFilterVectorOffset(times.length, 0, times.length) + { + }) + { + @Override + public long[] getLongVector() + { + return times; + } + + @Nullable + @Override + public boolean[] getNullVector() + { + return NULLS; + } + }; + selector = new BaseDoubleVectorValueSelector(new NoFilterVectorOffset(VALUES.length, 0, VALUES.length) + { + + }) + { + @Override + public double[] getDoubleVector() + { + return VALUES; + } + + @Nullable + @Override + public boolean[] getNullVector() + { + if (!NullHandling.replaceWithDefault()) { + return NULLS; + } + return null; + } + }; + target = new DoubleFirstVectorAggregator(timeSelector, selector); clearBufferForPositions(0, 0); + selectorFactory = new VectorColumnSelectorFactory() + { + @Override + public ReadableVectorInspector getReadableVectorInspector() + { + return null; + } + + @Override + public SingleValueDimensionVectorSelector makeSingleValueDimensionSelector(DimensionSpec dimensionSpec) + { + return null; + } + + @Override + public MultiValueDimensionVectorSelector makeMultiValueDimensionSelector(DimensionSpec dimensionSpec) + { + return null; + } + + @Override + public VectorValueSelector makeValueSelector(String column) + { + if (TIME_COL.equals(column)) { + return timeSelector; + } else if (FIELD_NAME.equals(column)) { + return selector; + } else { + return null; + } + } + + @Override + public VectorObjectSelector makeObjectSelector(String column) + { + return null; + } + + @Nullable + @Override + public ColumnCapabilities getColumnCapabilities(String column) + { + if (FIELD_NAME.equals(column)) { + return 
ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ColumnType.DOUBLE); + } + return null; + } + }; - Mockito.doReturn(null).when(selectorFactory).getColumnCapabilities(FIELD_NAME); - Mockito.doReturn(selector).when(selectorFactory).makeValueSelector(FIELD_NAME); - Mockito.doReturn(timeSelector).when(selectorFactory).makeValueSelector(TIME_COL); doubleFirstAggregatorFactory = new DoubleFirstAggregatorFactory(NAME, FIELD_NAME, TIME_COL); } @@ -108,7 +194,6 @@ public void aggregate() @Test public void aggregateWithNulls() { - mockNullsVector(); target.aggregate(buf, 0, 0, VALUES.length); Pair result = (Pair) target.get(buf, 0); Assert.assertEquals(times[0], result.lhs.longValue()); @@ -125,7 +210,11 @@ public void aggregateBatchWithoutRows() for (int i = 0; i < positions.length; i++) { Pair result = (Pair) target.get(buf, positions[i] + positionOffset); Assert.assertEquals(times[i], result.lhs.longValue()); - Assert.assertEquals(VALUES[i], result.rhs, EPSILON); + if (!NullHandling.replaceWithDefault() && NULLS[i]) { + Assert.assertNull(result.rhs); + } else { + Assert.assertEquals(VALUES[i], result.rhs, EPSILON); + } } } @@ -140,7 +229,11 @@ public void aggregateBatchWithRows() for (int i = 0; i < positions.length; i++) { Pair result = (Pair) target.get(buf, positions[i] + positionOffset); Assert.assertEquals(times[rows[i]], result.lhs.longValue()); - Assert.assertEquals(VALUES[rows[i]], result.rhs, EPSILON); + if (!NullHandling.replaceWithDefault() && NULLS[rows[i]]) { + Assert.assertNull(result.rhs); + } else { + Assert.assertEquals(VALUES[rows[i]], result.rhs, EPSILON); + } } } @@ -150,11 +243,4 @@ private void clearBufferForPositions(int offset, int... 
positions) target.init(buf, offset + position); } } - - private void mockNullsVector() - { - if (!NullHandling.replaceWithDefault()) { - Mockito.doReturn(NULLS).when(selector).getNullVector(); - } - } } diff --git a/processing/src/test/java/org/apache/druid/query/aggregation/first/FloatFirstVectorAggregationTest.java b/processing/src/test/java/org/apache/druid/query/aggregation/first/FloatFirstVectorAggregationTest.java index 0eb7afe46e65..6b02037824a9 100644 --- a/processing/src/test/java/org/apache/druid/query/aggregation/first/FloatFirstVectorAggregationTest.java +++ b/processing/src/test/java/org/apache/druid/query/aggregation/first/FloatFirstVectorAggregationTest.java @@ -22,44 +22,49 @@ import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.Pair; import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.vector.BaseFloatVectorValueSelector; import org.apache.druid.segment.vector.BaseLongVectorValueSelector; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.NoFilterVectorOffset; +import org.apache.druid.segment.vector.ReadableVectorInspector; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorObjectSelector; import org.apache.druid.segment.vector.VectorValueSelector; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import org.junit.runner.RunWith; -import org.mockito.Answers; -import org.mockito.Mock; -import org.mockito.Mockito; -import org.mockito.junit.MockitoJUnitRunner; 
+import javax.annotation.Nullable; import java.nio.ByteBuffer; import java.util.concurrent.ThreadLocalRandom; -@RunWith(MockitoJUnitRunner.class) public class FloatFirstVectorAggregationTest extends InitializedNullHandlingTest { private static final double EPSILON = 1e-5; private static final float[] VALUES = new float[]{7.2f, 15.6f, 2.1f, 150.0f}; - private static final boolean[] NULLS = new boolean[]{true, false, true, false}; + private static final boolean[] NULLS = new boolean[]{false, false, true, false}; private long[] times = {2436, 6879, 7888, 8224}; private static final String NAME = "NAME"; private static final String FIELD_NAME = "FIELD_NAME"; private static final String TIME_COL = "__time"; - @Mock + private VectorValueSelector selector; - @Mock + private BaseLongVectorValueSelector timeSelector; private ByteBuffer buf; private FloatFirstVectorAggregator target; private FloatFirstAggregatorFactory floatFirstAggregatorFactory; - @Mock(answer = Answers.RETURNS_DEEP_STUBS) + private VectorColumnSelectorFactory selectorFactory; @Before @@ -68,14 +73,97 @@ public void setup() byte[] randomBytes = new byte[1024]; ThreadLocalRandom.current().nextBytes(randomBytes); buf = ByteBuffer.wrap(randomBytes); - Mockito.doReturn(VALUES).when(selector).getFloatVector(); - Mockito.doReturn(times).when(timeSelector).getLongVector(); + timeSelector = new BaseLongVectorValueSelector(new NoFilterVectorOffset(times.length, 0, times.length) + { + }) + { + @Override + public long[] getLongVector() + { + return times; + } + + @Nullable + @Override + public boolean[] getNullVector() + { + return NULLS; + } + }; + selector = new BaseFloatVectorValueSelector(new NoFilterVectorOffset(VALUES.length, 0, VALUES.length) + { + + }) + { + + @Override + public float[] getFloatVector() + { + return VALUES; + } + + @Nullable + @Override + public boolean[] getNullVector() + { + if (!NullHandling.replaceWithDefault()) { + return NULLS; + } + return null; + } + }; + target = new 
FloatFirstVectorAggregator(timeSelector, selector); clearBufferForPositions(0, 0); - Mockito.doReturn(null).when(selectorFactory).getColumnCapabilities(FIELD_NAME); - Mockito.doReturn(selector).when(selectorFactory).makeValueSelector(FIELD_NAME); - Mockito.doReturn(timeSelector).when(selectorFactory).makeValueSelector(TIME_COL); + selectorFactory = new VectorColumnSelectorFactory() + { + @Override + public ReadableVectorInspector getReadableVectorInspector() + { + return null; + } + + @Override + public SingleValueDimensionVectorSelector makeSingleValueDimensionSelector(DimensionSpec dimensionSpec) + { + return null; + } + + @Override + public MultiValueDimensionVectorSelector makeMultiValueDimensionSelector(DimensionSpec dimensionSpec) + { + return null; + } + + @Override + public VectorValueSelector makeValueSelector(String column) + { + if (TIME_COL.equals(column)) { + return timeSelector; + } else if (FIELD_NAME.equals(column)) { + return selector; + } else { + return null; + } + } + + @Override + public VectorObjectSelector makeObjectSelector(String column) + { + return null; + } + + @Nullable + @Override + public ColumnCapabilities getColumnCapabilities(String column) + { + if (FIELD_NAME.equals(column)) { + return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ColumnType.FLOAT); + } + return null; + } + }; floatFirstAggregatorFactory = new FloatFirstAggregatorFactory(NAME, FIELD_NAME, TIME_COL); } @@ -110,16 +198,10 @@ public void aggregate() @Test public void aggregateWithNulls() { - mockNullsVector(); target.aggregate(buf, 0, 0, VALUES.length); Pair result = (Pair) target.get(buf, 0); - if (!NullHandling.replaceWithDefault()) { - Assert.assertEquals(times[1], result.lhs.longValue()); - Assert.assertEquals(VALUES[1], result.rhs, EPSILON); - } else { - Assert.assertEquals(times[0], result.lhs.longValue()); - Assert.assertEquals(VALUES[0], result.rhs, EPSILON); - } + Assert.assertEquals(times[0], result.lhs.longValue()); + 
Assert.assertEquals(VALUES[0], result.rhs, EPSILON); } @Test @@ -132,7 +214,11 @@ public void aggregateBatchWithoutRows() for (int i = 0; i < positions.length; i++) { Pair result = (Pair) target.get(buf, positions[i] + positionOffset); Assert.assertEquals(times[i], result.lhs.longValue()); - Assert.assertEquals(VALUES[i], result.rhs, EPSILON); + if (!NullHandling.replaceWithDefault() && NULLS[i]) { + Assert.assertNull(result.rhs); + } else { + Assert.assertEquals(VALUES[i], result.rhs, EPSILON); + } } } @@ -147,7 +233,11 @@ public void aggregateBatchWithRows() for (int i = 0; i < positions.length; i++) { Pair result = (Pair) target.get(buf, positions[i] + positionOffset); Assert.assertEquals(times[rows[i]], result.lhs.longValue()); - Assert.assertEquals(VALUES[rows[i]], result.rhs, EPSILON); + if (!NullHandling.replaceWithDefault() && NULLS[rows[i]]) { + Assert.assertNull(result.rhs); + } else { + Assert.assertEquals(VALUES[rows[i]], result.rhs, EPSILON); + } } } @@ -157,11 +247,4 @@ private void clearBufferForPositions(int offset, int... 
positions) target.init(buf, offset + position); } } - - private void mockNullsVector() - { - if (!NullHandling.replaceWithDefault()) { - Mockito.doReturn(NULLS).when(selector).getNullVector(); - } - } } diff --git a/processing/src/test/java/org/apache/druid/query/aggregation/first/LongFirstVectorAggregationTest.java b/processing/src/test/java/org/apache/druid/query/aggregation/first/LongFirstVectorAggregationTest.java index 5f2072ef5b25..ec4017600628 100644 --- a/processing/src/test/java/org/apache/druid/query/aggregation/first/LongFirstVectorAggregationTest.java +++ b/processing/src/test/java/org/apache/druid/query/aggregation/first/LongFirstVectorAggregationTest.java @@ -22,24 +22,28 @@ import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.Pair; import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.vector.BaseLongVectorValueSelector; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.NoFilterVectorOffset; +import org.apache.druid.segment.vector.ReadableVectorInspector; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorObjectSelector; import org.apache.druid.segment.vector.VectorValueSelector; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import org.junit.runner.RunWith; -import org.mockito.Answers; -import org.mockito.Mock; -import org.mockito.Mockito; -import org.mockito.junit.MockitoJUnitRunner; +import javax.annotation.Nullable; import java.nio.ByteBuffer; import 
java.util.concurrent.ThreadLocalRandom; -@RunWith(MockitoJUnitRunner.class) public class LongFirstVectorAggregationTest extends InitializedNullHandlingTest { private static final double EPSILON = 1e-5; @@ -49,15 +53,12 @@ public class LongFirstVectorAggregationTest extends InitializedNullHandlingTest private static final String FIELD_NAME = "FIELD_NAME"; private static final String TIME_COL = "__time"; private long[] times = {2436, 6879, 7888, 8224}; - @Mock private VectorValueSelector selector; - @Mock private BaseLongVectorValueSelector timeSelector; private ByteBuffer buf; private LongFirstVectorAggregator target; private LongFirstAggregatorFactory longFirstAggregatorFactory; - @Mock(answer = Answers.RETURNS_DEEP_STUBS) private VectorColumnSelectorFactory selectorFactory; @Before @@ -66,17 +67,97 @@ public void setup() byte[] randomBytes = new byte[1024]; ThreadLocalRandom.current().nextBytes(randomBytes); buf = ByteBuffer.wrap(randomBytes); - Mockito.doReturn(VALUES).when(selector).getLongVector(); - Mockito.doReturn(times).when(timeSelector).getLongVector(); + timeSelector = new BaseLongVectorValueSelector(new NoFilterVectorOffset(times.length, 0, times.length) + { + }) + { + @Override + public long[] getLongVector() + { + return times; + } + + @Nullable + @Override + public boolean[] getNullVector() + { + return NULLS; + } + }; + selector = new BaseLongVectorValueSelector(new NoFilterVectorOffset(VALUES.length, 0, VALUES.length) + { + + }) + { + @Override + public long[] getLongVector() + { + return VALUES; + } + + @Nullable + @Override + public boolean[] getNullVector() + { + if (!NullHandling.replaceWithDefault()) { + return NULLS; + } + return null; + } + }; + target = new LongFirstVectorAggregator(timeSelector, selector); clearBufferForPositions(0, 0); - - Mockito.doReturn(null).when(selectorFactory).getColumnCapabilities(FIELD_NAME); - Mockito.doReturn(selector).when(selectorFactory).makeValueSelector(FIELD_NAME); - 
Mockito.doReturn(timeSelector).when(selectorFactory).makeValueSelector(TIME_COL); + selectorFactory = new VectorColumnSelectorFactory() + { + @Override + public ReadableVectorInspector getReadableVectorInspector() + { + return null; + } + + @Override + public SingleValueDimensionVectorSelector makeSingleValueDimensionSelector(DimensionSpec dimensionSpec) + { + return null; + } + + @Override + public MultiValueDimensionVectorSelector makeMultiValueDimensionSelector(DimensionSpec dimensionSpec) + { + return null; + } + + @Override + public VectorValueSelector makeValueSelector(String column) + { + if (TIME_COL.equals(column)) { + return timeSelector; + } else if (FIELD_NAME.equals(column)) { + return selector; + } else { + return null; + } + } + + @Override + public VectorObjectSelector makeObjectSelector(String column) + { + return null; + } + + @Nullable + @Override + public ColumnCapabilities getColumnCapabilities(String column) + { + if (FIELD_NAME.equals(column)) { + return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ColumnType.LONG); + } + return null; + } + }; longFirstAggregatorFactory = new LongFirstAggregatorFactory(NAME, FIELD_NAME, TIME_COL); - } @Test @@ -108,7 +189,6 @@ public void aggregate() @Test public void aggregateWithNulls() { - mockNullsVector(); target.aggregate(buf, 0, 0, VALUES.length); Pair result = (Pair) target.get(buf, 0); Assert.assertEquals(times[0], result.lhs.longValue()); @@ -125,7 +205,11 @@ public void aggregateBatchWithoutRows() for (int i = 0; i < positions.length; i++) { Pair result = (Pair) target.get(buf, positions[i] + positionOffset); Assert.assertEquals(times[i], result.lhs.longValue()); - Assert.assertEquals(VALUES[i], result.rhs, EPSILON); + if (!NullHandling.replaceWithDefault() && NULLS[i]) { + Assert.assertNull(result.rhs); + } else { + Assert.assertEquals(VALUES[i], result.rhs, EPSILON); + } } } @@ -140,7 +224,11 @@ public void aggregateBatchWithRows() for (int i = 0; i < positions.length; i++) { Pair 
result = (Pair) target.get(buf, positions[i] + positionOffset); Assert.assertEquals(times[rows[i]], result.lhs.longValue()); - Assert.assertEquals(VALUES[rows[i]], result.rhs, EPSILON); + if (!NullHandling.replaceWithDefault() && NULLS[rows[i]]) { + Assert.assertNull(result.rhs); + } else { + Assert.assertEquals(VALUES[rows[i]], result.rhs, EPSILON); + } } } @@ -150,11 +238,4 @@ private void clearBufferForPositions(int offset, int... positions) target.init(buf, offset + position); } } - - private void mockNullsVector() - { - if (!NullHandling.replaceWithDefault()) { - Mockito.doReturn(NULLS).when(selector).getNullVector(); - } - } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index fc56db15944a..8491fdab6374 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -1314,42 +1314,6 @@ public void testStringAnyInSubquery() ); } - @Test - public void testOffHeapEarliestGroupBy() - { - notMsqCompatible(); - - testQuery( - "SELECT dim2, EARLIEST(m1) AS val1 FROM foo GROUP BY dim2", - ImmutableList.of( - GroupByQuery.builder() - .setDataSource(CalciteTests.DATASOURCE1) - .setInterval(querySegmentSpec(Filtration.eternity())) - .setGranularity(Granularities.ALL) - .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0"))) - .setAggregatorSpecs(aggregators( - new FloatFirstAggregatorFactory("a0", "m1", null) - ) - ) - .setContext(QUERY_CONTEXT_DEFAULT) - .build() - ), - NullHandling.sqlCompatible() - ? 
ImmutableList.of( - new Object[]{null, 2.0f}, - new Object[]{"", 3.0f}, - new Object[]{"a", 1.0f}, - new Object[]{"abc", 5.0f} - ) - : ImmutableList.of( - new Object[]{"", 2.0f}, - new Object[]{"a", 1.0f}, - new Object[]{"abc", 5.0f} - - ) - ); - } - @Test public void testEarliestAggregatorsNumericNulls() { @@ -13985,39 +13949,4 @@ public void testFilterWithNVLAndNotIn() ) ); } - - @Test - public void testEarliestVectorAggregators() - { - notMsqCompatible(); - - testQuery( - "SELECT " - + "EARLIEST(cnt), EARLIEST(cnt + 1), EARLIEST(m1), EARLIEST(m1+1) " - + "FROM druid.numfoo", - ImmutableList.of( - Druids.newTimeseriesQueryBuilder() - .dataSource(CalciteTests.DATASOURCE3) - .intervals(querySegmentSpec(Filtration.eternity())) - .granularity(Granularities.ALL) - .virtualColumns( - expressionVirtualColumn("v0", "(\"cnt\" + 1)", ColumnType.LONG), - expressionVirtualColumn("v1", "(\"m1\" + 1)", ColumnType.FLOAT) - ) - .aggregators( - aggregators( - new LongFirstAggregatorFactory("a0", "cnt", null), - new LongFirstAggregatorFactory("a1", "v0", null), - new FloatFirstAggregatorFactory("a2", "m1", null), - new FloatFirstAggregatorFactory("a3", "v1", null) - ) - ) - .context(QUERY_CONTEXT_DEFAULT) - .build() - ), - ImmutableList.of( - new Object[]{1L, 2L, 1.0f, 2.0f} - ) - ); - } } From 6cae4901e36613d2cf1d97fe17423680c5a90931 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Thu, 13 Jul 2023 13:49:35 -0700 Subject: [PATCH 09/20] Addressing issues for dictionary encoded single string columns where we can use the dictionary ids instead of the entire string --- .../apache/druid/query/UnnestDataSource.java | 7 +- ...iStringFirstDimensionVectorAggregator.java | 124 ++++++++++++++++++ ...eStringFirstDimensionVectorAggregator.java | 118 +++++++++++++++++ .../first/StringFirstAggregatorFactory.java | 42 +++++- .../first/StringFirstVectorAggregator.java | 2 +- .../druid/sql/calcite/CalciteQueryTest.java | 70 +++++++++- 6 files changed, 356 insertions(+), 7 deletions(-) create mode 
100644 processing/src/main/java/org/apache/druid/query/aggregation/first/MultiStringFirstDimensionVectorAggregator.java create mode 100644 processing/src/main/java/org/apache/druid/query/aggregation/first/SingleStringFirstDimensionVectorAggregator.java diff --git a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java index acd984b64422..72859f02c9ce 100644 --- a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java +++ b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java @@ -61,9 +61,10 @@ private UnnestDataSource( DimFilter unnestFilter ) { - this.base = dataSource; - this.virtualColumn = virtualColumn; - this.unnestFilter = unnestFilter; + // select * from UNNEST(ARRAY[1,2,3]) as somu(d3) where somu.d3 IN ('a','b') + this.base = dataSource; // table + this.virtualColumn = virtualColumn; // MV_TO_ARRAY + this.unnestFilter = unnestFilter; // d3 in (a,b) } @JsonCreator diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/MultiStringFirstDimensionVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/MultiStringFirstDimensionVectorAggregator.java new file mode 100644 index 000000000000..c13165a39441 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/MultiStringFirstDimensionVectorAggregator.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.first; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.query.aggregation.SerializablePairLongString; +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.segment.data.IndexedInts; +import org.apache.druid.segment.vector.BaseLongVectorValueSelector; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class MultiStringFirstDimensionVectorAggregator implements VectorAggregator +{ + private final BaseLongVectorValueSelector timeSelector; + private final MultiValueDimensionVectorSelector valueDimensionVectorSelector; + private long firstTime; + private final int maxStringBytes; + private final boolean useDefault = NullHandling.replaceWithDefault(); + + public MultiStringFirstDimensionVectorAggregator( + BaseLongVectorValueSelector timeSelector, + MultiValueDimensionVectorSelector valueDimensionVectorSelector, + int maxStringBytes + ) + { + this.timeSelector = timeSelector; + this.valueDimensionVectorSelector = valueDimensionVectorSelector; + this.maxStringBytes = maxStringBytes; + firstTime = Long.MAX_VALUE; + } + + @Override + public void init(ByteBuffer buf, int position) + { + buf.putLong(position, Long.MAX_VALUE); + buf.put( + position + NumericFirstVectorAggregator.NULL_OFFSET, + useDefault ? 
NullHandling.IS_NOT_NULL_BYTE : NullHandling.IS_NULL_BYTE + ); + buf.putLong(position + NumericFirstVectorAggregator.VALUE_OFFSET, 0); + } + + @Override + public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) + { + final long[] timeVector = timeSelector.getLongVector(); + final IndexedInts[] valueVector = valueDimensionVectorSelector.getRowVector(); + firstTime = buf.getLong(position); + int index = startRow; + for (int i = startRow; i < endRow; i++) { + if (valueVector[i].get(0) != 0) { + index = i; + break; + } + } + + final long earliestTime = timeVector[index]; + if (earliestTime < firstTime) { + firstTime = earliestTime; + buf.putLong(position, firstTime); + buf.put(position + NumericFirstVectorAggregator.NULL_OFFSET, NullHandling.IS_NOT_NULL_BYTE); + buf.putInt(position + NumericFirstVectorAggregator.VALUE_OFFSET, valueVector[index].get(0)); + } + } + + @Override + public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) + { + long[] timeVector = timeSelector.getLongVector(); + IndexedInts[] values = valueDimensionVectorSelector.getRowVector(); + for (int i = 0; i < numRows; i++) { + int position = positions[i] + positionOffset; + int row = rows == null ? i : rows[i]; + long firstTime = buf.getLong(position); + if (timeVector[row] < firstTime) { + firstTime = timeVector[row]; + buf.putLong(position, firstTime); + buf.put(position + NumericFirstVectorAggregator.NULL_OFFSET, NullHandling.IS_NOT_NULL_BYTE); + buf.putInt( + position + NumericFirstVectorAggregator.VALUE_OFFSET, + values[row].size() > 0 ? 
values[row].get(0) : 0 + ); + } + } + } + + @Nullable + @Override + public Object get(ByteBuffer buf, int position) + { + int index = buf.getInt(position + NumericFirstVectorAggregator.VALUE_OFFSET); + long earliest = buf.getLong(position); + String strValue = valueDimensionVectorSelector.lookupName(index); + return new SerializablePairLongString(earliest, StringUtils.chop(strValue, maxStringBytes)); + + } + + @Override + public void close() + { + + } +} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/SingleStringFirstDimensionVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/SingleStringFirstDimensionVectorAggregator.java new file mode 100644 index 000000000000..2e6184273fdb --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/SingleStringFirstDimensionVectorAggregator.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.first; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.query.aggregation.SerializablePairLongString; +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.segment.vector.BaseLongVectorValueSelector; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class SingleStringFirstDimensionVectorAggregator implements VectorAggregator +{ + private final BaseLongVectorValueSelector timeSelector; + private final SingleValueDimensionVectorSelector valueDimensionVectorSelector; + private long firstTime; + private final int maxStringBytes; + private final boolean useDefault = NullHandling.replaceWithDefault(); + + public SingleStringFirstDimensionVectorAggregator( + BaseLongVectorValueSelector timeSelector, + SingleValueDimensionVectorSelector valueDimensionVectorSelector, + int maxStringBytes + ) + { + this.timeSelector = timeSelector; + this.valueDimensionVectorSelector = valueDimensionVectorSelector; + this.maxStringBytes = maxStringBytes; + this.firstTime = Long.MAX_VALUE; + } + + @Override + public void init(ByteBuffer buf, int position) + { + buf.putLong(position, Long.MAX_VALUE); + buf.put( + position + NumericFirstVectorAggregator.NULL_OFFSET, + useDefault ? 
NullHandling.IS_NOT_NULL_BYTE : NullHandling.IS_NULL_BYTE + ); + buf.putLong(position + NumericFirstVectorAggregator.VALUE_OFFSET, 0); + } + + @Override + public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) + { + final long[] timeVector = timeSelector.getLongVector(); + final int[] valueVector = valueDimensionVectorSelector.getRowVector(); + firstTime = buf.getLong(position); + int index = startRow; + for (int i = startRow; i < endRow; i++) { + index = i; + break; + } + + final long earliestTime = timeVector[index]; + if (earliestTime < firstTime) { + firstTime = earliestTime; + buf.putLong(position, firstTime); + buf.put(position + NumericFirstVectorAggregator.NULL_OFFSET, NullHandling.IS_NOT_NULL_BYTE); + buf.putInt(position + NumericFirstVectorAggregator.VALUE_OFFSET, valueVector[index]); + } + } + + @Override + public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) + { + long[] timeVector = timeSelector.getLongVector(); + int[] values = valueDimensionVectorSelector.getRowVector(); + for (int i = 0; i < numRows; i++) { + int position = positions[i] + positionOffset; + int row = rows == null ? 
i : rows[i]; + long firstTime = buf.getLong(position); + if (timeVector[row] < firstTime) { + firstTime = timeVector[row]; + buf.putLong(position, firstTime); + buf.put(position + NumericFirstVectorAggregator.NULL_OFFSET, NullHandling.IS_NOT_NULL_BYTE); + buf.putInt(position + NumericFirstVectorAggregator.VALUE_OFFSET, values[row]); + } + } + + } + + @Nullable + @Override + public Object get(ByteBuffer buf, int position) + { + int index = buf.getInt(position + NumericFirstVectorAggregator.VALUE_OFFSET); + long earliest = buf.getLong(position); + String strValue = valueDimensionVectorSelector.lookupName(index); + return new SerializablePairLongString(earliest, StringUtils.chop(strValue, maxStringBytes)); + } + + @Override + public void close() + { + // nothing to close + } +} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregatorFactory.java index 3cc31b2737ab..d4463a07ee5a 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregatorFactory.java @@ -34,6 +34,7 @@ import org.apache.druid.query.aggregation.SerializablePairLongString; import org.apache.druid.query.aggregation.VectorAggregator; import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.segment.BaseObjectColumnValueSelector; import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelectorFactory; @@ -41,7 +42,10 @@ import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.vector.BaseLongVectorValueSelector; +import 
org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorObjectSelector; @@ -105,6 +109,8 @@ public void aggregate(ByteBuffer buf, int position) private final String timeColumn; protected final int maxStringBytes; + private boolean getFirstElementFromMvd; + @JsonCreator public StringFirstAggregatorFactory( @JsonProperty("name") String name, @@ -126,8 +132,10 @@ public StringFirstAggregatorFactory( this.maxStringBytes = maxStringBytes == null ? StringFirstAggregatorFactory.DEFAULT_MAX_STRING_SIZE : maxStringBytes; + this.getFirstElementFromMvd = false; } + @Override public Aggregator factorize(ColumnSelectorFactory metricFactory) { @@ -163,10 +171,30 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) @Override public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFactory) { - ColumnCapabilities capabilities = selectorFactory.getColumnCapabilities(fieldName); - VectorObjectSelector vSelector = selectorFactory.makeObjectSelector(fieldName); BaseLongVectorValueSelector timeSelector = (BaseLongVectorValueSelector) selectorFactory.makeValueSelector( timeColumn); + ColumnCapabilities capabilities = selectorFactory.getColumnCapabilities(fieldName); + if (capabilities != null) { + if (capabilities.is(ValueType.STRING) && capabilities.isDictionaryEncoded().isTrue()) { + // Case 1: Multivalue string with dimension selector + if (capabilities.hasMultipleValues().isTrue()) { + if (isGetFirstElementFromMvd()) { + MultiValueDimensionVectorSelector mSelector = selectorFactory.makeMultiValueDimensionSelector( + DefaultDimensionSpec.of( + fieldName)); + return new MultiStringFirstDimensionVectorAggregator(timeSelector, mSelector, maxStringBytes); + } + } else { + // Case 2: Single string with dimension selector + 
SingleValueDimensionVectorSelector sSelector = selectorFactory.makeSingleValueDimensionSelector( + DefaultDimensionSpec.of( + fieldName)); + return new SingleStringFirstDimensionVectorAggregator(timeSelector, sSelector, maxStringBytes); + } + } + } + // Case 3: return vector object selector + VectorObjectSelector vSelector = selectorFactory.makeObjectSelector(fieldName); if (capabilities != null) { return new StringFirstVectorAggregator(timeSelector, vSelector, maxStringBytes); } else { @@ -255,6 +283,16 @@ public List requiredFields() return Arrays.asList(timeColumn, fieldName); } + public boolean isGetFirstElementFromMvd() + { + return getFirstElementFromMvd; + } + + public void setGetFirstElementFromMvd(boolean getFirstElementFromMvd) + { + this.getFirstElementFromMvd = getFirstElementFromMvd; + } + @Override public byte[] getCacheKey() { diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java index 7c94c2028878..ea77084518a8 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java @@ -38,7 +38,7 @@ public class StringFirstVectorAggregator implements VectorAggregator private final BaseLongVectorValueSelector timeSelector; private final VectorObjectSelector valueSelector; private final int maxStringBytes; - //protected long firstTime; + public StringFirstVectorAggregator( BaseLongVectorValueSelector timeSelector, diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 8491fdab6374..12a5443df359 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ 
-1161,7 +1161,7 @@ public void testStringLatestInSubquery() public void testStringEarliestInSubquery() { testQuery( - "SELECT SUM(val) FROM (SELECT dim2, EARLIEST(dim1, 10) AS val FROM foo GROUP BY dim2)", + "SELECT SUM(val) FROM (SELECT dim2, EARLIEST(dim1,10) AS val FROM foo GROUP BY dim2)", ImmutableList.of( GroupByQuery.builder() .setDataSource( @@ -1267,6 +1267,74 @@ public void testPrimitiveAnyInSubquery() ); } + @Test + public void testStringEarliestSingleStringDim() + { + testQuery( + "SELECT dim2, EARLIEST(dim1,10) AS val FROM foo GROUP BY dim2", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0"))) + .setAggregatorSpecs(aggregators(new StringFirstAggregatorFactory( + "a0", + "dim1", + null, + 10 + ))) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + NullHandling.sqlCompatible() ? + ImmutableList.of( + new Object[]{null, "10.1"}, + new Object[]{"", "2"}, + new Object[]{"a", ""}, + new Object[]{"abc", "def"} + ) : ImmutableList.of( + new Object[]{"", "10.1"}, + new Object[]{"a", ""}, + new Object[]{"abc", "def"} + ) + ); + } + + @Test + public void testStringEarliestMultiStringDim() + { + testQuery( + "SELECT dim2, EARLIEST(dim3,10) AS val FROM foo GROUP BY dim2", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0"))) + .setAggregatorSpecs(aggregators(new StringFirstAggregatorFactory( + "a0", + "dim3", + null, + 10 + ))) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + NullHandling.sqlCompatible() ? 
+ ImmutableList.of( + new Object[]{null, "[b, c]"}, + new Object[]{"", "d"}, + new Object[]{"a", "[a, b]"}, + new Object[]{"abc", null} + ) : ImmutableList.of( + new Object[]{"", "[b, c]"}, + new Object[]{"a", "[a, b]"}, + new Object[]{"abc", ""} + ) + ); + } + // This test the off-heap (buffer) version of the AnyAggregator (String) @Test public void testStringAnyInSubquery() From ef87989166735a3bab2a1eae7dda1028ec2d7556 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Thu, 13 Jul 2023 15:18:54 -0700 Subject: [PATCH 10/20] Adding a flag for multi value dimension selector --- .../first/SingleStringFirstDimensionVectorAggregator.java | 4 ---- .../aggregation/first/StringFirstAggregatorFactory.java | 5 ----- .../java/org/apache/druid/sql/calcite/CalciteQueryTest.java | 1 + 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/SingleStringFirstDimensionVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/SingleStringFirstDimensionVectorAggregator.java index 2e6184273fdb..2d876efd6f94 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/SingleStringFirstDimensionVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/SingleStringFirstDimensionVectorAggregator.java @@ -67,10 +67,6 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) final int[] valueVector = valueDimensionVectorSelector.getRowVector(); firstTime = buf.getLong(position); int index = startRow; - for (int i = startRow; i < endRow; i++) { - index = i; - break; - } final long earliestTime = timeVector[index]; if (earliestTime < firstTime) { diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregatorFactory.java index d4463a07ee5a..6168f1d2442b 100644 --- 
a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregatorFactory.java @@ -288,11 +288,6 @@ public boolean isGetFirstElementFromMvd() return getFirstElementFromMvd; } - public void setGetFirstElementFromMvd(boolean getFirstElementFromMvd) - { - this.getFirstElementFromMvd = getFirstElementFromMvd; - } - @Override public byte[] getCacheKey() { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 12a5443df359..6961b2803c0e 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -1270,6 +1270,7 @@ public void testPrimitiveAnyInSubquery() @Test public void testStringEarliestSingleStringDim() { + notMsqCompatible(); testQuery( "SELECT dim2, EARLIEST(dim1,10) AS val FROM foo GROUP BY dim2", ImmutableList.of( From 4c5813d16522e6332958dde3e58a72cdb6579daf Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Wed, 19 Jul 2023 12:02:42 -0700 Subject: [PATCH 11/20] Addressing comments --- .../apache/druid/query/UnnestDataSource.java | 7 +- .../first/DoubleFirstAggregatorFactory.java | 2 +- .../first/FloatFirstAggregatorFactory.java | 2 +- .../first/LongFirstAggregatorFactory.java | 2 +- ...iStringFirstDimensionVectorAggregator.java | 124 ------------------ .../first/NumericFirstVectorAggregator.java | 34 ++--- .../first/StringFirstAggregatorFactory.java | 47 ++++--- 7 files changed, 46 insertions(+), 172 deletions(-) delete mode 100644 processing/src/main/java/org/apache/druid/query/aggregation/first/MultiStringFirstDimensionVectorAggregator.java diff --git a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java index 72859f02c9ce..acd984b64422 100644 
--- a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java +++ b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java @@ -61,10 +61,9 @@ private UnnestDataSource( DimFilter unnestFilter ) { - // select * from UNNEST(ARRAY[1,2,3]) as somu(d3) where somu.d3 IN ('a','b') - this.base = dataSource; // table - this.virtualColumn = virtualColumn; // MV_TO_ARRAY - this.unnestFilter = unnestFilter; // d3 in (a,b) + this.base = dataSource; + this.virtualColumn = virtualColumn; + this.unnestFilter = unnestFilter; } @JsonCreator diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstAggregatorFactory.java index 406335ad2a65..01457e0d0cf5 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstAggregatorFactory.java @@ -143,7 +143,7 @@ public VectorAggregator factorizeVector( ) { ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(fieldName); - if (capabilities.isNumeric()) { + if (capabilities != null && capabilities.isNumeric()) { VectorValueSelector valueSelector = columnSelectorFactory.makeValueSelector(fieldName); VectorValueSelector timeSelector = columnSelectorFactory.makeValueSelector( timeColumn); diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstAggregatorFactory.java index d755f3eca909..29f02e6fda18 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstAggregatorFactory.java @@ -133,7 +133,7 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory 
metricFactory) public VectorAggregator factorizeVector(VectorColumnSelectorFactory columnSelectorFactory) { ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(fieldName); - if (capabilities.isNumeric()) { + if (capabilities != null && capabilities.isNumeric()) { VectorValueSelector valueSelector = columnSelectorFactory.makeValueSelector(fieldName); VectorValueSelector timeSelector = columnSelectorFactory.makeValueSelector(timeColumn); return new FloatFirstVectorAggregator(timeSelector, valueSelector); diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstAggregatorFactory.java index df971387d23a..197550b7b8d3 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstAggregatorFactory.java @@ -132,7 +132,7 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) public VectorAggregator factorizeVector(VectorColumnSelectorFactory columnSelectorFactory) { ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(fieldName); - if (capabilities.isNumeric()) { + if (capabilities != null && capabilities.isNumeric()) { VectorValueSelector valueSelector = columnSelectorFactory.makeValueSelector(fieldName); VectorValueSelector timeSelector = columnSelectorFactory.makeValueSelector( timeColumn); diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/MultiStringFirstDimensionVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/MultiStringFirstDimensionVectorAggregator.java deleted file mode 100644 index c13165a39441..000000000000 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/MultiStringFirstDimensionVectorAggregator.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - 
* Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.query.aggregation.first; - -import org.apache.druid.common.config.NullHandling; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.query.aggregation.SerializablePairLongString; -import org.apache.druid.query.aggregation.VectorAggregator; -import org.apache.druid.segment.data.IndexedInts; -import org.apache.druid.segment.vector.BaseLongVectorValueSelector; -import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; - -import javax.annotation.Nullable; -import java.nio.ByteBuffer; - -public class MultiStringFirstDimensionVectorAggregator implements VectorAggregator -{ - private final BaseLongVectorValueSelector timeSelector; - private final MultiValueDimensionVectorSelector valueDimensionVectorSelector; - private long firstTime; - private final int maxStringBytes; - private final boolean useDefault = NullHandling.replaceWithDefault(); - - public MultiStringFirstDimensionVectorAggregator( - BaseLongVectorValueSelector timeSelector, - MultiValueDimensionVectorSelector valueDimensionVectorSelector, - int maxStringBytes - ) - { - this.timeSelector = timeSelector; - this.valueDimensionVectorSelector = 
valueDimensionVectorSelector; - this.maxStringBytes = maxStringBytes; - firstTime = Long.MAX_VALUE; - } - - @Override - public void init(ByteBuffer buf, int position) - { - buf.putLong(position, Long.MAX_VALUE); - buf.put( - position + NumericFirstVectorAggregator.NULL_OFFSET, - useDefault ? NullHandling.IS_NOT_NULL_BYTE : NullHandling.IS_NULL_BYTE - ); - buf.putLong(position + NumericFirstVectorAggregator.VALUE_OFFSET, 0); - } - - @Override - public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) - { - final long[] timeVector = timeSelector.getLongVector(); - final IndexedInts[] valueVector = valueDimensionVectorSelector.getRowVector(); - firstTime = buf.getLong(position); - int index = startRow; - for (int i = startRow; i < endRow; i++) { - if (valueVector[i].get(0) != 0) { - index = i; - break; - } - } - - final long earliestTime = timeVector[index]; - if (earliestTime < firstTime) { - firstTime = earliestTime; - buf.putLong(position, firstTime); - buf.put(position + NumericFirstVectorAggregator.NULL_OFFSET, NullHandling.IS_NOT_NULL_BYTE); - buf.putInt(position + NumericFirstVectorAggregator.VALUE_OFFSET, valueVector[index].get(0)); - } - } - - @Override - public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) - { - long[] timeVector = timeSelector.getLongVector(); - IndexedInts[] values = valueDimensionVectorSelector.getRowVector(); - for (int i = 0; i < numRows; i++) { - int position = positions[i] + positionOffset; - int row = rows == null ? i : rows[i]; - long firstTime = buf.getLong(position); - if (timeVector[row] < firstTime) { - firstTime = timeVector[row]; - buf.putLong(position, firstTime); - buf.put(position + NumericFirstVectorAggregator.NULL_OFFSET, NullHandling.IS_NOT_NULL_BYTE); - buf.putInt( - position + NumericFirstVectorAggregator.VALUE_OFFSET, - values[row].size() > 0 ? 
values[row].get(0) : 0 - ); - } - } - } - - @Nullable - @Override - public Object get(ByteBuffer buf, int position) - { - int index = buf.getInt(position + NumericFirstVectorAggregator.VALUE_OFFSET); - long earliest = buf.getLong(position); - String strValue = valueDimensionVectorSelector.lookupName(index); - return new SerializablePairLongString(earliest, StringUtils.chop(strValue, maxStringBytes)); - - } - - @Override - public void close() - { - - } -} diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java index 716db29f79ca..ee5ad0ee360d 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java @@ -58,34 +58,28 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) { final long[] timeVector = timeSelector.getLongVector(); final boolean[] nullValueVector = valueSelector.getNullVector(); - boolean nullAbsent = false; firstTime = buf.getLong(position); // check if nullVector is found or not // the nullVector is null if no null values are found // set the nullAbsent flag accordingly - if (nullValueVector == null) { - nullAbsent = true; - } // the time vector is already sorted so the first element would be the earliest // traverse accordingly - int index = startRow; - if (!useDefault && !nullAbsent) { - for (int i = startRow; i < endRow; i++) { - if (!nullValueVector[i]) { - index = i; - break; - } - } - } + int index; - final long earliestTime = timeVector[index]; - if (earliestTime < firstTime) { - firstTime = earliestTime; - if (useDefault || nullValueVector == null || !nullValueVector[index]) { - updateTimeWithValue(buf, position, firstTime, index); - } else { - updateTimeWithNull(buf, position, firstTime); + for (int i = 
startRow; i < endRow; i++) { + index = i; + final long earliestTime = timeVector[index]; + if (earliestTime > firstTime) { + break; + } + if (earliestTime < firstTime) { + firstTime = earliestTime; + if (useDefault || nullValueVector == null || !nullValueVector[index]) { + updateTimeWithValue(buf, position, firstTime, index); + } else { + updateTimeWithNull(buf, position, firstTime); + } } } } diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregatorFactory.java index 6168f1d2442b..834653b8633d 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregatorFactory.java @@ -44,7 +44,6 @@ import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.vector.BaseLongVectorValueSelector; -import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorObjectSelector; @@ -77,6 +76,7 @@ public void aggregate() } }; + private static final BufferAggregator NIL_BUFFER_AGGREGATOR = new StringFirstBufferAggregator( NilColumnValueSelector.instance(), NilColumnValueSelector.instance(), @@ -91,6 +91,25 @@ public void aggregate(ByteBuffer buf, int position) } }; + public static final VectorAggregator NIL_VECTOR_AGGREGATOR = new StringFirstVectorAggregator( + null, + null, + 0 + ) + { + @Override + public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) + { + // no-op + } + + @Override + public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) + { + // no-op + } 
+ }; + public static final int DEFAULT_MAX_STRING_SIZE = 1024; public static final Comparator TIME_COMPARATOR = (o1, o2) -> Longs.compare( @@ -109,8 +128,6 @@ public void aggregate(ByteBuffer buf, int position) private final String timeColumn; protected final int maxStringBytes; - private boolean getFirstElementFromMvd; - @JsonCreator public StringFirstAggregatorFactory( @JsonProperty("name") String name, @@ -132,7 +149,6 @@ public StringFirstAggregatorFactory( this.maxStringBytes = maxStringBytes == null ? StringFirstAggregatorFactory.DEFAULT_MAX_STRING_SIZE : maxStringBytes; - this.getFirstElementFromMvd = false; } @@ -176,16 +192,10 @@ public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFact ColumnCapabilities capabilities = selectorFactory.getColumnCapabilities(fieldName); if (capabilities != null) { if (capabilities.is(ValueType.STRING) && capabilities.isDictionaryEncoded().isTrue()) { - // Case 1: Multivalue string with dimension selector - if (capabilities.hasMultipleValues().isTrue()) { - if (isGetFirstElementFromMvd()) { - MultiValueDimensionVectorSelector mSelector = selectorFactory.makeMultiValueDimensionSelector( - DefaultDimensionSpec.of( - fieldName)); - return new MultiStringFirstDimensionVectorAggregator(timeSelector, mSelector, maxStringBytes); - } - } else { - // Case 2: Single string with dimension selector + // Case 1: Single value string with dimension selector + // For multivalue string we need to iterate a list of indexedInts which is also similar to iterating + // over elements for an ARRAY typed column. These two which requires an iteration will be done together. 
+ if (!capabilities.hasMultipleValues().isTrue()) { SingleValueDimensionVectorSelector sSelector = selectorFactory.makeSingleValueDimensionSelector( DefaultDimensionSpec.of( fieldName)); @@ -193,12 +203,12 @@ public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFact } } } - // Case 3: return vector object selector + // Case 2: return vector object selector VectorObjectSelector vSelector = selectorFactory.makeObjectSelector(fieldName); if (capabilities != null) { return new StringFirstVectorAggregator(timeSelector, vSelector, maxStringBytes); } else { - return new StringFirstVectorAggregator(null, vSelector, maxStringBytes); + return NIL_VECTOR_AGGREGATOR; } } @@ -283,11 +293,6 @@ public List requiredFields() return Arrays.asList(timeColumn, fieldName); } - public boolean isGetFirstElementFromMvd() - { - return getFirstElementFromMvd; - } - @Override public byte[] getCacheKey() { From 67fce5f20eeb5ef2a1b9c7cc4b09ea92416574d0 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Wed, 19 Jul 2023 12:13:33 -0700 Subject: [PATCH 12/20] 1 more change --- .../first/NumericFirstVectorAggregator.java | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java index ee5ad0ee360d..a15f8d7e8249 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java @@ -70,16 +70,14 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) for (int i = startRow; i < endRow; i++) { index = i; final long earliestTime = timeVector[index]; - if (earliestTime > firstTime) { + if (earliestTime >= firstTime) { break; } - if (earliestTime < firstTime) { - firstTime = earliestTime; 
- if (useDefault || nullValueVector == null || !nullValueVector[index]) { - updateTimeWithValue(buf, position, firstTime, index); - } else { - updateTimeWithNull(buf, position, firstTime); - } + firstTime = earliestTime; + if (useDefault || nullValueVector == null || !nullValueVector[index]) { + updateTimeWithValue(buf, position, firstTime, index); + } else { + updateTimeWithNull(buf, position, firstTime); } } } From 1c372affd6f18e1187274d132b2a53d8636b70ab Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Mon, 7 Aug 2023 10:23:08 -0700 Subject: [PATCH 13/20] Handling review comments part 1 --- .../queries/wikipedia_editstream_queries.json | 75 ++++++++++++------- .../first/DoubleFirstAggregatorFactory.java | 3 +- .../first/FloatFirstAggregatorFactory.java | 3 +- .../first/LongFirstAggregatorFactory.java | 3 +- .../first/StringFirstVectorAggregator.java | 3 +- 5 files changed, 58 insertions(+), 29 deletions(-) diff --git a/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json b/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json index 59a5c6ca70b8..ee5f882c1520 100644 --- a/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json +++ b/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json @@ -13,7 +13,8 @@ } ], "context": { - "useCache": "true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -119,7 +120,8 @@ } ], "context": { - "useCache": "true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -270,7 +272,8 @@ } ], "context": { - "useCache": "true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -364,7 +367,8 @@ } ], "context": { - "useCache": "true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -399,7 +403,8 @@ "metric": "rows", "threshold": 3, "context": { - "useCache": "true", + 
"useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -514,7 +519,8 @@ "metric": "unique_users", "threshold": 3, "context": { - "useCache": "true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -693,7 +699,8 @@ "metric": "count", "threshold": 3, "context": { - "useCache": "true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -878,7 +885,8 @@ "metric": "count", "threshold": 3, "context": { - "useCache": "true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -989,7 +997,8 @@ }, "threshold": 3, "context": { - "useCache": "true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -1064,7 +1073,8 @@ }, "threshold": 3, "context": { - "useCache": "true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -1128,7 +1138,8 @@ ], "dimensions": ["namespace"], "context": { - "useCache": "true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -1243,7 +1254,8 @@ "orderBy": ["robot", "namespace"] }, "context": { - "useCache": "true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -1342,7 +1354,8 @@ "value": "league_of_legends" }, "context": { - "useCache": "true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -1370,7 +1383,8 @@ "queryType": "timeBoundary", "dataSource": "wikipedia_editstream", "context": { - "useCache": "true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -1459,7 +1473,8 @@ "metric": "rows", "threshold": 3, "context": { - "useCache": "true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -1519,7 +1534,8 @@ "metric": "rows", "threshold": 3, "context": { - "useCache": 
"true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -1583,7 +1599,8 @@ "metric": "rows", "threshold": 3, "context": { - "useCache": "true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -1672,7 +1689,8 @@ "limit": 3 }, "context": { - "useCache": "true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -1724,7 +1742,8 @@ } ], "context": { - "useCache": "true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -1766,7 +1785,8 @@ } ], "context": { - "useCache": "true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -1827,7 +1847,8 @@ "metric": "rows", "threshold": 3, "context": { - "useCache": "true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -1847,7 +1868,8 @@ } ], "context": { - "useCache": "true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -1884,7 +1906,8 @@ "metric": "rows", "threshold": 3, "context": { - "useCache": "true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -1904,7 +1927,8 @@ } ], "context": { - "useCache": "true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } @@ -2003,7 +2027,8 @@ "limit": 3 }, "context": { - "useCache": "true", + "useCache": "true", + "vectorize": "false", "populateCache": "true", "timeout": 360000 } diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstAggregatorFactory.java index 01457e0d0cf5..8d180c62a733 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstAggregatorFactory.java +++ 
b/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstAggregatorFactory.java @@ -41,6 +41,7 @@ import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.Types; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorValueSelector; @@ -143,7 +144,7 @@ public VectorAggregator factorizeVector( ) { ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(fieldName); - if (capabilities != null && capabilities.isNumeric()) { + if (capabilities != null && Types.isNumeric(capabilities)) { VectorValueSelector valueSelector = columnSelectorFactory.makeValueSelector(fieldName); VectorValueSelector timeSelector = columnSelectorFactory.makeValueSelector( timeColumn); diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstAggregatorFactory.java index 29f02e6fda18..183c87772aa2 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstAggregatorFactory.java @@ -41,6 +41,7 @@ import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.Types; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorValueSelector; @@ -133,7 +134,7 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) public VectorAggregator factorizeVector(VectorColumnSelectorFactory columnSelectorFactory) { ColumnCapabilities capabilities = 
columnSelectorFactory.getColumnCapabilities(fieldName); - if (capabilities != null && capabilities.isNumeric()) { + if (capabilities != null && Types.isNumeric(capabilities)) { VectorValueSelector valueSelector = columnSelectorFactory.makeValueSelector(fieldName); VectorValueSelector timeSelector = columnSelectorFactory.makeValueSelector(timeColumn); return new FloatFirstVectorAggregator(timeSelector, valueSelector); diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstAggregatorFactory.java index 197550b7b8d3..2727cdb1426c 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstAggregatorFactory.java @@ -41,6 +41,7 @@ import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.Types; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorValueSelector; @@ -132,7 +133,7 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) public VectorAggregator factorizeVector(VectorColumnSelectorFactory columnSelectorFactory) { ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(fieldName); - if (capabilities != null && capabilities.isNumeric()) { + if (capabilities != null && Types.isNumeric(capabilities)) { VectorValueSelector valueSelector = columnSelectorFactory.makeValueSelector(fieldName); VectorValueSelector timeSelector = columnSelectorFactory.makeValueSelector( timeColumn); diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java 
b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java index ea77084518a8..1b58d6320aa7 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java @@ -25,6 +25,7 @@ import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.vector.BaseLongVectorValueSelector; import org.apache.druid.segment.vector.VectorObjectSelector; +import org.apache.druid.segment.vector.VectorValueSelector; import javax.annotation.Nullable; import java.nio.ByteBuffer; @@ -35,7 +36,7 @@ public class StringFirstVectorAggregator implements VectorAggregator DateTimes.MAX.getMillis(), null ); - private final BaseLongVectorValueSelector timeSelector; + private final VectorValueSelector timeSelector; private final VectorObjectSelector valueSelector; private final int maxStringBytes; From f58541273bc49806158454fbd18e7130aebc74b0 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Wed, 16 Aug 2023 15:43:21 -0700 Subject: [PATCH 14/20] Handling review comments and correctness fix for latest_by when the time expression need not be in sorted order --- .../first/NumericFirstVectorAggregator.java | 43 ++++++------ ...eStringFirstDimensionVectorAggregator.java | 65 ++++++++++++++----- .../first/StringFirstAggregator.java | 6 +- .../first/StringFirstBufferAggregator.java | 6 +- .../first/StringFirstLastUtils.java | 3 + .../first/StringFirstVectorAggregator.java | 12 ++-- .../last/StringLastAggregator.java | 6 +- .../last/StringLastBufferAggregator.java | 6 +- .../druid/sql/calcite/CalciteQueryTest.java | 27 ++++++++ 9 files changed, 120 insertions(+), 54 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java index 
a15f8d7e8249..6207b17b672b 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java @@ -57,21 +57,21 @@ public void init(ByteBuffer buf, int position) public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) { final long[] timeVector = timeSelector.getLongVector(); + final boolean[] nullTimeVector = timeSelector.getNullVector(); final boolean[] nullValueVector = valueSelector.getNullVector(); - firstTime = buf.getLong(position); - // check if nullVector is found or not - // the nullVector is null if no null values are found - // set the nullAbsent flag accordingly - // the time vector is already sorted so the first element would be the earliest - // traverse accordingly int index; - - for (int i = startRow; i < endRow; i++) { - index = i; + // Now we are iterating over the values to find the minima as the + // timestamp expression in EARLIEST_BY has no established sorting order + // If we know that the time is already sorted this can be optimized + // for the general EARLIEST call which is always on __time which is sorted + for (index = startRow; index < endRow; index++) { + if (nullTimeVector != null && nullTimeVector[index]) { + continue; + } final long earliestTime = timeVector[index]; if (earliestTime >= firstTime) { - break; + continue; } firstTime = earliestTime; if (useDefault || nullValueVector == null || !nullValueVector[index]) { @@ -83,11 +83,10 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) } /** - * * Checks if the aggregated value at a position in the buffer is null or not * - * @param buf byte buffer storing the byte array representation of the aggregate - * @param position offset within the byte buffer at which the current aggregate value is stored + * @param buf byte buffer storing the byte array representation of the aggregate + * 
@param position offset within the byte buffer at which the current aggregate value is stored * @return */ boolean isValueNull(ByteBuffer buf, int position) @@ -110,7 +109,7 @@ public void aggregate( for (int i = 0; i < numRows; i++) { int position = positions[i] + positionOffset; int row = rows == null ? i : rows[i]; - long firstTime = buf.getLong(position); + firstTime = buf.getLong(position); if (timeVector[row] < firstTime) { if (useDefault || nulls == null || !nulls[row]) { updateTimeWithValue(buf, position, timeVector[row], row); @@ -124,10 +123,10 @@ public void aggregate( /** * Updates the time and the non null values to the appropriate position in buffer * - * @param buf byte buffer storing the byte array representation of the aggregate - * @param position offset within the byte buffer at which the current aggregate value is stored - * @param time the time to be updated in the buffer as the last time - * @param index the index of the vectorized vector which is the last value + * @param buf byte buffer storing the byte array representation of the aggregate + * @param position offset within the byte buffer at which the current aggregate value is stored + * @param time the time to be updated in the buffer as the last time + * @param index the index of the vectorized vector which is the last value */ void updateTimeWithValue(ByteBuffer buf, int position, long time, int index) { @@ -139,9 +138,9 @@ void updateTimeWithValue(ByteBuffer buf, int position, long time, int index) /** * Updates the time only to the appropriate position in buffer as the value is null * - * @param buf byte buffer storing the byte array representation of the aggregate - * @param position offset within the byte buffer at which the current aggregate value is stored - * @param time the time to be updated in the buffer as the last time + * @param buf byte buffer storing the byte array representation of the aggregate + * @param position offset within the byte buffer at which the current 
aggregate value is stored + * @param time the time to be updated in the buffer as the last time */ void updateTimeWithNull(ByteBuffer buf, int position, long time) { @@ -150,7 +149,7 @@ void updateTimeWithNull(ByteBuffer buf, int position, long time) } /** - *Abstract function which needs to be overridden by subclasses to set the initial value + * Abstract function which needs to be overridden by subclasses to set the initial value */ abstract void initValue(ByteBuffer buf, int position); diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/SingleStringFirstDimensionVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/SingleStringFirstDimensionVectorAggregator.java index 2d876efd6f94..6428b173a1df 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/SingleStringFirstDimensionVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/SingleStringFirstDimensionVectorAggregator.java @@ -66,14 +66,21 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) final long[] timeVector = timeSelector.getLongVector(); final int[] valueVector = valueDimensionVectorSelector.getRowVector(); firstTime = buf.getLong(position); - int index = startRow; + int index; + long earliestTime; - final long earliestTime = timeVector[index]; - if (earliestTime < firstTime) { - firstTime = earliestTime; - buf.putLong(position, firstTime); - buf.put(position + NumericFirstVectorAggregator.NULL_OFFSET, NullHandling.IS_NOT_NULL_BYTE); - buf.putInt(position + NumericFirstVectorAggregator.VALUE_OFFSET, valueVector[index]); + // Now we are iterating over the values to find the minima as the + // timestamp expression in EARLIEST_BY has no established sorting order + // If we know that the time is already sorted this can be optimized + // for the general EARLIEST call which is always on __time which is sorted + for (index = startRow; index < endRow; 
index++) { + earliestTime = timeVector[index]; + if (earliestTime < firstTime) { + firstTime = earliestTime; + buf.putLong(position, firstTime); + buf.put(position + NumericFirstVectorAggregator.NULL_OFFSET, NullHandling.IS_NOT_NULL_BYTE); + buf.putInt(position + NumericFirstVectorAggregator.VALUE_OFFSET, valueVector[index]); + } } } @@ -81,19 +88,45 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) { long[] timeVector = timeSelector.getLongVector(); + boolean[] nullTimeVector = timeSelector.getNullVector(); int[] values = valueDimensionVectorSelector.getRowVector(); - for (int i = 0; i < numRows; i++) { - int position = positions[i] + positionOffset; - int row = rows == null ? i : rows[i]; - long firstTime = buf.getLong(position); - if (timeVector[row] < firstTime) { - firstTime = timeVector[row]; - buf.putLong(position, firstTime); - buf.put(position + NumericFirstVectorAggregator.NULL_OFFSET, NullHandling.IS_NOT_NULL_BYTE); - buf.putInt(position + NumericFirstVectorAggregator.VALUE_OFFSET, values[row]); + // Now we are iterating over the values to find the minima as the + // timestamp expression in EARLIEST_BY has no established sorting order + // If we know that the time is already sorted this can be optimized + // for the general EARLIEST call which is always on __time which is sorted + + // The hotpath is separated out into 2 cases when nullTimeVector + // is null and not-null so that the check is not on every value + if (nullTimeVector != null) { + for (int i = 0; i < numRows; i++) { + if (nullTimeVector[i]) { + continue; + } + int position = positions[i] + positionOffset; + int row = rows == null ? 
i : rows[i]; + long firstTime = buf.getLong(position); + if (timeVector[row] < firstTime) { + firstTime = timeVector[row]; + buf.putLong(position, firstTime); + buf.put(position + NumericFirstVectorAggregator.NULL_OFFSET, NullHandling.IS_NOT_NULL_BYTE); + buf.putInt(position + NumericFirstVectorAggregator.VALUE_OFFSET, values[row]); + } + } + } else { + for (int i = 0; i < numRows; i++) { + int position = positions[i] + positionOffset; + int row = rows == null ? i : rows[i]; + long firstTime = buf.getLong(position); + if (timeVector[row] < firstTime) { + firstTime = timeVector[row]; + buf.putLong(position, firstTime); + buf.put(position + NumericFirstVectorAggregator.NULL_OFFSET, NullHandling.IS_NOT_NULL_BYTE); + buf.putInt(position + NumericFirstVectorAggregator.VALUE_OFFSET, values[row]); + } } } + } @Nullable diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregator.java index 8a6654fbfdff..0d05833378c6 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregator.java @@ -56,9 +56,6 @@ public StringFirstAggregator( @Override public void aggregate() { - if (timeSelector.isNull()) { - return; - } if (needsFoldCheck) { // Less efficient code path when folding is a possibility (we must read the value selector first just in case // it's a foldable object). 
@@ -72,6 +69,9 @@ public void aggregate() firstValue = StringUtils.fastLooseChop(inPair.rhs, maxStringBytes); } } else { + if (timeSelector.isNull()) { + return; + } final long time = timeSelector.getLong(); if (time < firstTime) { diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstBufferAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstBufferAggregator.java index fbf2a4156c56..563455c9eefa 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstBufferAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstBufferAggregator.java @@ -63,9 +63,6 @@ public void init(ByteBuffer buf, int position) @Override public void aggregate(ByteBuffer buf, int position) { - if (timeSelector.isNull()) { - return; - } if (needsFoldCheck) { // Less efficient code path when folding is a possibility (we must read the value selector first just in case // it's a foldable object). 
@@ -86,6 +83,9 @@ public void aggregate(ByteBuffer buf, int position) } } } else { + if (timeSelector.isNull()) { + return; + } final long time = timeSelector.getLong(); final long firstTime = buf.getLong(position); diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstLastUtils.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstLastUtils.java index 3a9b8818cd0b..14538fe4712e 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstLastUtils.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstLastUtils.java @@ -120,6 +120,9 @@ public static SerializablePairLongString readPairFromSelectors( time = pair.lhs; string = pair.rhs; } else if (object != null) { + if (timeSelector.isNull()) { + return null; + } time = timeSelector.getLong(); string = DimensionHandlerUtils.convertObjectToString(object); } else { diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java index 1b58d6320aa7..088f84b39316 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java @@ -68,11 +68,14 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) Object[] objectsWhichMightBeStrings = valueSelector.getObjectVector(); long firstTime = buf.getLong(position); int index; - for (int i = startRow; i < endRow; i++) { - if (times[i] > firstTime) { - break; + // Now we are iterating over the values to find the minima as the + // timestamp expression in EARLIEST_BY has no established sorting order + // If we know that the time is already sorted this can be optimized + // for the general EARLIEST call which is always on __time which is 
sorted + for (index = startRow; index < endRow; index++) { + if (times[index] > firstTime) { + continue; } - index = i; final boolean foldNeeded = StringFirstLastUtils.objectNeedsFoldCheck(objectsWhichMightBeStrings[index]); if (foldNeeded) { final SerializablePairLongString inPair = StringFirstLastUtils.readPairFromVectorSelectorsAtIndex( @@ -132,6 +135,7 @@ public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable in long firstTime = buf.getLong(position); if (timeVector[row] < firstTime) { if (foldNeeded) { + firstTime = timeVector[row]; final SerializablePairLongString inPair = StringFirstLastUtils.readPairFromVectorSelectorsAtIndex( timeSelector, valueSelector, diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/last/StringLastAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/last/StringLastAggregator.java index a7c33c8ad23e..f1dbab60938b 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/last/StringLastAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/last/StringLastAggregator.java @@ -57,9 +57,6 @@ public StringLastAggregator( @Override public void aggregate() { - if (timeSelector.isNull()) { - return; - } if (needsFoldCheck) { // Less efficient code path when folding is a possibility (we must read the value selector first just in case // it's a foldable object). 
@@ -73,6 +70,9 @@ public void aggregate() lastValue = StringUtils.fastLooseChop(inPair.rhs, maxStringBytes); } } else { + if (timeSelector.isNull()) { + return; + } final long time = timeSelector.getLong(); if (time >= lastTime) { diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/last/StringLastBufferAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/last/StringLastBufferAggregator.java index 8611ef72365a..3f78745f5fad 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/last/StringLastBufferAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/last/StringLastBufferAggregator.java @@ -64,9 +64,6 @@ public void init(ByteBuffer buf, int position) @Override public void aggregate(ByteBuffer buf, int position) { - if (timeSelector.isNull()) { - return; - } if (needsFoldCheck) { // Less efficient code path when folding is a possibility (we must read the value selector first just in case // it's a foldable object). 
@@ -87,6 +84,9 @@ public void aggregate(ByteBuffer buf, int position) } } } else { + if (timeSelector.isNull()) { + return; + } final long time = timeSelector.getLong(); final long lastTime = buf.getLong(position); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 13a6afcb83cb..dbb382f9270c 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -707,6 +707,33 @@ public void testLatestVectorAggregators() ); } + @Test + public void testEarliestVectorAggregators() + { + notMsqCompatible(); + testQuery( + "SELECT " + + "EARLIEST(dim1, 10) " + + "FROM druid.numfoo", + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource(CalciteTests.DATASOURCE3) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.ALL) + .aggregators( + aggregators( + new StringFirstAggregatorFactory("a0", "dim1", "__time", 10) + ) + ) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{""} + ) + ); + } + @Test public void testLatestAggregators() { From 429170990192883e51812311c49d2e461e6db732 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Wed, 16 Aug 2023 17:06:57 -0700 Subject: [PATCH 15/20] Updating numeric first vector agg --- .../query/aggregation/first/NumericFirstVectorAggregator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java index 6207b17b672b..75d4354509c5 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java @@ -59,7 
+59,7 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) final long[] timeVector = timeSelector.getLongVector(); final boolean[] nullTimeVector = timeSelector.getNullVector(); final boolean[] nullValueVector = valueSelector.getNullVector(); - + firstTime = buf.getLong(position); int index; // Now we are iterating over the values to find the minima as the // timestamp expression in EARLIEST_BY has no established sorting order From 890c865514982fea6a68f4f72664c4192f0f959b Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Mon, 21 Aug 2023 09:54:14 -0700 Subject: [PATCH 16/20] Revert "Updating numeric first vector agg" This reverts commit 429170990192883e51812311c49d2e461e6db732. --- .../query/aggregation/first/NumericFirstVectorAggregator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java index 75d4354509c5..6207b17b672b 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java @@ -59,7 +59,7 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) final long[] timeVector = timeSelector.getLongVector(); final boolean[] nullTimeVector = timeSelector.getNullVector(); final boolean[] nullValueVector = valueSelector.getNullVector(); - firstTime = buf.getLong(position); + int index; // Now we are iterating over the values to find the minima as the // timestamp expression in EARLIEST_BY has no established sorting order From 6e75540d56cd2ca0defe66639a92e65feff25aa5 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Mon, 21 Aug 2023 09:54:55 -0700 Subject: [PATCH 17/20] Updating code for correctness issues --- 
.../first/NumericFirstVectorAggregator.java | 37 ++++++++------ ...eStringFirstDimensionVectorAggregator.java | 50 ++++--------------- .../first/StringFirstAggregator.java | 6 +-- .../first/StringFirstBufferAggregator.java | 6 +-- .../first/StringFirstLastUtils.java | 3 -- .../first/StringFirstVectorAggregator.java | 10 ++-- .../last/StringLastAggregator.java | 6 +-- .../last/StringLastBufferAggregator.java | 6 +-- .../druid/sql/calcite/CalciteQueryTest.java | 27 ---------- 9 files changed, 46 insertions(+), 105 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java index 6207b17b672b..7aaeb5060457 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java @@ -59,13 +59,17 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) final long[] timeVector = timeSelector.getLongVector(); final boolean[] nullTimeVector = timeSelector.getNullVector(); final boolean[] nullValueVector = valueSelector.getNullVector(); + firstTime = buf.getLong(position); + // check if nullVector is found or not + // the nullVector is null if no null values are found + // set the nullAbsent flag accordingly + // the time vector is already sorted so the first element would be the earliest + // traverse accordingly int index; - // Now we are iterating over the values to find the minima as the - // timestamp expression in EARLIEST_BY has no established sorting order - // If we know that the time is already sorted this can be optimized - // for the general EARLIEST call which is always on __time which is sorted - for (index = startRow; index < endRow; index++) { + + for (int i = startRow; i < endRow; i++) { + index = i; if (nullTimeVector != 
null && nullTimeVector[index]) { continue; } @@ -83,10 +87,11 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) } /** + * * Checks if the aggregated value at a position in the buffer is null or not * - * @param buf byte buffer storing the byte array representation of the aggregate - * @param position offset within the byte buffer at which the current aggregate value is stored + * @param buf byte buffer storing the byte array representation of the aggregate + * @param position offset within the byte buffer at which the current aggregate value is stored * @return */ boolean isValueNull(ByteBuffer buf, int position) @@ -109,7 +114,7 @@ public void aggregate( for (int i = 0; i < numRows; i++) { int position = positions[i] + positionOffset; int row = rows == null ? i : rows[i]; - firstTime = buf.getLong(position); + long firstTime = buf.getLong(position); if (timeVector[row] < firstTime) { if (useDefault || nulls == null || !nulls[row]) { updateTimeWithValue(buf, position, timeVector[row], row); @@ -123,10 +128,10 @@ public void aggregate( /** * Updates the time and the non null values to the appropriate position in buffer * - * @param buf byte buffer storing the byte array representation of the aggregate - * @param position offset within the byte buffer at which the current aggregate value is stored - * @param time the time to be updated in the buffer as the last time - * @param index the index of the vectorized vector which is the last value + * @param buf byte buffer storing the byte array representation of the aggregate + * @param position offset within the byte buffer at which the current aggregate value is stored + * @param time the time to be updated in the buffer as the last time + * @param index the index of the vectorized vector which is the last value */ void updateTimeWithValue(ByteBuffer buf, int position, long time, int index) { @@ -138,9 +143,9 @@ void updateTimeWithValue(ByteBuffer buf, int position, long time, int index) 
/** * Updates the time only to the appropriate position in buffer as the value is null * - * @param buf byte buffer storing the byte array representation of the aggregate - * @param position offset within the byte buffer at which the current aggregate value is stored - * @param time the time to be updated in the buffer as the last time + * @param buf byte buffer storing the byte array representation of the aggregate + * @param position offset within the byte buffer at which the current aggregate value is stored + * @param time the time to be updated in the buffer as the last time */ void updateTimeWithNull(ByteBuffer buf, int position, long time) { @@ -149,7 +154,7 @@ void updateTimeWithNull(ByteBuffer buf, int position, long time) } /** - * Abstract function which needs to be overridden by subclasses to set the initial value + *Abstract function which needs to be overridden by subclasses to set the initial value */ abstract void initValue(ByteBuffer buf, int position); diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/SingleStringFirstDimensionVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/SingleStringFirstDimensionVectorAggregator.java index 6428b173a1df..0010f7630a4f 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/SingleStringFirstDimensionVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/SingleStringFirstDimensionVectorAggregator.java @@ -67,12 +67,8 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) final int[] valueVector = valueDimensionVectorSelector.getRowVector(); firstTime = buf.getLong(position); int index; - long earliestTime; - // Now we are iterating over the values to find the minima as the - // timestamp expression in EARLIEST_BY has no established sorting order - // If we know that the time is already sorted this can be optimized - // for the general EARLIEST call which is 
always on __time which is sorted + long earliestTime; for (index = startRow; index < endRow; index++) { earliestTime = timeVector[index]; if (earliestTime < firstTime) { @@ -88,45 +84,19 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) { long[] timeVector = timeSelector.getLongVector(); - boolean[] nullTimeVector = timeSelector.getNullVector(); int[] values = valueDimensionVectorSelector.getRowVector(); - // Now we are iterating over the values to find the minima as the - // timestamp expression in EARLIEST_BY has no established sorting order - // If we know that the time is already sorted this can be optimized - // for the general EARLIEST call which is always on __time which is sorted - - // The hotpath is separated out into 2 cases when nullTimeVector - // is null and not-null so that the check is not on every value - if (nullTimeVector != null) { - for (int i = 0; i < numRows; i++) { - if (nullTimeVector[i]) { - continue; - } - int position = positions[i] + positionOffset; - int row = rows == null ? i : rows[i]; - long firstTime = buf.getLong(position); - if (timeVector[row] < firstTime) { - firstTime = timeVector[row]; - buf.putLong(position, firstTime); - buf.put(position + NumericFirstVectorAggregator.NULL_OFFSET, NullHandling.IS_NOT_NULL_BYTE); - buf.putInt(position + NumericFirstVectorAggregator.VALUE_OFFSET, values[row]); - } - } - } else { - for (int i = 0; i < numRows; i++) { - int position = positions[i] + positionOffset; - int row = rows == null ? 
i : rows[i]; - long firstTime = buf.getLong(position); - if (timeVector[row] < firstTime) { - firstTime = timeVector[row]; - buf.putLong(position, firstTime); - buf.put(position + NumericFirstVectorAggregator.NULL_OFFSET, NullHandling.IS_NOT_NULL_BYTE); - buf.putInt(position + NumericFirstVectorAggregator.VALUE_OFFSET, values[row]); - } + for (int i = 0; i < numRows; i++) { + int position = positions[i] + positionOffset; + int row = rows == null ? i : rows[i]; + long firstTime = buf.getLong(position); + if (timeVector[row] < firstTime) { + firstTime = timeVector[row]; + buf.putLong(position, firstTime); + buf.put(position + NumericFirstVectorAggregator.NULL_OFFSET, NullHandling.IS_NOT_NULL_BYTE); + buf.putInt(position + NumericFirstVectorAggregator.VALUE_OFFSET, values[row]); } } - } @Nullable diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregator.java index 0d05833378c6..8a6654fbfdff 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstAggregator.java @@ -56,6 +56,9 @@ public StringFirstAggregator( @Override public void aggregate() { + if (timeSelector.isNull()) { + return; + } if (needsFoldCheck) { // Less efficient code path when folding is a possibility (we must read the value selector first just in case // it's a foldable object). 
@@ -69,9 +72,6 @@ public void aggregate() firstValue = StringUtils.fastLooseChop(inPair.rhs, maxStringBytes); } } else { - if (timeSelector.isNull()) { - return; - } final long time = timeSelector.getLong(); if (time < firstTime) { diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstBufferAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstBufferAggregator.java index 563455c9eefa..fbf2a4156c56 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstBufferAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstBufferAggregator.java @@ -63,6 +63,9 @@ public void init(ByteBuffer buf, int position) @Override public void aggregate(ByteBuffer buf, int position) { + if (timeSelector.isNull()) { + return; + } if (needsFoldCheck) { // Less efficient code path when folding is a possibility (we must read the value selector first just in case // it's a foldable object). 
@@ -83,9 +86,6 @@ public void aggregate(ByteBuffer buf, int position) } } } else { - if (timeSelector.isNull()) { - return; - } final long time = timeSelector.getLong(); final long firstTime = buf.getLong(position); diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstLastUtils.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstLastUtils.java index ee04f2c6980b..b61a78a7c9f6 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstLastUtils.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstLastUtils.java @@ -120,9 +120,6 @@ public static SerializablePairLongString readPairFromSelectors( time = pair.lhs; string = pair.rhs; } else if (object != null) { - if (timeSelector.isNull()) { - return null; - } time = timeSelector.getLong(); string = DimensionHandlerUtils.convertObjectToString(object); } else { diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java index 088f84b39316..8daf9fc8297d 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java @@ -68,14 +68,11 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) Object[] objectsWhichMightBeStrings = valueSelector.getObjectVector(); long firstTime = buf.getLong(position); int index; - // Now we are iterating over the values to find the minima as the - // timestamp expression in EARLIEST_BY has no established sorting order - // If we know that the time is already sorted this can be optimized - // for the general EARLIEST call which is always on __time which is sorted - for (index = startRow; index < endRow; index++) { - if (times[index] > 
firstTime) { + for (int i = startRow; i < endRow; i++) { + if (times[i] > firstTime) { continue; } + index = i; final boolean foldNeeded = StringFirstLastUtils.objectNeedsFoldCheck(objectsWhichMightBeStrings[index]); if (foldNeeded) { final SerializablePairLongString inPair = StringFirstLastUtils.readPairFromVectorSelectorsAtIndex( @@ -135,7 +132,6 @@ public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable in long firstTime = buf.getLong(position); if (timeVector[row] < firstTime) { if (foldNeeded) { - firstTime = timeVector[row]; final SerializablePairLongString inPair = StringFirstLastUtils.readPairFromVectorSelectorsAtIndex( timeSelector, valueSelector, diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/last/StringLastAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/last/StringLastAggregator.java index f1dbab60938b..a7c33c8ad23e 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/last/StringLastAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/last/StringLastAggregator.java @@ -57,6 +57,9 @@ public StringLastAggregator( @Override public void aggregate() { + if (timeSelector.isNull()) { + return; + } if (needsFoldCheck) { // Less efficient code path when folding is a possibility (we must read the value selector first just in case // it's a foldable object). 
@@ -70,9 +73,6 @@ public void aggregate() lastValue = StringUtils.fastLooseChop(inPair.rhs, maxStringBytes); } } else { - if (timeSelector.isNull()) { - return; - } final long time = timeSelector.getLong(); if (time >= lastTime) { diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/last/StringLastBufferAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/last/StringLastBufferAggregator.java index 3f78745f5fad..8611ef72365a 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/last/StringLastBufferAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/last/StringLastBufferAggregator.java @@ -64,6 +64,9 @@ public void init(ByteBuffer buf, int position) @Override public void aggregate(ByteBuffer buf, int position) { + if (timeSelector.isNull()) { + return; + } if (needsFoldCheck) { // Less efficient code path when folding is a possibility (we must read the value selector first just in case // it's a foldable object). 
@@ -84,9 +87,6 @@ public void aggregate(ByteBuffer buf, int position) } } } else { - if (timeSelector.isNull()) { - return; - } final long time = timeSelector.getLong(); final long lastTime = buf.getLong(position); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index dbb382f9270c..13a6afcb83cb 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -707,33 +707,6 @@ public void testLatestVectorAggregators() ); } - @Test - public void testEarliestVectorAggregators() - { - notMsqCompatible(); - testQuery( - "SELECT " - + "EARLIEST(dim1, 10) " - + "FROM druid.numfoo", - ImmutableList.of( - Druids.newTimeseriesQueryBuilder() - .dataSource(CalciteTests.DATASOURCE3) - .intervals(querySegmentSpec(Filtration.eternity())) - .granularity(Granularities.ALL) - .aggregators( - aggregators( - new StringFirstAggregatorFactory("a0", "dim1", "__time", 10) - ) - ) - .context(QUERY_CONTEXT_DEFAULT) - .build() - ), - ImmutableList.of( - new Object[]{""} - ) - ); - } - @Test public void testLatestAggregators() { From 3664b875cb6d67da46c1dd15382044a33f8092b1 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Tue, 22 Aug 2023 08:10:07 -0700 Subject: [PATCH 18/20] fixing an issue with latest agg --- .../query/aggregation/last/StringLastVectorAggregator.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/last/StringLastVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/last/StringLastVectorAggregator.java index a9c0b1e9ade1..a18a1d4c9631 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/last/StringLastVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/last/StringLastVectorAggregator.java @@ -73,8 +73,8 @@ public 
void aggregate(ByteBuffer buf, int position, int startRow, int endRow) if (objectsWhichMightBeStrings[i] == null) { continue; } - if (times[i] < lastTime) { - break; + if (times[i] <= lastTime) { + continue; } index = i; final boolean foldNeeded = StringFirstLastUtils.objectNeedsFoldCheck(objectsWhichMightBeStrings[index]); From 83a784a9749c0462a31119442832786aa5cb57c0 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Tue, 22 Aug 2023 16:41:22 -0700 Subject: [PATCH 19/20] Adding more comments and removing an unnecessary check --- .../first/DoubleFirstAggregatorFactory.java | 2 +- .../first/FloatFirstAggregatorFactory.java | 2 +- .../first/LongFirstAggregatorFactory.java | 2 +- .../first/NumericFirstVectorAggregator.java | 12 +++++++----- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstAggregatorFactory.java index 8d180c62a733..61ba4b572c13 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/DoubleFirstAggregatorFactory.java @@ -144,7 +144,7 @@ public VectorAggregator factorizeVector( ) { ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(fieldName); - if (capabilities != null && Types.isNumeric(capabilities)) { + if (Types.isNumeric(capabilities)) { VectorValueSelector valueSelector = columnSelectorFactory.makeValueSelector(fieldName); VectorValueSelector timeSelector = columnSelectorFactory.makeValueSelector( timeColumn); diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstAggregatorFactory.java index 183c87772aa2..9c182aae469a 100644 --- 
a/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/FloatFirstAggregatorFactory.java @@ -134,7 +134,7 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) public VectorAggregator factorizeVector(VectorColumnSelectorFactory columnSelectorFactory) { ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(fieldName); - if (capabilities != null && Types.isNumeric(capabilities)) { + if (Types.isNumeric(capabilities)) { VectorValueSelector valueSelector = columnSelectorFactory.makeValueSelector(fieldName); VectorValueSelector timeSelector = columnSelectorFactory.makeValueSelector(timeColumn); return new FloatFirstVectorAggregator(timeSelector, valueSelector); diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstAggregatorFactory.java index 2727cdb1426c..ea400221b62f 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/LongFirstAggregatorFactory.java @@ -133,7 +133,7 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) public VectorAggregator factorizeVector(VectorColumnSelectorFactory columnSelectorFactory) { ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(fieldName); - if (capabilities != null && Types.isNumeric(capabilities)) { + if (Types.isNumeric(capabilities)) { VectorValueSelector valueSelector = columnSelectorFactory.makeValueSelector(fieldName); VectorValueSelector timeSelector = columnSelectorFactory.makeValueSelector( timeColumn); diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java 
b/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java index 7aaeb5060457..7fcd10352da9 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/NumericFirstVectorAggregator.java @@ -60,12 +60,14 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) final boolean[] nullTimeVector = timeSelector.getNullVector(); final boolean[] nullValueVector = valueSelector.getNullVector(); firstTime = buf.getLong(position); - // check if nullVector is found or not - // the nullVector is null if no null values are found - // set the nullAbsent flag accordingly - // the time vector is already sorted so the first element would be the earliest - // traverse accordingly + // the time vector is already sorted + // if earliest is on the default time dimension + // but if earliest uses earliest_by it might use a secondary timestamp + // which is not sorted. For correctness, we need to go over all elements. + // A possible optimization here is to have 2 paths one for earliest where + // we can take advantage of the sorted nature of time + // and the earliest_by where we have to go over all elements. 
 int index; for (int i = startRow; i < endRow; i++) { From f4ddb7c4e6ed488119476a2ce1b6426a3a2f967e Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Thu, 24 Aug 2023 17:30:08 -0700 Subject: [PATCH 20/20] Addressing null checks for time selector and only vectorize false for quantile sketches --- .../queries/wikipedia_editstream_queries.json | 57 +++++++------------ ...eStringFirstDimensionVectorAggregator.java | 12 +++- .../first/StringFirstVectorAggregator.java | 16 ++++-- 3 files changed, 41 insertions(+), 44 deletions(-) diff --git a/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json b/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json index 323269849475..4cb4c0ec4857 100644 --- a/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json +++ b/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json @@ -13,8 +13,7 @@ } ], "context": { - "useCache": "true", - "vectorize": "false", + "useCache": "true", "populateCache": "true", "timeout": 360000 } @@ -367,8 +366,7 @@ } ], "context": { - "useCache": "true", - "vectorize": "false", + "useCache": "true", "populateCache": "true", "timeout": 360000 } @@ -403,8 +401,7 @@ "metric": "rows", "threshold": 3, "context": { - "useCache": "true", - "vectorize": "false", + "useCache": "true", "populateCache": "true", "timeout": 360000 } @@ -997,8 +994,7 @@ }, "threshold": 3, "context": { - "useCache": "true", - "vectorize": "false", + "useCache": "true", "populateCache": "true", "timeout": 360000 } @@ -1073,8 +1069,7 @@ }, "threshold": 3, "context": { - "useCache": "true", - "vectorize": "false", + "useCache": "true", "populateCache": "true", "timeout": 360000 } @@ -1138,8 +1133,7 @@ ], "dimensions": ["namespace"], "context": { - "useCache": "true", - "vectorize": "false", + "useCache": "true", "populateCache": "true", "timeout": 360000 } @@ -1354,8 +1348,7 @@ "value": "league_of_legends" }, "context": { - "useCache": "true", - 
"vectorize": "false", + "useCache": "true", "populateCache": "true", "timeout": 360000 } @@ -1383,8 +1376,7 @@ "queryType": "timeBoundary", "dataSource": "wikipedia_editstream", "context": { - "useCache": "true", - "vectorize": "false", + "useCache": "true", "populateCache": "true", "timeout": 360000 } @@ -1473,8 +1465,7 @@ "metric": "rows", "threshold": 3, "context": { - "useCache": "true", - "vectorize": "false", + "useCache": "true", "populateCache": "true", "timeout": 360000 } @@ -1534,8 +1525,7 @@ "metric": "rows", "threshold": 3, "context": { - "useCache": "true", - "vectorize": "false", + "useCache": "true", "populateCache": "true", "timeout": 360000 } @@ -1599,8 +1589,7 @@ "metric": "rows", "threshold": 3, "context": { - "useCache": "true", - "vectorize": "false", + "useCache": "true", "populateCache": "true", "timeout": 360000 } @@ -1689,8 +1678,7 @@ "limit": 3 }, "context": { - "useCache": "true", - "vectorize": "false", + "useCache": "true", "populateCache": "true", "timeout": 360000 } @@ -1742,8 +1730,7 @@ } ], "context": { - "useCache": "true", - "vectorize": "false", + "useCache": "true", "populateCache": "true", "timeout": 360000 } @@ -1785,8 +1772,7 @@ } ], "context": { - "useCache": "true", - "vectorize": "false", + "useCache": "true", "populateCache": "true", "timeout": 360000 } @@ -1847,8 +1833,7 @@ "metric": "rows", "threshold": 3, "context": { - "useCache": "true", - "vectorize": "false", + "useCache": "true", "populateCache": "true", "timeout": 360000 } @@ -1868,8 +1853,7 @@ } ], "context": { - "useCache": "true", - "vectorize": "false", + "useCache": "true", "populateCache": "true", "timeout": 360000 } @@ -1906,8 +1890,7 @@ "metric": "rows", "threshold": 3, "context": { - "useCache": "true", - "vectorize": "false", + "useCache": "true", "populateCache": "true", "timeout": 360000 } @@ -1927,8 +1910,7 @@ } ], "context": { - "useCache": "true", - "vectorize": "false", + "useCache": "true", "populateCache": "true", "timeout": 360000 } @@ -2027,8 
+2009,7 @@ "limit": 3 }, "context": { - "useCache": "true", - "vectorize": "false", + "useCache": "true", "populateCache": "true", "timeout": 360000 } diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/SingleStringFirstDimensionVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/SingleStringFirstDimensionVectorAggregator.java index 0010f7630a4f..d5aa31444e0e 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/SingleStringFirstDimensionVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/SingleStringFirstDimensionVectorAggregator.java @@ -64,12 +64,16 @@ public void init(ByteBuffer buf, int position) public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) { final long[] timeVector = timeSelector.getLongVector(); + final boolean[] nullTimeVector = timeSelector.getNullVector(); final int[] valueVector = valueDimensionVectorSelector.getRowVector(); firstTime = buf.getLong(position); int index; long earliestTime; for (index = startRow; index < endRow; index++) { + if (nullTimeVector != null && nullTimeVector[index]) { + continue; + } earliestTime = timeVector[index]; if (earliestTime < firstTime) { firstTime = earliestTime; @@ -83,9 +87,13 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) @Override public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) { - long[] timeVector = timeSelector.getLongVector(); - int[] values = valueDimensionVectorSelector.getRowVector(); + final long[] timeVector = timeSelector.getLongVector(); + final boolean[] nullTimeVector = timeSelector.getNullVector(); + final int[] values = valueDimensionVectorSelector.getRowVector(); for (int i = 0; i < numRows; i++) { + if (nullTimeVector != null && nullTimeVector[i]) { + continue; + } int position = positions[i] + positionOffset; int row = rows == null ? 
i : rows[i]; long firstTime = buf.getLong(position); diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java index 8daf9fc8297d..7082b4c3dd00 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstVectorAggregator.java @@ -64,14 +64,18 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) if (timeSelector == null) { return; } - long[] times = timeSelector.getLongVector(); - Object[] objectsWhichMightBeStrings = valueSelector.getObjectVector(); + final long[] times = timeSelector.getLongVector(); + final boolean[] nullTimeVector = timeSelector.getNullVector(); + final Object[] objectsWhichMightBeStrings = valueSelector.getObjectVector(); long firstTime = buf.getLong(position); int index; for (int i = startRow; i < endRow; i++) { if (times[i] > firstTime) { continue; } + if (nullTimeVector != null && nullTimeVector[i]) { + continue; + } index = i; final boolean foldNeeded = StringFirstLastUtils.objectNeedsFoldCheck(objectsWhichMightBeStrings[index]); if (foldNeeded) { @@ -111,8 +115,9 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) @Override public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) { - long[] timeVector = timeSelector.getLongVector(); - Object[] objectsWhichMightBeStrings = valueSelector.getObjectVector(); + final long[] timeVector = timeSelector.getLongVector(); + final boolean[] nullTimeVector = timeSelector.getNullVector(); + final Object[] objectsWhichMightBeStrings = valueSelector.getObjectVector(); // iterate once over the object vector to find first non null element and // determine if the type is Pair or not @@ -127,6 +132,9 @@ public void 
aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable in } for (int i = 0; i < numRows; i++) { + if (nullTimeVector != null && nullTimeVector[i]) { + continue; + } int position = positions[i] + positionOffset; int row = rows == null ? i : rows[i]; long firstTime = buf.getLong(position);