From cc38ecc5454d09b7966b87b9c1569ac930d697e6 Mon Sep 17 00:00:00 2001 From: Abhishek Agarwal Date: Wed, 19 Aug 2020 23:47:31 +0530 Subject: [PATCH 01/10] First draft --- ...ApproximateHistogramAggregatorFactory.java | 18 +++ .../ApproximateHistogramBufferAggregator.java | 34 +----- ...mateHistogramBufferAggregatorInternal.java | 66 +++++++++++ ...mateHistogramFoldingAggregatorFactory.java | 20 +++- ...imateHistogramFoldingBufferAggregator.java | 40 +------ ...togramFoldingBufferAggregatorInternal.java | 94 ++++++++++++++++ ...imateHistogramFoldingVectorAggregator.java | 89 +++++++++++++++ .../ApproximateHistogramVectorAggregator.java | 92 ++++++++++++++++ ...ixedBucketsHistogramAggregatorFactory.java | 21 ++++ ...FixedBucketsHistogramBufferAggregator.java | 37 +------ ...ketsHistogramBufferAggregatorInternal.java | 104 ++++++++++++++++++ ...FixedBucketsHistogramVectorAggregator.java | 90 +++++++++++++++ 12 files changed, 609 insertions(+), 96 deletions(-) create mode 100644 extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramBufferAggregatorInternal.java create mode 100644 extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregatorInternal.java create mode 100644 extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingVectorAggregator.java create mode 100644 extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramVectorAggregator.java create mode 100644 extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramBufferAggregatorInternal.java create mode 100644 extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregator.java diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramAggregatorFactory.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramAggregatorFactory.java index bc1295466764..0d977456ca9d 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramAggregatorFactory.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramAggregatorFactory.java @@ -32,9 +32,12 @@ import org.apache.druid.query.aggregation.AggregatorUtil; import org.apache.druid.query.aggregation.BufferAggregator; import org.apache.druid.query.aggregation.ObjectAggregateCombiner; +import org.apache.druid.query.aggregation.VectorAggregator; import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; import java.nio.ByteBuffer; @@ -102,6 +105,21 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) ); } + @Override + public VectorAggregator factorizeVector(VectorColumnSelectorFactory metricVectorFactory) + { + return new ApproximateHistogramVectorAggregator( + metricVectorFactory.makeValueSelector(fieldName), + resolution + ); + } + + @Override + public boolean canVectorize(ColumnInspector columnInspector) + { + return true; + } + @Override public Comparator getComparator() { diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramBufferAggregator.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramBufferAggregator.java index 4390a63ac600..2222ce635015 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramBufferAggregator.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramBufferAggregator.java @@ -28,54 +28,30 @@ public class ApproximateHistogramBufferAggregator implements BufferAggregator { private final BaseFloatColumnValueSelector selector; - private final int resolution; + private final ApproximateHistogramBufferAggregatorInternal innerAggregator; public ApproximateHistogramBufferAggregator(BaseFloatColumnValueSelector selector, int resolution) { this.selector = selector; - this.resolution = resolution; + this.innerAggregator = new ApproximateHistogramBufferAggregatorInternal(resolution); } @Override public void init(ByteBuffer buf, int position) { - ByteBuffer mutationBuffer = buf.duplicate(); - mutationBuffer.position(position); - - mutationBuffer.putInt(resolution); - mutationBuffer.putInt(0); //initial binCount - for (int i = 0; i < resolution; ++i) { - mutationBuffer.putFloat(0f); - } - for (int i = 0; i < resolution; ++i) { - mutationBuffer.putLong(0L); - } - - // min - mutationBuffer.putFloat(Float.POSITIVE_INFINITY); - // max - mutationBuffer.putFloat(Float.NEGATIVE_INFINITY); + innerAggregator.init(buf, position); } @Override public void aggregate(ByteBuffer buf, int position) { - ByteBuffer mutationBuffer = buf.duplicate(); - mutationBuffer.position(position); - - ApproximateHistogram h0 = ApproximateHistogram.fromBytesDense(mutationBuffer); - h0.offer(selector.getFloat()); - - mutationBuffer.position(position); - h0.toBytesDense(mutationBuffer); + innerAggregator.aggregate(buf, position, selector.getFloat()); } @Override public Object get(ByteBuffer buf, int position) { - ByteBuffer mutationBuffer = buf.duplicate(); - mutationBuffer.position(position); - return ApproximateHistogram.fromBytes(mutationBuffer); + return innerAggregator.get(buf, position); } @Override diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramBufferAggregatorInternal.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramBufferAggregatorInternal.java new file mode 100644 index 000000000000..a1a8ea16cbb5 --- /dev/null +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramBufferAggregatorInternal.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.histogram; + +import java.nio.ByteBuffer; + +class ApproximateHistogramBufferAggregatorInternal +{ + private final int resolution; + + public ApproximateHistogramBufferAggregatorInternal(int resolution) + { + this.resolution = resolution; + } + + public void init(final ByteBuffer buf, final int position) + { + ApproximateHistogram histogram = new ApproximateHistogram(resolution); + ByteBuffer mutationBuffer = buf.duplicate(); + mutationBuffer.position(position); + histogram.toBytesDense(mutationBuffer); + } + + public ApproximateHistogram get(final ByteBuffer buf, final int position) + { + ByteBuffer mutationBuffer = buf.duplicate(); + mutationBuffer.position(position); + return ApproximateHistogram.fromBytesDense(mutationBuffer); + } + + public void put(final ByteBuffer buf, final int position, final ApproximateHistogram histogram) + { + ByteBuffer mutationBuffer = buf.duplicate(); + mutationBuffer.position(position); + histogram.toBytesDense(mutationBuffer); + } + + public void aggregate(final ByteBuffer buf, final int position, final float value) + { + ByteBuffer mutationBuffer = buf.duplicate(); + mutationBuffer.position(position); + + ApproximateHistogram h0 = ApproximateHistogram.fromBytesDense(mutationBuffer); + h0.offer(value); + + mutationBuffer.position(position); + h0.toBytesDense(mutationBuffer); + } +} diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingAggregatorFactory.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingAggregatorFactory.java index becbfe1ae64f..5e15c4d9b2e4 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingAggregatorFactory.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingAggregatorFactory.java @@ -27,9 +27,12 @@ import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.AggregatorUtil; import org.apache.druid.query.aggregation.BufferAggregator; +import org.apache.druid.query.aggregation.VectorAggregator; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorObjectSelector; import javax.annotation.Nullable; import java.util.Objects; @@ -93,10 +96,25 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) ); } + @Override + public VectorAggregator factorizeVector(VectorColumnSelectorFactory metricVectorFactory) + { + VectorObjectSelector selector = metricVectorFactory.makeObjectSelector(fieldName); + return new ApproximateHistogramFoldingVectorAggregator(selector, resolution, lowerLimit, upperLimit); + } + @Override public AggregatorFactory getCombiningFactory() { - return new ApproximateHistogramFoldingAggregatorFactory(name, name, resolution, numBuckets, lowerLimit, upperLimit, finalizeAsBase64Binary); + return new ApproximateHistogramFoldingAggregatorFactory( + name, + name, + resolution, + numBuckets, + lowerLimit, + upperLimit, + finalizeAsBase64Binary + ); } @Override diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregator.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregator.java index bd9d0cd39aab..96c385b2de54 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregator.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregator.java @@ -28,12 +28,7 @@ public class ApproximateHistogramFoldingBufferAggregator implements BufferAggregator { private final BaseObjectColumnValueSelector selector; - private final int resolution; - private final float upperLimit; - private final float lowerLimit; - - private float[] tmpBufferP; - private long[] tmpBufferB; + private final ApproximateHistogramFoldingBufferAggregatorInternal innerAggregator; public ApproximateHistogramFoldingBufferAggregator( BaseObjectColumnValueSelector selector, @@ -43,50 +38,26 @@ public ApproximateHistogramFoldingBufferAggregator( ) { this.selector = selector; - this.resolution = resolution; - this.lowerLimit = lowerLimit; - this.upperLimit = upperLimit; - - tmpBufferP = new float[resolution]; - tmpBufferB = new long[resolution]; + this.innerAggregator = new ApproximateHistogramFoldingBufferAggregatorInternal(resolution, lowerLimit, upperLimit); } @Override public void init(ByteBuffer buf, int position) { - ApproximateHistogram h = new ApproximateHistogram(resolution, lowerLimit, upperLimit); - - ByteBuffer mutationBuffer = buf.duplicate(); - mutationBuffer.position(position); - // use dense storage for aggregation - h.toBytesDense(mutationBuffer); + innerAggregator.init(buf, position); } @Override public void aggregate(ByteBuffer buf, int position) { ApproximateHistogram hNext = selector.getObject(); - if (hNext == null) { - return; - } - ByteBuffer mutationBuffer = buf.duplicate(); - mutationBuffer.position(position); - - ApproximateHistogram h0 = ApproximateHistogram.fromBytesDense(mutationBuffer); - h0.setLowerLimit(lowerLimit); - h0.setUpperLimit(upperLimit); - h0.foldFast(hNext, tmpBufferP, tmpBufferB); - - mutationBuffer.position(position); - h0.toBytesDense(mutationBuffer); + innerAggregator.aggregate(buf, position, hNext); } @Override public Object get(ByteBuffer buf, int position) { - ByteBuffer mutationBuffer = buf.asReadOnlyBuffer(); - mutationBuffer.position(position); - return ApproximateHistogram.fromBytesDense(mutationBuffer); + return innerAggregator.get(buf, position); } @Override @@ -106,6 +77,7 @@ public double getDouble(ByteBuffer buf, int position) { throw new UnsupportedOperationException("ApproximateHistogramFoldingBufferAggregator does not support getDouble()"); } + @Override public void close() { diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregatorInternal.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregatorInternal.java new file mode 100644 index 000000000000..f6cc16efd71a --- /dev/null +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregatorInternal.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.histogram; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class ApproximateHistogramFoldingBufferAggregatorInternal +{ + private final int resolution; + private final float upperLimit; + private final float lowerLimit; + + private float[] tmpBufferA; + private long[] tmpBufferB; + + public ApproximateHistogramFoldingBufferAggregatorInternal( + int resolution, + float lowerLimit, + float upperLimit + ) + { + this.resolution = resolution; + this.lowerLimit = lowerLimit; + this.upperLimit = upperLimit; + + tmpBufferA = new float[resolution]; + tmpBufferB = new long[resolution]; + } + + public void init(ByteBuffer buf, int position) + { + ApproximateHistogram h = new ApproximateHistogram(resolution, lowerLimit, upperLimit); + + ByteBuffer mutationBuffer = buf.duplicate(); + mutationBuffer.position(position); + // use dense storage for aggregation + h.toBytesDense(mutationBuffer); + } + + public void aggregate(ByteBuffer buf, int position, @Nullable ApproximateHistogram hNext) + { + if (hNext == null) { + return; + } + ByteBuffer mutationBuffer = buf.duplicate(); + mutationBuffer.position(position); + + ApproximateHistogram h0 = ApproximateHistogram.fromBytesDense(mutationBuffer); + foldFast(h0, hNext); + + mutationBuffer.position(position); + h0.toBytesDense(mutationBuffer); + } + + public void foldFast(ApproximateHistogram left, ApproximateHistogram right) + { + //TODO: do these have to set in every call + left.setLowerLimit(lowerLimit); + left.setUpperLimit(upperLimit); + left.foldFast(right, tmpBufferA, tmpBufferB); + } + + public ApproximateHistogram get(ByteBuffer buf, int position) + { + ByteBuffer mutationBuffer = buf.asReadOnlyBuffer(); + mutationBuffer.position(position); + return ApproximateHistogram.fromBytesDense(mutationBuffer); + } + + public void put(ByteBuffer buf, int position, ApproximateHistogram histogram) + { + ByteBuffer mutationBuffer = buf.duplicate(); + mutationBuffer.position(position); + histogram.toBytesDense(mutationBuffer); + } +} diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingVectorAggregator.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingVectorAggregator.java new file mode 100644 index 000000000000..5b50c718427f --- /dev/null +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingVectorAggregator.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.histogram; + +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.segment.vector.VectorObjectSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class ApproximateHistogramFoldingVectorAggregator implements VectorAggregator +{ + private final ApproximateHistogramFoldingBufferAggregatorInternal innerAggregator; + private final VectorObjectSelector selector; + + public ApproximateHistogramFoldingVectorAggregator( + final VectorObjectSelector selector, + final int resolution, + final float lowerLimit, + final float upperLimit + ) + { + this.selector = selector; + this.innerAggregator = new ApproximateHistogramFoldingBufferAggregatorInternal(resolution, lowerLimit, upperLimit); + } + + @Override + public void init(ByteBuffer buf, int position) + { + innerAggregator.init(buf, position); + } + + @Override + public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) + { + Object[] vector = selector.getObjectVector(); + ApproximateHistogram histogram = innerAggregator.get(buf, position); + for (int i = startRow; i < endRow; i++) { + ApproximateHistogram other = (ApproximateHistogram) vector[i]; + if (null != other) { + innerAggregator.foldFast(histogram, other); + } + } + } + + @Override + public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) + { + Object[] vector = selector.getObjectVector(); + for (int i = 0; i < numRows; i++) { + ApproximateHistogram other = (ApproximateHistogram) vector[null != rows ? rows[i] : i]; + if (null == other) { + continue; + } + int position = positions[i] + positionOffset; + innerAggregator.aggregate(buf, position, other); + } + } + + @Nullable + @Override + public Object get(ByteBuffer buf, int position) + { + return innerAggregator.get(buf, position); + } + + @Override + public void close() + { + + } +} diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramVectorAggregator.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramVectorAggregator.java new file mode 100644 index 000000000000..a30a36f534ba --- /dev/null +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramVectorAggregator.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.histogram; + +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class ApproximateHistogramVectorAggregator implements VectorAggregator +{ + + private final VectorValueSelector selector; + private final ApproximateHistogramBufferAggregatorInternal innerAggregator; + + public ApproximateHistogramVectorAggregator( + VectorValueSelector selector, + int resolution + ) + { + this.selector = selector; + this.innerAggregator = new ApproximateHistogramBufferAggregatorInternal(resolution); + } + + @Override + public void init(final ByteBuffer buf, final int position) + { + innerAggregator.init(buf, position); + } + + @Override + public void aggregate(final ByteBuffer buf, final int position, final int startRow, final int endRow) + { + final boolean[] isValueNull = selector.getNullVector(); + final float[] vector = selector.getFloatVector(); + ApproximateHistogram histogram = innerAggregator.get(buf, position); + + for (int i = startRow; i < endRow; i++) { + if (isValueNull != null && isValueNull[i]) { + continue; + } + histogram.offer(vector[i]); + } + innerAggregator.put(buf, position, histogram); + } + + @Override + public Object get(ByteBuffer buf, int position) + { + return innerAggregator.get(buf, position); + } + + @Override + public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) + { + final float[] vector = selector.getFloatVector(); + final boolean[] isValueNull = selector.getNullVector(); + + for (int i = 0; i < numRows; i++) { + if (isValueNull != null && isValueNull[i]) { + continue; + } + final int position = positions[i] + positionOffset; + innerAggregator.aggregate(buf, position, vector[rows != null ? rows[i] : i]); + } + } + + + @Override + public void close() + { + // no resources to cleanup + } +} diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramAggregatorFactory.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramAggregatorFactory.java index b8df5b62e0cb..5d3bd0b33395 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramAggregatorFactory.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramAggregatorFactory.java @@ -29,9 +29,12 @@ import org.apache.druid.query.aggregation.AggregatorUtil; import org.apache.druid.query.aggregation.BufferAggregator; import org.apache.druid.query.aggregation.ObjectAggregateCombiner; +import org.apache.druid.query.aggregation.VectorAggregator; import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; import java.util.Collections; @@ -99,6 +102,24 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) ); } + @Override + public VectorAggregator factorizeVector(VectorColumnSelectorFactory columnSelectorFactory) + { + return new FixedBucketsHistogramVectorAggregator( + columnSelectorFactory.makeObjectSelector(fieldName), + lowerLimit, + upperLimit, + numBuckets, + outlierHandlingMode + ); + } + + @Override + public boolean canVectorize(ColumnInspector columnInspector) + { + return true; + } + @Override public Comparator getComparator() { diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramBufferAggregator.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramBufferAggregator.java index 0e894d7a59d3..2d3feff11687 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramBufferAggregator.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramBufferAggregator.java @@ -19,7 +19,6 @@ package org.apache.druid.query.aggregation.histogram; -import org.apache.druid.common.config.NullHandling; import org.apache.druid.query.aggregation.BufferAggregator; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.BaseObjectColumnValueSelector; @@ -29,8 +28,7 @@ public class FixedBucketsHistogramBufferAggregator implements BufferAggregator { private final BaseObjectColumnValueSelector selector; - - private FixedBucketsHistogram histogram; + private final FixedBucketsHistogramBufferAggregatorInternal innerAggregator; public FixedBucketsHistogramBufferAggregator( BaseObjectColumnValueSelector selector, @@ -41,7 +39,7 @@ public FixedBucketsHistogramBufferAggregator( ) { this.selector = selector; - this.histogram = new FixedBucketsHistogram( + this.innerAggregator = new FixedBucketsHistogramBufferAggregatorInternal( lowerLimit, upperLimit, numBuckets, @@ -52,45 +50,20 @@ public FixedBucketsHistogramBufferAggregator( @Override public void init(ByteBuffer buf, int position) { - ByteBuffer mutationBuffer = buf.duplicate(); - mutationBuffer.position(position); - mutationBuffer.put(histogram.toBytesFull(false)); + innerAggregator.init(buf, position); } @Override public void aggregate(ByteBuffer buf, int position) { - ByteBuffer mutationBuffer = buf.duplicate(); - mutationBuffer.position(position); - - FixedBucketsHistogram h0 = FixedBucketsHistogram.fromByteBufferFullNoSerdeHeader(mutationBuffer); - Object val = selector.getObject(); - if (val == null) { - if (NullHandling.replaceWithDefault()) { - h0.incrementMissing(); - } else { - h0.add(NullHandling.defaultDoubleValue()); - } - } else if (val instanceof String) { - h0.combineHistogram(FixedBucketsHistogram.fromBase64((String) val)); - } else if (val instanceof FixedBucketsHistogram) { - h0.combineHistogram((FixedBucketsHistogram) val); - } else { - Double x = ((Number) val).doubleValue(); - h0.add(x); - } - - mutationBuffer.position(position); - mutationBuffer.put(h0.toBytesFull(false)); + innerAggregator.aggregate(buf, position, val); } @Override public Object get(ByteBuffer buf, int position) { - ByteBuffer mutationBuffer = buf.duplicate(); - mutationBuffer.position(position); - return FixedBucketsHistogram.fromByteBufferFullNoSerdeHeader(mutationBuffer); + return innerAggregator.get(buf, position); } @Override diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramBufferAggregatorInternal.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramBufferAggregatorInternal.java new file mode 100644 index 000000000000..8f2c9d2c7111 --- /dev/null +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramBufferAggregatorInternal.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.histogram; + +import org.apache.druid.common.config.NullHandling; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class FixedBucketsHistogramBufferAggregatorInternal +{ + private final double lowerLimit; + private final double upperLimit; + private final int numBuckets; + private final FixedBucketsHistogram.OutlierHandlingMode outlierHandlingMode; + + public FixedBucketsHistogramBufferAggregatorInternal( + double lowerLimit, + double upperLimit, + int numBuckets, + FixedBucketsHistogram.OutlierHandlingMode outlierHandlingMode + ) + { + + this.lowerLimit = lowerLimit; + this.upperLimit = upperLimit; + this.numBuckets = numBuckets; + this.outlierHandlingMode = outlierHandlingMode; + } + + public void init(ByteBuffer buf, int position) + { + ByteBuffer mutationBuffer = buf.duplicate(); + mutationBuffer.position(position); + FixedBucketsHistogram histogram = new FixedBucketsHistogram( + lowerLimit, + upperLimit, + numBuckets, + outlierHandlingMode + ); + mutationBuffer.put(histogram.toBytesFull(false)); + } + + public void aggregate(ByteBuffer buf, int position, @Nullable Object val) + { + ByteBuffer mutationBuffer = buf.duplicate(); + mutationBuffer.position(position); + + FixedBucketsHistogram h0 = FixedBucketsHistogram.fromByteBufferFullNoSerdeHeader(mutationBuffer); + combine(h0, val); + + mutationBuffer.position(position); + mutationBuffer.put(h0.toBytesFull(false)); + } + + public void combine(FixedBucketsHistogram histogram, @Nullable Object next) + { + if (next == null) { + if (NullHandling.replaceWithDefault()) { + histogram.incrementMissing(); + } else { + histogram.add(NullHandling.defaultDoubleValue()); + } + } else if (next instanceof String) { + histogram.combineHistogram(FixedBucketsHistogram.fromBase64((String) next)); + } else if (next instanceof FixedBucketsHistogram) { + histogram.combineHistogram((FixedBucketsHistogram) next); + } else { + Double x = ((Number) next).doubleValue(); + histogram.add(x); + } + } + + public FixedBucketsHistogram get(ByteBuffer buf, int position) + { + ByteBuffer mutationBuffer = buf.duplicate(); + mutationBuffer.position(position); + return FixedBucketsHistogram.fromByteBufferFullNoSerdeHeader(mutationBuffer); + } + + public void put(ByteBuffer buf, int position, FixedBucketsHistogram histogram) + { + ByteBuffer mutationBuffer = buf.duplicate(); + mutationBuffer.position(position); + mutationBuffer.put(histogram.toBytesFull(false)); + } +} diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregator.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregator.java new file mode 100644 index 000000000000..f9af010a5956 --- /dev/null +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregator.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.histogram; + +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.segment.vector.VectorObjectSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class FixedBucketsHistogramVectorAggregator implements VectorAggregator +{ + private final VectorObjectSelector selector; + private final FixedBucketsHistogramBufferAggregatorInternal innerAggregator; + + public FixedBucketsHistogramVectorAggregator( + VectorObjectSelector selector, + double lowerLimit, + double upperLimit, + int numBuckets, + FixedBucketsHistogram.OutlierHandlingMode outlierHandlingMode + ) + { + this.selector = selector; + this.innerAggregator = new FixedBucketsHistogramBufferAggregatorInternal( + lowerLimit, + upperLimit, + numBuckets, + outlierHandlingMode + ); + } + + @Override + public void init(ByteBuffer buf, int position) + { + innerAggregator.init(buf, position); + } + + @Override + public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) + { + Object[] vector = selector.getObjectVector(); + FixedBucketsHistogram histogram = innerAggregator.get(buf, position); + for (int i = startRow; i < endRow; i++) { + innerAggregator.combine(histogram, vector[i]); + } + innerAggregator.put(buf, position, histogram); + } + + @Override + public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) + { + Object[] vector = selector.getObjectVector(); + for (int i = 0; i < numRows; i++) { + int position = positions[i] + positionOffset; + Object val = vector[rows != null ? rows[i] : i]; + innerAggregator.aggregate(buf, position, val); + } + } + + @Nullable + @Override + public Object get(ByteBuffer buf, int position) + { + return innerAggregator.get(buf, position); + } + + @Override + public void close() + { + // Nothing to close + } +} From 30aa2515ad0efe45d598b871f596063e9d24dfc7 Mon Sep 17 00:00:00 2001 From: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com> Date: Thu, 20 Aug 2020 21:00:18 +0530 Subject: [PATCH 02/10] Remove redundant code from FixedBucketsHistogramAggregator classes --- docs/querying/query-context.md | 3 +- ...mateHistogramBufferAggregatorInternal.java | 6 +++- ...togramFoldingBufferAggregatorInternal.java | 7 ++++- ...imateHistogramFoldingVectorAggregator.java | 2 +- .../histogram/FixedBucketsHistogram.java | 29 ++++++++++++++++++ .../FixedBucketsHistogramAggregator.java | 19 +----------- ...ketsHistogramBufferAggregatorInternal.java | 30 +++++-------------- ...FixedBucketsHistogramVectorAggregator.java | 2 +- 8 files changed, 52 insertions(+), 46 deletions(-) diff --git a/docs/querying/query-context.md b/docs/querying/query-context.md index 12d51c9e45a3..a1a62a05f47c 100644 --- a/docs/querying/query-context.md +++ b/docs/querying/query-context.md @@ -90,7 +90,8 @@ requirements: include "selector", "bound", "in", "like", "regex", "search", "and", "or", and "not". - All filters in filtered aggregators must offer vectorized row-matchers. - All aggregators must offer vectorized implementations. These include "count", "doubleSum", "floatSum", "longSum", "longMin", - "longMax", "doubleMin", "doubleMax", "floatMin", "floatMax", "hyperUnique", and "filtered". + "longMax", "doubleMin", "doubleMax", "floatMin", "floatMax", "hyperUnique", "filtered", "approxHistogram", + "approxHistogramFold", and "fixedBucketsHistogram (with numerical input)". - No virtual columns. - For GroupBy: All dimension specs must be "default" (no extraction functions or filtered dimension specs). - For GroupBy: No multi-value dimensions. diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramBufferAggregatorInternal.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramBufferAggregatorInternal.java index a1a8ea16cbb5..454095b7861c 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramBufferAggregatorInternal.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramBufferAggregatorInternal.java @@ -21,7 +21,11 @@ import java.nio.ByteBuffer; -class ApproximateHistogramBufferAggregatorInternal +/** + * A helper class used by {@link ApproximateHistogramBufferAggregator} and {@link ApproximateHistogramVectorAggregator} + * for aggregation operations on byte buffers. Getting the object from value selectors is outside this class. + */ +final class ApproximateHistogramBufferAggregatorInternal { private final int resolution; diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregatorInternal.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregatorInternal.java index f6cc16efd71a..b9c9c0ec6591 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregatorInternal.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregatorInternal.java @@ -22,7 +22,12 @@ import javax.annotation.Nullable; import java.nio.ByteBuffer; -public class ApproximateHistogramFoldingBufferAggregatorInternal +/** + * A helper class used by {@link ApproximateHistogramFoldingBufferAggregator} and + * {@link ApproximateHistogramFoldingVectorAggregator} for aggregation operations on byte buffers. + * Getting the object from value selectors is outside this class. + */ +final class ApproximateHistogramFoldingBufferAggregatorInternal { private final int resolution; private final float upperLimit; diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingVectorAggregator.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingVectorAggregator.java index fa12dc79958d..41e4306003c9 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingVectorAggregator.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingVectorAggregator.java @@ -85,6 +85,6 @@ public Object get(ByteBuffer buf, int position) @Override public void close() { - + // Nothing to close } } diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogram.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogram.java index f408dbfb857e..980a760d952f 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogram.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogram.java @@ -25,9 +25,11 @@ import com.fasterxml.jackson.annotation.JsonValue; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; +import javax.annotation.Nullable; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.util.Arrays; @@ -431,6 +433,33 @@ public void incrementMissing() } } + /** + * Merge another datapoint into this one. The other datapoin could be + * - base64 encoded string of {@code FixedBucketsHistogram} + * - {@code FixedBucketsHistogram} object + * - Numeric value + * + * @param val + */ + void combine(@Nullable Object val) + { + if (val == null) { + if (NullHandling.replaceWithDefault()) { + add(NullHandling.defaultDoubleValue()); + } else { + incrementMissing(); + } + } else if (val instanceof String) { + combineHistogram(FixedBucketsHistogram.fromBase64((String) val)); + } else if (val instanceof FixedBucketsHistogram) { + combineHistogram((FixedBucketsHistogram) val); + } else if (val instanceof Number) { + add(((Number) val).doubleValue()); + } else { + throw new ISE("Unknown class for object: " + val.getClass()); + } + } + /** * Merge another histogram into this one. Only the state of this histogram is updated. * diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramAggregator.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramAggregator.java index 2f5b56508933..eed1c9e7d380 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramAggregator.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramAggregator.java @@ -20,8 +20,6 @@ package org.apache.druid.query.aggregation.histogram; import com.google.common.primitives.Longs; -import org.apache.druid.common.config.NullHandling; -import org.apache.druid.java.util.common.ISE; import org.apache.druid.query.aggregation.Aggregator; import org.apache.druid.segment.BaseObjectColumnValueSelector; @@ -66,22 +64,7 @@ public FixedBucketsHistogramAggregator( public void aggregate() { Object val = selector.getObject(); - - if (val == null) { - if (NullHandling.replaceWithDefault()) { - histogram.add(NullHandling.defaultDoubleValue()); - } else { - histogram.incrementMissing(); - } - } else if (val instanceof String) { - histogram.combineHistogram(FixedBucketsHistogram.fromBase64((String) val)); - } else if (val instanceof FixedBucketsHistogram) { - histogram.combineHistogram((FixedBucketsHistogram) val); - } else if (val instanceof Number) { - histogram.add(((Number) val).doubleValue()); - } else { - throw new ISE("Unknown class for object: " + val.getClass()); - } + histogram.combine(val); } @Nullable diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramBufferAggregatorInternal.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramBufferAggregatorInternal.java index d013929bd720..b61e639c6a76 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramBufferAggregatorInternal.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramBufferAggregatorInternal.java @@ -19,12 +19,15 @@ package org.apache.druid.query.aggregation.histogram; -import org.apache.druid.common.config.NullHandling; - import javax.annotation.Nullable; import java.nio.ByteBuffer; -public class FixedBucketsHistogramBufferAggregatorInternal +/** + * A helper class used by {@link FixedBucketsHistogramBufferAggregator} and + * {@link FixedBucketsHistogramVectorAggregator} for aggregation operations on byte buffers. + * Getting the object from value selectors is outside this class. + */ +final class FixedBucketsHistogramBufferAggregatorInternal { private final double lowerLimit; private final double upperLimit; @@ -38,7 +41,6 @@ public FixedBucketsHistogramBufferAggregatorInternal( FixedBucketsHistogram.OutlierHandlingMode outlierHandlingMode ) { - this.lowerLimit = lowerLimit; this.upperLimit = upperLimit; this.numBuckets = numBuckets; @@ -64,30 +66,12 @@ public void aggregate(ByteBuffer buf, int position, @Nullable Object val) mutationBuffer.position(position); FixedBucketsHistogram h0 = FixedBucketsHistogram.fromByteBufferFullNoSerdeHeader(mutationBuffer); - combine(h0, val); + h0.combine(val); mutationBuffer.position(position); mutationBuffer.put(h0.toBytesFull(false)); } - public void combine(FixedBucketsHistogram histogram, @Nullable Object next) - { - if (next == null) { - if (NullHandling.replaceWithDefault()) { - histogram.incrementMissing(); - } else { - histogram.add(NullHandling.defaultDoubleValue()); - } - } else if (next instanceof String) { - histogram.combineHistogram(FixedBucketsHistogram.fromBase64((String) next)); - } else if (next instanceof FixedBucketsHistogram) { - histogram.combineHistogram((FixedBucketsHistogram) next); - } else { - double x = ((Number) next).doubleValue(); - histogram.add(x); - } - } - public FixedBucketsHistogram get(ByteBuffer buf, int position) { ByteBuffer mutationBuffer = buf.duplicate(); diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregator.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregator.java index 7a83b96fbaa2..2b7b8cb16734 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregator.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregator.java @@ -60,7 +60,7 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) boolean[] isNull = selector.getNullVector(); FixedBucketsHistogram histogram = innerAggregator.get(buf, position); for (int i = startRow; i < endRow; i++) { - innerAggregator.combine(histogram, toObject(vector, isNull, i)); + histogram.combine(toObject(vector, isNull, i)); } innerAggregator.put(buf, position, histogram); } From b47c90634c746d66812736b6202a976ea7dc7cb2 Mon Sep 17 00:00:00 2001 From: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com> Date: Fri, 21 Aug 2020 20:53:01 +0530 Subject: [PATCH 03/10] Add test cases for new classes --- .../histogram/FixedBucketsHistogram.java | 2 +- ...eHistogramFoldingVectorAggregatorTest.java | 117 ++++++++++ ...roximateHistogramVectorAggregatorTest.java | 136 ++++++++++++ .../histogram/FixedBucketsHistogramTest.java | 88 ++++++++ ...dBucketsHistogramVectorAggregatorTest.java | 208 ++++++++++++++++++ 5 files changed, 550 insertions(+), 1 deletion(-) create mode 100644 extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingVectorAggregatorTest.java create mode 100644 extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramVectorAggregatorTest.java create mode 100644 extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregatorTest.java diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogram.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogram.java index 980a760d952f..61e0caf0265e 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogram.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogram.java @@ -450,7 +450,7 @@ void combine(@Nullable Object val) incrementMissing(); } } else if (val instanceof String) { - combineHistogram(FixedBucketsHistogram.fromBase64((String) val)); + combineHistogram(fromBase64((String) val)); } else if (val instanceof FixedBucketsHistogram) { combineHistogram((FixedBucketsHistogram) val); } else if (val instanceof Number) { diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingVectorAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingVectorAggregatorTest.java new file mode 100644 index 000000000000..893a720856e1 --- /dev/null +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingVectorAggregatorTest.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.histogram; + +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorObjectSelector; +import org.easymock.EasyMock; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.nio.ByteBuffer; + +import static org.easymock.EasyMock.createMock; +import static org.easymock.EasyMock.expect; + +public class ApproximateHistogramFoldingVectorAggregatorTest +{ + private static final float[] FLOATS = {23, 19, 10, 16, 36, 2, 9, 32, 30, 45}; + private VectorColumnSelectorFactory vectorColumnSelectorFactory; + private ApproximateHistogram h1; + private ApproximateHistogram h2; + + @Before + public void setup() + { + + h1 = new ApproximateHistogram(5); + h2 = new ApproximateHistogram(5); + + for (int i = 0; i < 5; ++i) { + h1.offer(FLOATS[i]); + } + for (int i = 5; i < FLOATS.length; ++i) { + h2.offer(FLOATS[i]); + } + + VectorObjectSelector vectorObjectSelector = createMock(VectorObjectSelector.class); + expect(vectorObjectSelector.getObjectVector()).andReturn(new Object[]{h1, null, h2, null}).anyTimes(); + + EasyMock.replay(vectorObjectSelector); + + vectorColumnSelectorFactory = createMock(VectorColumnSelectorFactory.class); + expect(vectorColumnSelectorFactory.makeObjectSelector("field")) + .andReturn(vectorObjectSelector).anyTimes(); + EasyMock.replay(vectorColumnSelectorFactory); + } + + @Test + public void testAggregateSinglePosition() + { + ApproximateHistogramFoldingAggregatorFactory factory = buildHistogramFactory(); + ByteBuffer byteBuffer = ByteBuffer.allocate(factory.getMaxIntermediateSize()); + Assert.assertTrue(factory.canVectorize(vectorColumnSelectorFactory)); + VectorAggregator vectorAggregator = factory.factorizeVector(vectorColumnSelectorFactory); + vectorAggregator.init(byteBuffer, 0); + vectorAggregator.aggregate(byteBuffer, 0, 0, 4); + ApproximateHistogram h = (ApproximateHistogram) vectorAggregator.get(byteBuffer, 0); + + Assert.assertArrayEquals(new float[]{19.6f, 45.0f}, h.positions(), 0.1f); + Assert.assertArrayEquals(new long[]{9, 1}, h.bins()); + Assert.assertEquals(10, h.count()); + Assert.assertEquals(2.0f, h.min(), 0.1f); + Assert.assertEquals(45.0f, h.max(), 0.1f); + } + + @Test + public void testAggregateMultiPositions() + { + ApproximateHistogramFoldingAggregatorFactory factory = buildHistogramFactory(); + ByteBuffer byteBuffer = ByteBuffer.allocate(factory.getMaxIntermediateSize() * 2); + int[] positions = new int[]{0, factory.getMaxIntermediateSize()}; + VectorAggregator vectorAggregator = factory.factorizeVector(vectorColumnSelectorFactory); + vectorAggregator.init(byteBuffer, 0); + vectorAggregator.init(byteBuffer, positions[1]); + + vectorAggregator.aggregate(byteBuffer, 2, positions, null, 0); + vectorAggregator.aggregate(byteBuffer, 2, positions, new int[]{1, 2}, 0); // indirection + ApproximateHistogram actualH1 = (ApproximateHistogram) vectorAggregator.get(byteBuffer, 0); + ApproximateHistogram actualH2 = (ApproximateHistogram) vectorAggregator.get(byteBuffer, positions[1]); + + Assert.assertEquals(actualH1, h1); + Assert.assertEquals(actualH2, h2); + + } + + private ApproximateHistogramFoldingAggregatorFactory buildHistogramFactory() + { + return new ApproximateHistogramFoldingAggregatorFactory( + "approximateHistoFold", + "field", + 5, + 5, + 0f, + 50.0f, + false + ); + } +} diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramVectorAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramVectorAggregatorTest.java new file mode 100644 index 000000000000..3716e154aa9e --- /dev/null +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramVectorAggregatorTest.java @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.histogram; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorValueSelector; +import org.easymock.EasyMock; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.nio.ByteBuffer; + +import static org.easymock.EasyMock.createMock; +import static org.easymock.EasyMock.expect; + +public class ApproximateHistogramVectorAggregatorTest +{ + private static final float[] FLOATS = {23, 19, 10, 16, 36, 2, 9, 32, 30, 45, 33}; // Last value is never included + private static final boolean[] NULL_VECTOR = + {false, false, false, false, false, false, false, false, false, false, true}; + private VectorColumnSelectorFactory vectorColumnSelectorFactory; + + @Before + public void setup() + { + NullHandling.initializeForTests(); + VectorValueSelector vectorValueSelector_1 = createMock(VectorValueSelector.class); + expect(vectorValueSelector_1.getFloatVector()).andReturn(FLOATS).anyTimes(); + expect(vectorValueSelector_1.getNullVector()).andReturn(NULL_VECTOR).anyTimes(); + + VectorValueSelector vectorValueSelector_2 = createMock(VectorValueSelector.class); + expect(vectorValueSelector_2.getFloatVector()).andReturn(FLOATS).anyTimes(); + expect(vectorValueSelector_2.getNullVector()).andReturn(null).anyTimes(); + + EasyMock.replay(vectorValueSelector_1); + EasyMock.replay(vectorValueSelector_2); + + ColumnCapabilities columnCapabilities + = new ColumnCapabilitiesImpl().setType(ValueType.DOUBLE).setDictionaryEncoded(true); + vectorColumnSelectorFactory = createMock(VectorColumnSelectorFactory.class); + expect(vectorColumnSelectorFactory.getColumnCapabilities("field_1")).andReturn(columnCapabilities).anyTimes(); + expect(vectorColumnSelectorFactory.makeValueSelector("field_1")) + .andReturn(vectorValueSelector_1).anyTimes(); + expect(vectorColumnSelectorFactory.getColumnCapabilities("field_2")).andReturn(columnCapabilities).anyTimes(); + expect(vectorColumnSelectorFactory.makeValueSelector("field_2")) + .andReturn(vectorValueSelector_2).anyTimes(); + EasyMock.replay(vectorColumnSelectorFactory); + } + + @Test + public void testAggregateSinglePosition() + { + ApproximateHistogramAggregatorFactory factory = buildHistogramAggFactory("field_1"); + ByteBuffer byteBuffer = ByteBuffer.allocate(factory.getMaxIntermediateSizeWithNulls()); + Assert.assertTrue(factory.canVectorize(vectorColumnSelectorFactory)); + VectorAggregator vectorAggregator = factory.factorizeVector(vectorColumnSelectorFactory); + vectorAggregator.init(byteBuffer, 0); + vectorAggregator.aggregate(byteBuffer, 0, 0, 11); + ApproximateHistogram h = (ApproximateHistogram) vectorAggregator.get(byteBuffer, 0); + + // (2, 1), (9.5, 2), (19.33, 3), (32.67, 3), (45, 1) + Assert.assertArrayEquals(new float[]{2, 9.5f, 19.33f, 32.67f, 45f}, h.positions(), 0.1f); + Assert.assertArrayEquals(new long[]{1, 2, 3, 3, 1}, h.bins()); + + factory = buildHistogramAggFactory("field_2"); + vectorAggregator = factory.factorizeVector(vectorColumnSelectorFactory); + vectorAggregator.init(byteBuffer, 0); + vectorAggregator.aggregate(byteBuffer, 0, 0, 10); + h = (ApproximateHistogram) vectorAggregator.get(byteBuffer, 0); + + Assert.assertArrayEquals(new float[]{2, 9.5f, 19.33f, 32.67f, 45f}, h.positions(), 0.1f); + Assert.assertArrayEquals(new long[]{1, 2, 3, 3, 1}, h.bins()); + + } + + @Test + public void testAggregateMultiPositions() + { + ApproximateHistogramAggregatorFactory factory = buildHistogramAggFactory("field_2"); + int size = factory.getMaxIntermediateSize(); + ByteBuffer byteBuffer = ByteBuffer.allocate(size * 2); + VectorAggregator vectorAggregator = factory.factorizeVector(vectorColumnSelectorFactory); + int[] positions = new int[]{0, size}; + vectorAggregator.init(byteBuffer, positions[0]); + vectorAggregator.init(byteBuffer, positions[1]); + vectorAggregator.aggregate(byteBuffer, 2, positions, null, 0); + // Put rest of 10 elements using the access indirection. Second vector gets the same element always + for (int i = 1; i < 10; i++) { + vectorAggregator.aggregate(byteBuffer, 2, positions, new int[]{i, 1}, 0); + } + + ApproximateHistogram h0 = (ApproximateHistogram) vectorAggregator.get(byteBuffer, 0); + Assert.assertArrayEquals(new float[]{2, 9.5f, 19.33f, 32.67f, 45f}, h0.positions(), 0.1f); + Assert.assertArrayEquals(new long[]{1, 2, 3, 3, 1}, h0.bins()); + + ApproximateHistogram h2 = (ApproximateHistogram) vectorAggregator.get(byteBuffer, size); + Assert.assertArrayEquals(new float[]{19}, h2.positions(), 0.1f); + Assert.assertArrayEquals(new long[]{10}, h2.bins()); + } + + private ApproximateHistogramAggregatorFactory buildHistogramAggFactory(String fieldName) + { + return new ApproximateHistogramAggregatorFactory( + "approxHisto", + fieldName, + 5, + 5, + 0.0f, + 45.0f, + false + ); + } +} diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramTest.java index bf929741863b..630d6e860273 100644 --- a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramTest.java +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramTest.java @@ -1242,6 +1242,94 @@ public void testMergeDifferentBucketsContainedByOther() Assert.assertEquals(0, hIgnore.getUpperOutlierCount()); } + @Test + public void testCombineBase64() + { + FixedBucketsHistogram h = buildHistogram( + 0, + 20, + 5, + FixedBucketsHistogram.OutlierHandlingMode.OVERFLOW, + new float[]{1, 2, 7, 12, 18} + ); + + FixedBucketsHistogram h2 = buildHistogram( + 0, + 20, + 7, + FixedBucketsHistogram.OutlierHandlingMode.OVERFLOW, + new float[]{3, 8, 9, 19, 99, -50} + ); + + h.combine(h2.toBase64()); + Assert.assertEquals(5, h.getNumBuckets()); + Assert.assertEquals(4.0, h.getBucketSize(), 0.01); + Assert.assertEquals(0, h.getLowerLimit(), 0.01); + Assert.assertEquals(20, h.getUpperLimit(), 0.01); + Assert.assertEquals(FixedBucketsHistogram.OutlierHandlingMode.OVERFLOW, h.getOutlierHandlingMode()); + Assert.assertArrayEquals(new long[]{2, 3, 1, 1, 2}, h.getHistogram()); + Assert.assertEquals(9, h.getCount()); + Assert.assertEquals(1, h.getMin(), 0.01); + Assert.assertEquals(18, h.getMax(), 0.01); + Assert.assertEquals(0, h.getMissingValueCount()); + Assert.assertEquals(1, h.getLowerOutlierCount()); + Assert.assertEquals(1, h.getUpperOutlierCount()); + } + + @Test + public void testCombineAnotherHistogram() + { + FixedBucketsHistogram h = buildHistogram( + 0, + 20, + 5, + FixedBucketsHistogram.OutlierHandlingMode.OVERFLOW, + new float[]{1, 2, 7, 12, 18} + ); + + FixedBucketsHistogram h2 = buildHistogram( + 0, + 20, + 7, + FixedBucketsHistogram.OutlierHandlingMode.OVERFLOW, + new float[]{3, 8, 9, 19, 99, -50} + ); + + h.combine(h2); + Assert.assertEquals(5, h.getNumBuckets()); + Assert.assertEquals(4.0, h.getBucketSize(), 0.01); + Assert.assertEquals(0, h.getLowerLimit(), 0.01); + Assert.assertEquals(20, h.getUpperLimit(), 0.01); + Assert.assertEquals(FixedBucketsHistogram.OutlierHandlingMode.OVERFLOW, h.getOutlierHandlingMode()); + Assert.assertArrayEquals(new long[]{2, 3, 1, 1, 2}, h.getHistogram()); + Assert.assertEquals(9, h.getCount()); + Assert.assertEquals(1, h.getMin(), 0.01); + Assert.assertEquals(18, h.getMax(), 0.01); + Assert.assertEquals(0, h.getMissingValueCount()); + Assert.assertEquals(1, h.getLowerOutlierCount()); + Assert.assertEquals(1, h.getUpperOutlierCount()); + } + + @Test + public void testCombineNumber() + { + FixedBucketsHistogram h = new FixedBucketsHistogram( + 0, + 200, + 200, + FixedBucketsHistogram.OutlierHandlingMode.IGNORE + ); + + h.combine(10); + h.combine(20); + + Assert.assertEquals(0, h.getUpperOutlierCount()); + Assert.assertEquals(0, h.getLowerOutlierCount()); + Assert.assertEquals(2, h.getCount()); + Assert.assertEquals(10, h.getMin(), 0.01); + Assert.assertEquals(20, h.getMax(), 0.01); + } + @Test public void testMissing() { diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregatorTest.java new file mode 100644 index 000000000000..b3c592ab9e5d --- /dev/null +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregatorTest.java @@ -0,0 +1,208 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.histogram; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorValueSelector; +import org.easymock.EasyMock; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.nio.ByteBuffer; + +import static org.easymock.EasyMock.createMock; +import static org.easymock.EasyMock.expect; + +public class FixedBucketsHistogramVectorAggregatorTest +{ + private static final double[] DOUBLES = {1.0, 12.0, 3.0, 14.0, 15.0, 16.0}; + private static final boolean[] NULL_VECTOR = {false, false, false, false, true, false}; + private VectorColumnSelectorFactory vectorColumnSelectorFactory; + + @Before + public void setup() + { + NullHandling.initializeForTests(); + VectorValueSelector vectorValueSelector_1 = createMock(VectorValueSelector.class); + expect(vectorValueSelector_1.getDoubleVector()).andReturn(DOUBLES).anyTimes(); + expect(vectorValueSelector_1.getNullVector()).andReturn(NULL_VECTOR).anyTimes(); + + VectorValueSelector vectorValueSelector_2 = createMock(VectorValueSelector.class); + expect(vectorValueSelector_2.getDoubleVector()).andReturn(DOUBLES).anyTimes(); + expect(vectorValueSelector_2.getNullVector()).andReturn(null).anyTimes(); + + EasyMock.replay(vectorValueSelector_1); + EasyMock.replay(vectorValueSelector_2); + + ColumnCapabilities columnCapabilities + = new ColumnCapabilitiesImpl().setType(ValueType.DOUBLE).setDictionaryEncoded(true); + vectorColumnSelectorFactory = createMock(VectorColumnSelectorFactory.class); + expect(vectorColumnSelectorFactory.getColumnCapabilities("field_1")).andReturn(columnCapabilities).anyTimes(); + expect(vectorColumnSelectorFactory.makeValueSelector("field_1")) + .andReturn(vectorValueSelector_1).anyTimes(); + expect(vectorColumnSelectorFactory.getColumnCapabilities("field_2")).andReturn(columnCapabilities).anyTimes(); + expect(vectorColumnSelectorFactory.makeValueSelector("field_2")) + .andReturn(vectorValueSelector_2).anyTimes(); + EasyMock.replay(vectorColumnSelectorFactory); + } + + @Test + public void testAggregateSinglePosition() + { + ByteBuffer byteBuffer = ByteBuffer.allocate(FixedBucketsHistogram.getFullStorageSize(2)); + FixedBucketsHistogramAggregatorFactory factory = buildHistogramAggFactory("field_1"); + Assert.assertTrue(factory.canVectorize(vectorColumnSelectorFactory)); + VectorAggregator vectorAggregator = factory.factorizeVector(vectorColumnSelectorFactory); + vectorAggregator.init(byteBuffer, 0); + vectorAggregator.aggregate(byteBuffer, 0, 0, 6); + FixedBucketsHistogram h = (FixedBucketsHistogram) vectorAggregator.get(byteBuffer, 0); + + Assert.assertEquals(2, h.getNumBuckets()); + Assert.assertEquals(10.0, h.getBucketSize(), 0.01); + Assert.assertEquals(1, h.getLowerLimit(), 0.01); + Assert.assertEquals(21, h.getUpperLimit(), 0.01); + Assert.assertEquals(FixedBucketsHistogram.OutlierHandlingMode.OVERFLOW, h.getOutlierHandlingMode()); + Assert.assertArrayEquals(new long[]{2, 3}, h.getHistogram()); + Assert.assertEquals(5, h.getCount()); + Assert.assertEquals(1.0, h.getMin(), 0.01); + Assert.assertEquals(16.0, h.getMax(), 0.01); + Assert.assertEquals(0, h.getMissingValueCount()); + Assert.assertEquals(1, h.getLowerOutlierCount()); // Due to default value of null which is 0 + Assert.assertEquals(0, h.getUpperOutlierCount()); + + factory = buildHistogramAggFactory("field_2"); + vectorAggregator = factory.factorizeVector(vectorColumnSelectorFactory); + vectorAggregator.init(byteBuffer, 0); + vectorAggregator.aggregate(byteBuffer, 0, 0, 6); + h = (FixedBucketsHistogram) vectorAggregator.get(byteBuffer, 0); + + Assert.assertEquals(2, h.getNumBuckets()); + Assert.assertEquals(10.0, h.getBucketSize(), 0.01); + Assert.assertEquals(1, h.getLowerLimit(), 0.01); + Assert.assertEquals(21, h.getUpperLimit(), 0.01); + Assert.assertEquals(FixedBucketsHistogram.OutlierHandlingMode.OVERFLOW, h.getOutlierHandlingMode()); + Assert.assertArrayEquals(new long[]{2, 4}, h.getHistogram()); + Assert.assertEquals(6, h.getCount()); + Assert.assertEquals(1.0, h.getMin(), 0.01); + Assert.assertEquals(16.0, h.getMax(), 0.01); + Assert.assertEquals(0, h.getMissingValueCount()); + Assert.assertEquals(0, h.getLowerOutlierCount()); + Assert.assertEquals(0, h.getUpperOutlierCount()); + } + + @Test + public void testAggregateMultiPositions() + { + int size = FixedBucketsHistogram.getFullStorageSize(2); + ByteBuffer byteBuffer = ByteBuffer.allocate(size * 2); + FixedBucketsHistogramAggregatorFactory factory = buildHistogramAggFactory("field_2"); + VectorAggregator vectorAggregator = factory.factorizeVector(vectorColumnSelectorFactory); + int[] positions = new int[]{0, size}; + vectorAggregator.init(byteBuffer, positions[0]); + vectorAggregator.init(byteBuffer, positions[1]); + vectorAggregator.aggregate(byteBuffer, 2, positions, null, 0); + + FixedBucketsHistogram h0 = (FixedBucketsHistogram) vectorAggregator.get(byteBuffer, 0); + + Assert.assertEquals(2, h0.getNumBuckets()); + Assert.assertEquals(10.0, h0.getBucketSize(), 0.01); + Assert.assertEquals(1, h0.getLowerLimit(), 0.01); + Assert.assertEquals(21, h0.getUpperLimit(), 0.01); + Assert.assertEquals(FixedBucketsHistogram.OutlierHandlingMode.OVERFLOW, h0.getOutlierHandlingMode()); + Assert.assertArrayEquals(new long[]{1, 0}, h0.getHistogram()); + Assert.assertEquals(1, h0.getCount()); + Assert.assertEquals(1.0, h0.getMin(), 0.01); + Assert.assertEquals(1.0, h0.getMax(), 0.01); + Assert.assertEquals(0, h0.getMissingValueCount()); + Assert.assertEquals(0, h0.getLowerOutlierCount()); + Assert.assertEquals(0, h0.getUpperOutlierCount()); + + FixedBucketsHistogram h1 = (FixedBucketsHistogram) vectorAggregator.get(byteBuffer, positions[1]); + + Assert.assertEquals(2, h1.getNumBuckets()); + Assert.assertEquals(10.0, h1.getBucketSize(), 0.01); + Assert.assertEquals(1, h1.getLowerLimit(), 0.01); + Assert.assertEquals(21, h1.getUpperLimit(), 0.01); + Assert.assertEquals(FixedBucketsHistogram.OutlierHandlingMode.OVERFLOW, h1.getOutlierHandlingMode()); + Assert.assertArrayEquals(new long[]{0, 1}, h1.getHistogram()); + Assert.assertEquals(1, h1.getCount()); + Assert.assertEquals(12.0, h1.getMin(), 0.01); + Assert.assertEquals(12.0, h1.getMax(), 0.01); + Assert.assertEquals(0, h1.getMissingValueCount()); + Assert.assertEquals(0, h1.getLowerOutlierCount()); + Assert.assertEquals(0, h1.getUpperOutlierCount()); + + // Tests when there is a level of indirection in accessing the vector + byteBuffer = ByteBuffer.allocate(size * 2); + vectorAggregator.init(byteBuffer, positions[0]); + vectorAggregator.init(byteBuffer, positions[1]); + vectorAggregator.aggregate(byteBuffer, 2, positions, new int[]{2, 3}, 0); + + FixedBucketsHistogram h2 = (FixedBucketsHistogram) vectorAggregator.get(byteBuffer, 0); + + Assert.assertEquals(2, h2.getNumBuckets()); + Assert.assertEquals(10.0, h2.getBucketSize(), 0.01); + Assert.assertEquals(1, h2.getLowerLimit(), 0.01); + Assert.assertEquals(21, h2.getUpperLimit(), 0.01); + Assert.assertEquals(FixedBucketsHistogram.OutlierHandlingMode.OVERFLOW, h2.getOutlierHandlingMode()); + Assert.assertArrayEquals(new long[]{1, 0}, h2.getHistogram()); + Assert.assertEquals(1, h2.getCount()); + Assert.assertEquals(3.0, h2.getMin(), 0.01); + Assert.assertEquals(3.0, h2.getMax(), 0.01); + Assert.assertEquals(0, h2.getMissingValueCount()); + Assert.assertEquals(0, h2.getLowerOutlierCount()); + Assert.assertEquals(0, h2.getUpperOutlierCount()); + + FixedBucketsHistogram h3 = (FixedBucketsHistogram) vectorAggregator.get(byteBuffer, positions[1]); + + Assert.assertEquals(2, h3.getNumBuckets()); + Assert.assertEquals(10.0, h3.getBucketSize(), 0.01); + Assert.assertEquals(1, h3.getLowerLimit(), 0.01); + Assert.assertEquals(21, h3.getUpperLimit(), 0.01); + Assert.assertEquals(FixedBucketsHistogram.OutlierHandlingMode.OVERFLOW, h3.getOutlierHandlingMode()); + Assert.assertArrayEquals(new long[]{0, 1}, h3.getHistogram()); + Assert.assertEquals(1, h3.getCount()); + Assert.assertEquals(14.0, h3.getMin(), 0.01); + Assert.assertEquals(14.0, h3.getMax(), 0.01); + Assert.assertEquals(0, h3.getMissingValueCount()); + Assert.assertEquals(0, h3.getLowerOutlierCount()); + Assert.assertEquals(0, h3.getUpperOutlierCount()); + + } + + private FixedBucketsHistogramAggregatorFactory buildHistogramAggFactory(String fieldName) + { + return new FixedBucketsHistogramAggregatorFactory( + "fixedHisto", + fieldName, + 2, + 1, + 21, + FixedBucketsHistogram.OutlierHandlingMode.OVERFLOW, + false + ); + } +} From 81e72a51acadeebf324ee133f22b15ce5d63e5d4 Mon Sep 17 00:00:00 2001 From: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com> Date: Tue, 25 Aug 2020 18:19:03 +0530 Subject: [PATCH 04/10] Fix tests in sql compatible mode --- .../histogram/FixedBucketsHistogramVectorAggregatorTest.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregatorTest.java index b3c592ab9e5d..ac7cd754bd9a 100644 --- a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregatorTest.java +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregatorTest.java @@ -89,8 +89,9 @@ public void testAggregateSinglePosition() Assert.assertEquals(5, h.getCount()); Assert.assertEquals(1.0, h.getMin(), 0.01); Assert.assertEquals(16.0, h.getMax(), 0.01); - Assert.assertEquals(0, h.getMissingValueCount()); - Assert.assertEquals(1, h.getLowerOutlierCount()); // Due to default value of null which is 0 + // Default value of null is 0 which is an outlier. + Assert.assertEquals(NullHandling.replaceWithDefault() ? 0 : 1, h.getMissingValueCount()); + Assert.assertEquals(NullHandling.replaceWithDefault() ? 1 : 0, h.getLowerOutlierCount()); Assert.assertEquals(0, h.getUpperOutlierCount()); factory = buildHistogramAggFactory("field_2"); From a294a91caa28adb7df949de86ea2d6ff5b91b51c Mon Sep 17 00:00:00 2001 From: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com> Date: Thu, 27 Aug 2020 19:38:07 +0530 Subject: [PATCH 05/10] Typo fix --- .../query/aggregation/histogram/FixedBucketsHistogram.java | 5 +++-- .../histogram/ApproximateHistogramVectorAggregatorTest.java | 2 +- .../histogram/FixedBucketsHistogramVectorAggregatorTest.java | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogram.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogram.java index 61e0caf0265e..e10203caf23e 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogram.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogram.java @@ -434,14 +434,15 @@ public void incrementMissing() } /** - * Merge another datapoint into this one. The other datapoin could be + * Merge another datapoint into this one. The other datapoint could be * - base64 encoded string of {@code FixedBucketsHistogram} * - {@code FixedBucketsHistogram} object * - Numeric value * * @param val */ - void combine(@Nullable Object val) + @VisibleForTesting + public void combine(@Nullable Object val) { if (val == null) { if (NullHandling.replaceWithDefault()) { diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramVectorAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramVectorAggregatorTest.java index 3716e154aa9e..b4babcd34c3a 100644 --- a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramVectorAggregatorTest.java +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramVectorAggregatorTest.java @@ -59,7 +59,7 @@ public void setup() EasyMock.replay(vectorValueSelector_2); ColumnCapabilities columnCapabilities - = new ColumnCapabilitiesImpl().setType(ValueType.DOUBLE).setDictionaryEncoded(true); + = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE); vectorColumnSelectorFactory = createMock(VectorColumnSelectorFactory.class); expect(vectorColumnSelectorFactory.getColumnCapabilities("field_1")).andReturn(columnCapabilities).anyTimes(); expect(vectorColumnSelectorFactory.makeValueSelector("field_1")) diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregatorTest.java index ac7cd754bd9a..78f74edf7483 100644 --- a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregatorTest.java +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregatorTest.java @@ -58,7 +58,7 @@ public void setup() EasyMock.replay(vectorValueSelector_2); ColumnCapabilities columnCapabilities - = new ColumnCapabilitiesImpl().setType(ValueType.DOUBLE).setDictionaryEncoded(true); + = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE); vectorColumnSelectorFactory = createMock(VectorColumnSelectorFactory.class); expect(vectorColumnSelectorFactory.getColumnCapabilities("field_1")).andReturn(columnCapabilities).anyTimes(); expect(vectorColumnSelectorFactory.makeValueSelector("field_1")) From db59ddd104cd2fed2e82db5780bf9d54d4a6b188 Mon Sep 17 00:00:00 2001 From: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com> Date: Fri, 28 Aug 2020 12:12:27 +0530 Subject: [PATCH 06/10] Fix comment --- docs/querying/query-context.md | 2 +- .../ApproximateHistogramFoldingBufferAggregatorInternal.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/querying/query-context.md b/docs/querying/query-context.md index a1a62a05f47c..a937d84bba40 100644 --- a/docs/querying/query-context.md +++ b/docs/querying/query-context.md @@ -91,7 +91,7 @@ include "selector", "bound", "in", "like", "regex", "search", "and", "or", and " - All filters in filtered aggregators must offer vectorized row-matchers. - All aggregators must offer vectorized implementations. These include "count", "doubleSum", "floatSum", "longSum", "longMin", "longMax", "doubleMin", "doubleMax", "floatMin", "floatMax", "hyperUnique", "filtered", "approxHistogram", - "approxHistogramFold", and "fixedBucketsHistogram (with numerical input)". + "approxHistogramFold", and "fixedBucketsHistogram" (with numerical input). - No virtual columns. - For GroupBy: All dimension specs must be "default" (no extraction functions or filtered dimension specs). - For GroupBy: No multi-value dimensions. diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregatorInternal.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregatorInternal.java index b9c9c0ec6591..6301bfba3e42 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregatorInternal.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregatorInternal.java @@ -77,7 +77,7 @@ public void aggregate(ByteBuffer buf, int position, @Nullable ApproximateHistogr public void foldFast(ApproximateHistogram left, ApproximateHistogram right) { - //TODO: do these have to set in every call + //These have to set in every call since limits are transient and lost during serialization-deserialization left.setLowerLimit(lowerLimit); left.setUpperLimit(upperLimit); left.foldFast(right, tmpBufferA, tmpBufferB); From 182b6102d774eddb1ef06383ae79c1c30c9df115 Mon Sep 17 00:00:00 2001 From: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com> Date: Fri, 28 Aug 2020 14:29:20 +0530 Subject: [PATCH 07/10] Add spelling --- website/.spelling | 1 + 1 file changed, 1 insertion(+) diff --git a/website/.spelling b/website/.spelling index 249db61de5f5..14fd910f4a78 100644 --- a/website/.spelling +++ b/website/.spelling @@ -596,6 +596,7 @@ url - ../docs/development/extensions-core/approximate-histograms.md approxHistogram approxHistogramFold +fixedBucketsHistogram bucketNum lowerLimit numBuckets From 44f276f3e4c74a755a0e28ee4c5d560d03fc6ac6 Mon Sep 17 00:00:00 2001 From: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com> Date: Thu, 3 Sep 2020 15:27:06 +0530 Subject: [PATCH 08/10] Vectorize only for supported types --- .../ApproximateHistogramAggregatorFactory.java | 6 +++++- .../ApproximateHistogramFoldingAggregatorFactory.java | 11 +++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramAggregatorFactory.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramAggregatorFactory.java index 592dbfbdab2d..cde33fd605f2 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramAggregatorFactory.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramAggregatorFactory.java @@ -37,6 +37,7 @@ import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; @@ -118,7 +119,10 @@ public VectorAggregator factorizeVector(VectorColumnSelectorFactory metricVector @Override public boolean canVectorize(ColumnInspector columnInspector) { - return true; + /* skip vectorization for string types which may be parseable to numbers. There is no vector equivalent of + string value selector*/ + ColumnCapabilities capabilities = columnInspector.getColumnCapabilities(fieldName); + return (capabilities != null) && capabilities.getType().isNumeric(); } @Override diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingAggregatorFactory.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingAggregatorFactory.java index 5e15c4d9b2e4..7a8a80cc21b2 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingAggregatorFactory.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingAggregatorFactory.java @@ -29,8 +29,11 @@ import org.apache.druid.query.aggregation.BufferAggregator; import org.apache.druid.query.aggregation.VectorAggregator; import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorObjectSelector; @@ -103,6 +106,14 @@ public VectorAggregator factorizeVector(VectorColumnSelectorFactory metricVector return new ApproximateHistogramFoldingVectorAggregator(selector, resolution, lowerLimit, upperLimit); } + + @Override + public boolean canVectorize(ColumnInspector columnInspector) + { + ColumnCapabilities capabilities = columnInspector.getColumnCapabilities(fieldName); + return (capabilities != null) && (capabilities.getType() == ValueType.COMPLEX); + } + @Override public AggregatorFactory getCombiningFactory() { From 09ace2d0b9aaf40082891b7f34e2b4420a3c8a3d Mon Sep 17 00:00:00 2001 From: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com> Date: Thu, 3 Sep 2020 15:28:29 +0530 Subject: [PATCH 09/10] Rename internal aggregator files --- .../histogram/ApproximateHistogramBufferAggregator.java | 4 ++-- ...l.java => ApproximateHistogramBufferAggregatorHelper.java} | 4 ++-- .../ApproximateHistogramFoldingBufferAggregator.java | 4 ++-- ...=> ApproximateHistogramFoldingBufferAggregatorHelper.java} | 4 ++-- .../ApproximateHistogramFoldingVectorAggregator.java | 4 ++-- .../histogram/ApproximateHistogramVectorAggregator.java | 4 ++-- .../histogram/FixedBucketsHistogramBufferAggregator.java | 4 ++-- ....java => FixedBucketsHistogramBufferAggregatorHelper.java} | 4 ++-- .../histogram/FixedBucketsHistogramVectorAggregator.java | 4 ++-- 9 files changed, 18 insertions(+), 18 deletions(-) rename extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/{ApproximateHistogramBufferAggregatorInternal.java => ApproximateHistogramBufferAggregatorHelper.java} (94%) rename extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/{ApproximateHistogramFoldingBufferAggregatorInternal.java => ApproximateHistogramFoldingBufferAggregatorHelper.java} (96%) rename extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/{FixedBucketsHistogramBufferAggregatorInternal.java => FixedBucketsHistogramBufferAggregatorHelper.java} (96%) diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramBufferAggregator.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramBufferAggregator.java index 2222ce635015..a33d27baaed8 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramBufferAggregator.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramBufferAggregator.java @@ -28,12 +28,12 @@ public class ApproximateHistogramBufferAggregator implements BufferAggregator { private final BaseFloatColumnValueSelector selector; - private final ApproximateHistogramBufferAggregatorInternal innerAggregator; + private final ApproximateHistogramBufferAggregatorHelper innerAggregator; public ApproximateHistogramBufferAggregator(BaseFloatColumnValueSelector selector, int resolution) { this.selector = selector; - this.innerAggregator = new ApproximateHistogramBufferAggregatorInternal(resolution); + this.innerAggregator = new ApproximateHistogramBufferAggregatorHelper(resolution); } @Override diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramBufferAggregatorInternal.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramBufferAggregatorHelper.java similarity index 94% rename from extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramBufferAggregatorInternal.java rename to extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramBufferAggregatorHelper.java index 454095b7861c..b597c4d66d69 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramBufferAggregatorInternal.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramBufferAggregatorHelper.java @@ -25,11 +25,11 @@ * A helper class used by {@link ApproximateHistogramBufferAggregator} and {@link ApproximateHistogramVectorAggregator} * for aggregation operations on byte buffers. Getting the object from value selectors is outside this class. */ -final class ApproximateHistogramBufferAggregatorInternal +final class ApproximateHistogramBufferAggregatorHelper { private final int resolution; - public ApproximateHistogramBufferAggregatorInternal(int resolution) + public ApproximateHistogramBufferAggregatorHelper(int resolution) { this.resolution = resolution; } diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregator.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregator.java index 96c385b2de54..811e2bfb00f3 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregator.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregator.java @@ -28,7 +28,7 @@ public class ApproximateHistogramFoldingBufferAggregator implements BufferAggregator { private final BaseObjectColumnValueSelector selector; - private final ApproximateHistogramFoldingBufferAggregatorInternal innerAggregator; + private final ApproximateHistogramFoldingBufferAggregatorHelper innerAggregator; public ApproximateHistogramFoldingBufferAggregator( BaseObjectColumnValueSelector selector, @@ -38,7 +38,7 @@ public ApproximateHistogramFoldingBufferAggregator( ) { this.selector = selector; - this.innerAggregator = new ApproximateHistogramFoldingBufferAggregatorInternal(resolution, lowerLimit, upperLimit); + this.innerAggregator = new ApproximateHistogramFoldingBufferAggregatorHelper(resolution, lowerLimit, upperLimit); } @Override diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregatorInternal.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregatorHelper.java similarity index 96% rename from extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregatorInternal.java rename to extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregatorHelper.java index 6301bfba3e42..64c6a4cc7007 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregatorInternal.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingBufferAggregatorHelper.java @@ -27,7 +27,7 @@ * {@link ApproximateHistogramFoldingVectorAggregator} for aggregation operations on byte buffers. * Getting the object from value selectors is outside this class. */ -final class ApproximateHistogramFoldingBufferAggregatorInternal +final class ApproximateHistogramFoldingBufferAggregatorHelper { private final int resolution; private final float upperLimit; @@ -36,7 +36,7 @@ final class ApproximateHistogramFoldingBufferAggregatorInternal private float[] tmpBufferA; private long[] tmpBufferB; - public ApproximateHistogramFoldingBufferAggregatorInternal( + public ApproximateHistogramFoldingBufferAggregatorHelper( int resolution, float lowerLimit, float upperLimit diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingVectorAggregator.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingVectorAggregator.java index 41e4306003c9..69c6e596dcfc 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingVectorAggregator.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingVectorAggregator.java @@ -27,7 +27,7 @@ public class ApproximateHistogramFoldingVectorAggregator implements VectorAggregator { - private final ApproximateHistogramFoldingBufferAggregatorInternal innerAggregator; + private final ApproximateHistogramFoldingBufferAggregatorHelper innerAggregator; private final VectorObjectSelector selector; public ApproximateHistogramFoldingVectorAggregator( @@ -38,7 +38,7 @@ public ApproximateHistogramFoldingVectorAggregator( ) { this.selector = selector; - this.innerAggregator = new ApproximateHistogramFoldingBufferAggregatorInternal(resolution, lowerLimit, upperLimit); + this.innerAggregator = new ApproximateHistogramFoldingBufferAggregatorHelper(resolution, lowerLimit, upperLimit); } @Override diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramVectorAggregator.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramVectorAggregator.java index a30a36f534ba..728271f89d66 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramVectorAggregator.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramVectorAggregator.java @@ -29,7 +29,7 @@ public class ApproximateHistogramVectorAggregator implements VectorAggregator { private final VectorValueSelector selector; - private final ApproximateHistogramBufferAggregatorInternal innerAggregator; + private final ApproximateHistogramBufferAggregatorHelper innerAggregator; public ApproximateHistogramVectorAggregator( VectorValueSelector selector, @@ -37,7 +37,7 @@ public ApproximateHistogramVectorAggregator( ) { this.selector = selector; - this.innerAggregator = new ApproximateHistogramBufferAggregatorInternal(resolution); + this.innerAggregator = new ApproximateHistogramBufferAggregatorHelper(resolution); } @Override diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramBufferAggregator.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramBufferAggregator.java index 2d3feff11687..1ecebb545774 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramBufferAggregator.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramBufferAggregator.java @@ -28,7 +28,7 @@ public class FixedBucketsHistogramBufferAggregator implements BufferAggregator { private final BaseObjectColumnValueSelector selector; - private final FixedBucketsHistogramBufferAggregatorInternal innerAggregator; + private final FixedBucketsHistogramBufferAggregatorHelper innerAggregator; public FixedBucketsHistogramBufferAggregator( BaseObjectColumnValueSelector selector, @@ -39,7 +39,7 @@ public FixedBucketsHistogramBufferAggregator( ) { this.selector = selector; - this.innerAggregator = new FixedBucketsHistogramBufferAggregatorInternal( + this.innerAggregator = new FixedBucketsHistogramBufferAggregatorHelper( lowerLimit, upperLimit, numBuckets, diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramBufferAggregatorInternal.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramBufferAggregatorHelper.java similarity index 96% rename from extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramBufferAggregatorInternal.java rename to extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramBufferAggregatorHelper.java index b61e639c6a76..8844e25ae4ca 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramBufferAggregatorInternal.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramBufferAggregatorHelper.java @@ -27,14 +27,14 @@ * {@link FixedBucketsHistogramVectorAggregator} for aggregation operations on byte buffers. * Getting the object from value selectors is outside this class. */ -final class FixedBucketsHistogramBufferAggregatorInternal +final class FixedBucketsHistogramBufferAggregatorHelper { private final double lowerLimit; private final double upperLimit; private final int numBuckets; private final FixedBucketsHistogram.OutlierHandlingMode outlierHandlingMode; - public FixedBucketsHistogramBufferAggregatorInternal( + public FixedBucketsHistogramBufferAggregatorHelper( double lowerLimit, double upperLimit, int numBuckets, diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregator.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregator.java index 2b7b8cb16734..8bfbe6fe194a 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregator.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramVectorAggregator.java @@ -28,7 +28,7 @@ public class FixedBucketsHistogramVectorAggregator implements VectorAggregator { private final VectorValueSelector selector; - private final FixedBucketsHistogramBufferAggregatorInternal innerAggregator; + private final FixedBucketsHistogramBufferAggregatorHelper innerAggregator; public FixedBucketsHistogramVectorAggregator( VectorValueSelector selector, @@ -39,7 +39,7 @@ public FixedBucketsHistogramVectorAggregator( ) { this.selector = selector; - this.innerAggregator = new FixedBucketsHistogramBufferAggregatorInternal( + this.innerAggregator = new FixedBucketsHistogramBufferAggregatorHelper( lowerLimit, upperLimit, numBuckets, From adfb1353cb2b0fb7c8f52232f5c14e06b74f3496 Mon Sep 17 00:00:00 2001 From: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com> Date: Thu, 3 Sep 2020 17:02:56 +0530 Subject: [PATCH 10/10] Fix tests --- ...eHistogramFoldingVectorAggregatorTest.java | 28 ++++++++++++++++++- ...roximateHistogramVectorAggregatorTest.java | 16 +++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingVectorAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingVectorAggregatorTest.java index 893a720856e1..ee7283b20a82 100644 --- a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingVectorAggregatorTest.java +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramFoldingVectorAggregatorTest.java @@ -20,6 +20,8 @@ package org.apache.druid.query.aggregation.histogram; import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorObjectSelector; import org.easymock.EasyMock; @@ -61,9 +63,28 @@ public void setup() vectorColumnSelectorFactory = createMock(VectorColumnSelectorFactory.class); expect(vectorColumnSelectorFactory.makeObjectSelector("field")) .andReturn(vectorObjectSelector).anyTimes(); + expect(vectorColumnSelectorFactory.getColumnCapabilities("field")).andReturn( + new ColumnCapabilitiesImpl().setType(ValueType.COMPLEX) + ); + expect(vectorColumnSelectorFactory.getColumnCapabilities("string_field")).andReturn( + new ColumnCapabilitiesImpl().setType(ValueType.STRING) + ); + expect(vectorColumnSelectorFactory.getColumnCapabilities("double_field")).andReturn( + new ColumnCapabilitiesImpl().setType(ValueType.STRING) + ); EasyMock.replay(vectorColumnSelectorFactory); } + @Test + public void doNotVectorizedNonComplexTypes() + { + ApproximateHistogramFoldingAggregatorFactory factory = buildHistogramFactory("string_field"); + Assert.assertFalse(factory.canVectorize(vectorColumnSelectorFactory)); + + factory = buildHistogramFactory("double_field"); + Assert.assertFalse(factory.canVectorize(vectorColumnSelectorFactory)); + } + @Test public void testAggregateSinglePosition() { @@ -103,10 +124,15 @@ public void testAggregateMultiPositions() } private ApproximateHistogramFoldingAggregatorFactory buildHistogramFactory() + { + return buildHistogramFactory("field"); + } + + private ApproximateHistogramFoldingAggregatorFactory buildHistogramFactory(String fieldName) { return new ApproximateHistogramFoldingAggregatorFactory( "approximateHistoFold", - "field", + fieldName, 5, 5, 0f, diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramVectorAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramVectorAggregatorTest.java index b4babcd34c3a..9958194a74e6 100644 --- a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramVectorAggregatorTest.java +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramVectorAggregatorTest.java @@ -67,9 +67,25 @@ public void setup() expect(vectorColumnSelectorFactory.getColumnCapabilities("field_2")).andReturn(columnCapabilities).anyTimes(); expect(vectorColumnSelectorFactory.makeValueSelector("field_2")) .andReturn(vectorValueSelector_2).anyTimes(); + expect(vectorColumnSelectorFactory.getColumnCapabilities("string_field")).andReturn( + new ColumnCapabilitiesImpl().setType(ValueType.STRING) + ); + expect(vectorColumnSelectorFactory.getColumnCapabilities("complex_field")).andReturn( + new ColumnCapabilitiesImpl().setType(ValueType.COMPLEX) + ); EasyMock.replay(vectorColumnSelectorFactory); } + @Test + public void doNotVectorizedNonNumericTypes() + { + ApproximateHistogramAggregatorFactory factory = buildHistogramAggFactory("string_field"); + Assert.assertFalse(factory.canVectorize(vectorColumnSelectorFactory)); + + factory = buildHistogramAggFactory("complex_field"); + Assert.assertFalse(factory.canVectorize(vectorColumnSelectorFactory)); + } + @Test public void testAggregateSinglePosition() {