From 390a6739a88aa6466e044b7aaefd1c3e2ecf3f13 Mon Sep 17 00:00:00 2001 From: AlexanderSaydakov Date: Mon, 2 May 2022 16:00:46 -0700 Subject: [PATCH 01/10] KLL sketch --- .../druid/segment/column/ColumnType.java | 2 +- .../KllDoublesSketchAggregatorFactory.java | 234 +++++++ .../kll/KllDoublesSketchBuildAggregator.java | 40 ++ ...KllDoublesSketchBuildBufferAggregator.java | 50 ++ ...blesSketchBuildBufferAggregatorHelper.java | 46 ++ ...KllDoublesSketchBuildVectorAggregator.java | 104 +++ .../KllDoublesSketchComplexMetricSerde.java | 112 ++++ .../kll/KllDoublesSketchJsonSerializer.java | 39 ++ .../kll/KllDoublesSketchMergeAggregator.java | 50 ++ ...llDoublesSketchMergeAggregatorFactory.java | 77 +++ ...KllDoublesSketchMergeBufferAggregator.java | 43 ++ ...blesSketchMergeBufferAggregatorHelper.java | 48 ++ ...KllDoublesSketchMergeVectorAggregator.java | 105 +++ .../kll/KllDoublesSketchObjectStrategy.java | 63 ++ .../kll/KllDoublesSketchOperations.java | 59 ++ .../KllDoublesSketchToCDFPostAggregator.java | 160 +++++ ...oublesSketchToHistogramPostAggregator.java | 221 ++++++ ...DoublesSketchToQuantilePostAggregator.java | 144 ++++ ...oublesSketchToQuantilesPostAggregator.java | 158 +++++ .../KllDoublesSketchToRankPostAggregator.java | 144 ++++ ...llDoublesSketchToStringPostAggregator.java | 134 ++++ .../kll/KllFloatsSketchAggregatorFactory.java | 234 +++++++ .../kll/KllFloatsSketchBuildAggregator.java | 40 ++ .../KllFloatsSketchBuildBufferAggregator.java | 50 ++ ...oatsSketchBuildBufferAggregatorHelper.java | 46 ++ .../KllFloatsSketchBuildVectorAggregator.java | 104 +++ .../KllFloatsSketchComplexMetricSerde.java | 112 ++++ .../kll/KllFloatsSketchJsonSerializer.java | 39 ++ .../kll/KllFloatsSketchMergeAggregator.java | 50 ++ ...KllFloatsSketchMergeAggregatorFactory.java | 77 +++ .../KllFloatsSketchMergeBufferAggregator.java | 43 ++ ...oatsSketchMergeBufferAggregatorHelper.java | 48 ++ .../KllFloatsSketchMergeVectorAggregator.java | 105 +++ 
.../kll/KllFloatsSketchObjectStrategy.java | 63 ++ .../kll/KllFloatsSketchOperations.java | 59 ++ .../KllFloatsSketchToCDFPostAggregator.java | 160 +++++ ...FloatsSketchToHistogramPostAggregator.java | 221 ++++++ ...lFloatsSketchToQuantilePostAggregator.java | 144 ++++ ...FloatsSketchToQuantilesPostAggregator.java | 158 +++++ .../KllFloatsSketchToRankPostAggregator.java | 144 ++++ ...KllFloatsSketchToStringPostAggregator.java | 134 ++++ .../kll/KllSketchAggregatorFactory.java | 279 ++++++++ .../kll/KllSketchBuildAggregator.java | 62 ++ .../kll/KllSketchBuildBufferAggregator.java | 89 +++ .../KllSketchBuildBufferAggregatorHelper.java | 109 +++ .../kll/KllSketchMergeAggregator.java | 62 ++ .../kll/KllSketchMergeAggregatorFactory.java | 70 ++ .../kll/KllSketchMergeBufferAggregator.java | 86 +++ .../KllSketchMergeBufferAggregatorHelper.java | 112 ++++ .../datasketches/kll/KllSketchModule.java | 104 +++ .../kll/KllSketchNoOpAggregator.java | 60 ++ .../kll/KllSketchNoOpBufferAggregator.java | 104 +++ .../datasketches/kll/package-info.java | 23 + ...rg.apache.druid.initialization.DruidModule | 1 + .../datasketches/kll/GenerateTestData.java | 101 +++ ...KllDoublesSketchAggregatorFactoryTest.java | 156 +++++ .../kll/KllDoublesSketchAggregatorTest.java | 632 ++++++++++++++++++ ...llDoublesSketchComplexMetricSerdeTest.java | 95 +++ ...ublesSketchMergeAggregatorFactoryTest.java | 63 ++ ...lDoublesSketchToCDFPostAggregatorTest.java | 184 +++++ ...esSketchToHistogramPostAggregatorTest.java | 220 ++++++ ...lesSketchToQuantilePostAggregatorTest.java | 73 ++ ...esSketchToQuantilesPostAggregatorTest.java | 186 ++++++ ...DoublesSketchToRankPostAggregatorTest.java | 159 +++++ ...ublesSketchToStringPostAggregatorTest.java | 89 +++ .../KllFloatsSketchAggregatorFactoryTest.java | 156 +++++ .../kll/KllFloatsSketchAggregatorTest.java | 632 ++++++++++++++++++ ...KllFloatsSketchComplexMetricSerdeTest.java | 95 +++ ...loatsSketchMergeAggregatorFactoryTest.java | 63 ++ 
...llFloatsSketchToCDFPostAggregatorTest.java | 184 +++++ ...tsSketchToHistogramPostAggregatorTest.java | 220 ++++++ ...atsSketchToQuantilePostAggregatorTest.java | 73 ++ ...tsSketchToQuantilesPostAggregatorTest.java | 186 ++++++ ...lFloatsSketchToRankPostAggregatorTest.java | 159 +++++ ...loatsSketchToStringPostAggregatorTest.java | 89 +++ .../kll/kll_doubles_sketch_build_data.tsv | 400 +++++++++++ .../resources/kll/kll_doubles_sketch_data.tsv | 20 + .../kll/kll_floats_sketch_build_data.tsv | 400 +++++++++++ .../resources/kll/kll_floats_sketch_data.tsv | 20 + pom.xml | 2 +- .../query/aggregation/AggregatorUtil.java | 5 + .../aggregation/post/PostAggregatorIds.java | 12 + 82 files changed, 9897 insertions(+), 2 deletions(-) create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactory.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchBuildAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchBuildBufferAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchBuildBufferAggregatorHelper.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchBuildVectorAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchComplexMetricSerde.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchJsonSerializer.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeAggregator.java create mode 100644 
extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeAggregatorFactory.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeBufferAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeBufferAggregatorHelper.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeVectorAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchObjectStrategy.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchOperations.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToCDFPostAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToHistogramPostAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToQuantilePostAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToQuantilesPostAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToRankPostAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToStringPostAggregator.java create mode 100644 
extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactory.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchBuildAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchBuildBufferAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchBuildBufferAggregatorHelper.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchBuildVectorAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchComplexMetricSerde.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchJsonSerializer.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeAggregatorFactory.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeBufferAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeBufferAggregatorHelper.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeVectorAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchObjectStrategy.java create 
mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchOperations.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToCDFPostAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToHistogramPostAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToQuantilePostAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToQuantilesPostAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToRankPostAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToStringPostAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchAggregatorFactory.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchBuildAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchBuildBufferAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchBuildBufferAggregatorHelper.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeAggregatorFactory.java create mode 100644 
extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeBufferAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeBufferAggregatorHelper.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchModule.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchNoOpAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchNoOpBufferAggregator.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/package-info.java create mode 100644 extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/GenerateTestData.java create mode 100644 extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactoryTest.java create mode 100644 extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorTest.java create mode 100644 extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchComplexMetricSerdeTest.java create mode 100644 extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeAggregatorFactoryTest.java create mode 100644 extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToCDFPostAggregatorTest.java create mode 100644 extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToHistogramPostAggregatorTest.java create mode 100644 
extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToQuantilePostAggregatorTest.java create mode 100644 extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToQuantilesPostAggregatorTest.java create mode 100644 extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToRankPostAggregatorTest.java create mode 100644 extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToStringPostAggregatorTest.java create mode 100644 extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactoryTest.java create mode 100644 extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorTest.java create mode 100644 extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchComplexMetricSerdeTest.java create mode 100644 extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeAggregatorFactoryTest.java create mode 100644 extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToCDFPostAggregatorTest.java create mode 100644 extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToHistogramPostAggregatorTest.java create mode 100644 extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToQuantilePostAggregatorTest.java create mode 100644 extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToQuantilesPostAggregatorTest.java create mode 100644 
extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToRankPostAggregatorTest.java create mode 100644 extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToStringPostAggregatorTest.java create mode 100644 extensions-core/datasketches/src/test/resources/kll/kll_doubles_sketch_build_data.tsv create mode 100644 extensions-core/datasketches/src/test/resources/kll/kll_doubles_sketch_data.tsv create mode 100644 extensions-core/datasketches/src/test/resources/kll/kll_floats_sketch_build_data.tsv create mode 100644 extensions-core/datasketches/src/test/resources/kll/kll_floats_sketch_data.tsv diff --git a/core/src/main/java/org/apache/druid/segment/column/ColumnType.java b/core/src/main/java/org/apache/druid/segment/column/ColumnType.java index 705186c54458..1dd1d62e3026 100644 --- a/core/src/main/java/org/apache/druid/segment/column/ColumnType.java +++ b/core/src/main/java/org/apache/druid/segment/column/ColumnType.java @@ -34,10 +34,10 @@ public class ColumnType extends BaseTypeSignature public static final ColumnType DOUBLE = new ColumnType(ValueType.DOUBLE, null, null); public static final ColumnType FLOAT = new ColumnType(ValueType.FLOAT, null, null); // currently, arrays only come from expressions or aggregators - // and there are no native float expressions (or aggs which produce float arrays) public static final ColumnType STRING_ARRAY = new ColumnType(ValueType.ARRAY, null, STRING); public static final ColumnType LONG_ARRAY = new ColumnType(ValueType.ARRAY, null, LONG); public static final ColumnType DOUBLE_ARRAY = new ColumnType(ValueType.ARRAY, null, DOUBLE); + public static final ColumnType FLOAT_ARRAY = new ColumnType(ValueType.ARRAY, null, FLOAT); public static final ColumnType UNKNOWN_COMPLEX = new ColumnType(ValueType.COMPLEX, null, null); @JsonCreator diff --git 
a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactory.java new file mode 100644 index 000000000000..6bafec93910c --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactory.java @@ -0,0 +1,234 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.datasketches.kll.KllDoublesSketch; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.AggregatorFactoryNotMergeableException; +import org.apache.druid.query.aggregation.AggregatorUtil; +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.segment.ColumnProcessors; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.VectorColumnProcessorFactory; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorObjectSelector; +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +public class KllDoublesSketchAggregatorFactory extends KllSketchAggregatorFactory +{ + public static final Comparator COMPARATOR = + Comparator.nullsFirst(Comparator.comparingLong(KllDoublesSketch::getN)); + + @JsonCreator + public KllDoublesSketchAggregatorFactory( + @JsonProperty("name") final String name, + @JsonProperty("fieldName") final String fieldName, + @JsonProperty("k") @Nullable final Integer k, + @JsonProperty("maxStreamLength") @Nullable final Long maxStreamLength + ) + { + this(name, fieldName, k, maxStreamLength, AggregatorUtil.KLL_DOUBLES_SKETCH_BUILD_CACHE_TYPE_ID); + } + + KllDoublesSketchAggregatorFactory( + final String name, + final String fieldName, + @Nullable final Integer k, + @Nullable final Long maxStreamLength, + 
final byte cacheTypeId + ) + { + super( + name, + fieldName, + k, + maxStreamLength, + cacheTypeId + ); + } + + @Override + public Comparator getComparator() + { + return COMPARATOR; + } + + @Override + public List getRequiredColumns() + { + return Collections.singletonList( + new KllDoublesSketchAggregatorFactory( + getFieldName(), + getFieldName(), + getK(), + getMaxStreamLength() + ) + ); + } + + @Override + public AggregatorFactory getMergingFactory(final AggregatorFactory other) + throws AggregatorFactoryNotMergeableException + { + if (other.getName().equals(this.getName()) && other instanceof KllDoublesSketchAggregatorFactory) { + // KllSketch supports merging with different k. + // The result will have effective k between the specified k and the minimum k from all input sketches + // to achieve higher accuracy as much as possible. + return new KllDoublesSketchMergeAggregatorFactory( + getName(), + Math.max(getK(), ((KllDoublesSketchAggregatorFactory) other).getK()), + getMaxStreamLength() + ); + } else { + throw new AggregatorFactoryNotMergeableException(this, other); + } + } + + @Override + public AggregatorFactory getCombiningFactory() + { + return new KllDoublesSketchMergeAggregatorFactory(getName(), getK(), getMaxStreamLength()); + } + + @Override + KllDoublesSketch getEmptySketch() + { + return KllDoublesSketchOperations.EMPTY_SKETCH; + } + + @Override + KllDoublesSketch newHeapInstance(final int k) + { + return KllDoublesSketch.newHeapInstance(k); + } + + @Override + Class getSketchClass() + { + return KllDoublesSketch.class; + } + + @Override + int getMaxSerializedSizeBytes(final int k, final long n) + { + return KllDoublesSketch.getMaxSerializedSizeBytes(k, n, true); + } + + @Override + KllSketchBuildAggregator getBuildAggregator(final ColumnValueSelector selector) + { + return new KllDoublesSketchBuildAggregator(selector, getK()); + } + + @Override + KllSketchMergeAggregator getMergeAggregator(final ColumnValueSelector selector) + { + return new 
KllDoublesSketchMergeAggregator(selector, getK()); + } + + @Override + KllDoublesSketchBuildBufferAggregator getBuildBufferAggregator(final ColumnValueSelector selector) + { + return new KllDoublesSketchBuildBufferAggregator(selector, getK(), getMaxIntermediateSizeWithNulls()); + } + + @Override + KllDoublesSketchMergeBufferAggregator getMergeBufferAggregator(final ColumnValueSelector selector) + { + return new KllDoublesSketchMergeBufferAggregator(selector, getK(), getMaxIntermediateSizeWithNulls()); + } + + @Override + public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFactory) + { + return ColumnProcessors.makeVectorProcessor( + getFieldName(), + new VectorColumnProcessorFactory() + { + @Override + public VectorAggregator makeSingleValueDimensionProcessor( + ColumnCapabilities capabilities, + SingleValueDimensionVectorSelector selector + ) + { + return new KllSketchNoOpBufferAggregator(getEmptySketch()); + } + + @Override + public VectorAggregator makeMultiValueDimensionProcessor( + ColumnCapabilities capabilities, + MultiValueDimensionVectorSelector selector + ) + { + return new KllSketchNoOpBufferAggregator(getEmptySketch()); + } + + @Override + public VectorAggregator makeFloatProcessor(ColumnCapabilities capabilities, VectorValueSelector selector) + { + return new KllDoublesSketchBuildVectorAggregator(selector, getK(), getMaxIntermediateSizeWithNulls()); + } + + @Override + public VectorAggregator makeDoubleProcessor(ColumnCapabilities capabilities, VectorValueSelector selector) + { + return new KllDoublesSketchBuildVectorAggregator(selector, getK(), getMaxIntermediateSizeWithNulls()); + } + + @Override + public VectorAggregator makeLongProcessor(ColumnCapabilities capabilities, VectorValueSelector selector) + { + return new KllDoublesSketchBuildVectorAggregator(selector, getK(), getMaxIntermediateSizeWithNulls()); + } + + @Override + public VectorAggregator makeObjectProcessor(ColumnCapabilities capabilities, VectorObjectSelector 
selector) + { + return new KllDoublesSketchMergeVectorAggregator(selector, getK(), getMaxIntermediateSizeWithNulls()); + } + }, + selectorFactory + ); + } + + @Override + public Object deserialize(final Object object) + { + return KllDoublesSketchOperations.deserialize(object); + } + + @Override + public ColumnType getIntermediateType() + { + return KllSketchModule.DOUBLES_TYPE; + } + +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchBuildAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchBuildAggregator.java new file mode 100644 index 000000000000..875e624788cf --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchBuildAggregator.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import org.apache.datasketches.kll.KllDoublesSketch; +import org.apache.druid.segment.ColumnValueSelector; + +public class KllDoublesSketchBuildAggregator extends KllSketchBuildAggregator +{ + public KllDoublesSketchBuildAggregator(final ColumnValueSelector valueSelector, final int size) + { + super(valueSelector, KllDoublesSketch.newHeapInstance(size)); + } + + @Override + public synchronized void aggregate() + { + if (valueSelector.isNull()) { + return; + } + sketch.update(valueSelector.getDouble()); + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchBuildBufferAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchBuildBufferAggregator.java new file mode 100644 index 000000000000..c4c67aae2ad9 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchBuildBufferAggregator.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import org.apache.datasketches.kll.KllDoublesSketch; +import org.apache.druid.segment.ColumnValueSelector; + +import java.nio.ByteBuffer; + +public class KllDoublesSketchBuildBufferAggregator + extends KllSketchBuildBufferAggregator +{ + public KllDoublesSketchBuildBufferAggregator( + final ColumnValueSelector valueSelector, + final int size, + final int maxIntermediateSize + ) + { + super(valueSelector, new KllDoublesSketchBuildBufferAggregatorHelper(size, maxIntermediateSize)); + } + + @Override + public void aggregate(final ByteBuffer buffer, final int position) + { + if (selector.isNull()) { + return; + } + + final KllDoublesSketch sketch = helper.getSketchAtPosition(buffer, position); + sketch.update(selector.getDouble()); + } + +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchBuildBufferAggregatorHelper.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchBuildBufferAggregatorHelper.java new file mode 100644 index 000000000000..9d977a6b6f0e --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchBuildBufferAggregatorHelper.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import org.apache.datasketches.kll.KllDoublesSketch; +import org.apache.datasketches.memory.MemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; + +public class KllDoublesSketchBuildBufferAggregatorHelper + extends KllSketchBuildBufferAggregatorHelper +{ + public KllDoublesSketchBuildBufferAggregatorHelper(final int size, final int maxIntermediateSize) + { + super(size, maxIntermediateSize); + } + + @Override + KllDoublesSketch newDirectInstance(final int k, final WritableMemory mem, final MemoryRequestServer reqServer) + { + return KllDoublesSketch.newDirectInstance(k, mem, reqServer); // honor the configured k, not the library default + } + + @Override + KllDoublesSketch writableWrap(final WritableMemory mem, final MemoryRequestServer reqServer) + { + return KllDoublesSketch.writableWrap(mem, reqServer); + } + +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchBuildVectorAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchBuildVectorAggregator.java new file mode 100644 index 000000000000..e2cc90859d93 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchBuildVectorAggregator.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.kll;
+
+import org.apache.datasketches.kll.KllDoublesSketch;
+import org.apache.druid.query.aggregation.VectorAggregator;
+import org.apache.druid.segment.vector.VectorValueSelector;
+
+import javax.annotation.Nullable;
+import java.nio.ByteBuffer;
+
+/**
+ * Vectorized aggregator that updates {@link KllDoublesSketch} instances, obtained from a
+ * {@link KllDoublesSketchBuildBufferAggregatorHelper}, with the non-null doubles of a vector.
+ */
+public class KllDoublesSketchBuildVectorAggregator implements VectorAggregator
+{
+  private final VectorValueSelector selector;
+  private final KllDoublesSketchBuildBufferAggregatorHelper helper;
+
+  /**
+   * @param selector            source of the double vector and its null vector
+   * @param size                first argument of the helper constructor
+   * @param maxIntermediateSize second argument of the helper constructor
+   */
+  KllDoublesSketchBuildVectorAggregator(
+      final VectorValueSelector selector,
+      final int size,
+      final int maxIntermediateSize
+  )
+  {
+    this.selector = selector;
+    this.helper = new KllDoublesSketchBuildBufferAggregatorHelper(size, maxIntermediateSize);
+  }
+
+  @Override
+  public void init(final ByteBuffer buf, final int position)
+  {
+    helper.init(buf, position);
+  }
+
+  /**
+   * Feeds rows {@code [startRow, endRow)} into the single sketch at {@code position}.
+   * A null vector of {@code null} means no row is null.
+   */
+  @Override
+  public void aggregate(final ByteBuffer buf, final int position, final int startRow, final int endRow)
+  {
+    final double[] values = selector.getDoubleVector();
+    final boolean[] nullFlags = selector.getNullVector();
+
+    final KllDoublesSketch target = helper.getSketchAtPosition(buf, position);
+
+    if (nullFlags == null) {
+      for (int row = startRow; row < endRow; row++) {
+        target.update(values[row]);
+      }
+    } else {
+      for (int row = startRow; row < endRow; row++) {
+        if (!nullFlags[row]) {
+          target.update(values[row]);
+        }
+      }
+    }
+  }
+
+  /**
+   * Feeds one row per entry of {@code positions}: entry {@code i} uses row {@code rows[i]}
+   * (or {@code i} when {@code rows} is null) and the sketch at {@code positions[i] + positionOffset}.
+   */
+  @Override
+  public void aggregate(
+      final ByteBuffer buf,
+      final int numRows,
+      final int[] positions,
+      @Nullable final int[] rows,
+      final int positionOffset
+  )
+  {
+    final double[] values = selector.getDoubleVector();
+    final boolean[] nullFlags = selector.getNullVector();
+
+    for (int i = 0; i < numRows; i++) {
+      final int row = rows == null ? i : rows[i];
+
+      if (nullFlags != null && nullFlags[row]) {
+        continue;
+      }
+      helper.getSketchAtPosition(buf, positions[i] + positionOffset).update(values[row]);
+    }
+  }
+
+  @Override
+  public Object get(final ByteBuffer buf, final int position)
+  {
+    return helper.get(buf, position);
+  }
+
+  @Override
+  public void relocate(final int oldPosition, final int newPosition, final ByteBuffer oldBuf, final ByteBuffer newBuf)
+  {
+    helper.relocate(oldPosition, newPosition, oldBuf, newBuf);
+  }
+
+  @Override
+  public void close()
+  {
+    helper.clear();
+  }
+}
diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchComplexMetricSerde.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchComplexMetricSerde.java
new file mode 100644
index 000000000000..4c18a9785607
--- /dev/null
+++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchComplexMetricSerde.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.kll;
+
+import com.google.common.primitives.Doubles;
+import org.apache.datasketches.kll.KllDoublesSketch;
+import org.apache.datasketches.memory.Memory;
+import org.apache.druid.data.input.InputRow;
+import org.apache.druid.segment.GenericColumnSerializer;
+import org.apache.druid.segment.column.ColumnBuilder;
+import org.apache.druid.segment.data.GenericIndexed;
+import org.apache.druid.segment.data.ObjectStrategy;
+import org.apache.druid.segment.serde.ComplexColumnPartSupplier;
+import org.apache.druid.segment.serde.ComplexMetricExtractor;
+import org.apache.druid.segment.serde.ComplexMetricSerde;
+import org.apache.druid.segment.serde.LargeColumnSupportedComplexColumnSerializer;
+import org.apache.druid.segment.writeout.SegmentWriteOutMedium;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Complex metric serde for {@link KllDoublesSketch} columns: names the type, exposes the
+ * object strategy, and turns raw input-row values into sketches.
+ */
+public class KllDoublesSketchComplexMetricSerde extends ComplexMetricSerde
+{
+
+  private static final KllDoublesSketchObjectStrategy STRATEGY = new KllDoublesSketchObjectStrategy();
+
+  @Override
+  public String getTypeName()
+  {
+    return KllSketchModule.DOUBLES_SKETCH;
+  }
+
+  @Override
+  public ObjectStrategy getObjectStrategy()
+  {
+    return STRATEGY;
+  }
+
+  /**
+   * Returns an extractor that accepts a string (empty, numeric, or otherwise handed to
+   * {@link KllDoublesSketchOperations#deserialize}), a {@link Number}, an existing sketch,
+   * a {@link Memory}, or null.
+   */
+  @Override
+  public ComplexMetricExtractor getExtractor()
+  {
+    return new ComplexMetricExtractor()
+    {
+      private static final int MIN_K = 8; // package one input value into the smallest sketch
+
+      @Override
+      public Class extractedClass()
+      {
+        return KllDoublesSketch.class;
+      }
+
+      @Override
+      public Object extractValue(final InputRow inputRow, final String metricName)
+      {
+        final Object raw = inputRow.getRaw(metricName);
+
+        if (raw instanceof String) { // everything is a string during ingestion
+          // Autodetection of the input format: empty string, number, or base64 encoded sketch
+          // A serialized KllDoublesSketch, as currently implemented, always has 0 in the first 5 bits.
+          // This is not a digit in base64
+          final String text = (String) raw;
+          if (text.isEmpty()) {
+            return KllDoublesSketchOperations.EMPTY_SKETCH;
+          }
+          final Double parsed = Doubles.tryParse(text);
+          if (parsed != null) {
+            return singleValueSketch(parsed);
+          }
+          // non-numeric strings fall through to deserialization below
+        } else if (raw instanceof Number) { // this is for reindexing
+          return singleValueSketch(((Number) raw).doubleValue());
+        }
+
+        final boolean passThrough = raw == null || raw instanceof KllDoublesSketch || raw instanceof Memory;
+        return passThrough ? raw : KllDoublesSketchOperations.deserialize(raw);
+      }
+
+      // Builds a heap sketch with k = MIN_K that holds exactly the given value.
+      private KllDoublesSketch singleValueSketch(final double value)
+      {
+        final KllDoublesSketch single = KllDoublesSketch.newHeapInstance(MIN_K);
+        single.update(value);
+        return single;
+      }
+    };
+  }
+
+  /**
+   * Reads a {@link GenericIndexed} column from {@code buffer} with this serde's strategy and
+   * registers it on {@code builder} as the complex column supplier.
+   */
+  @Override
+  public void deserializeColumn(final ByteBuffer buffer, final ColumnBuilder builder)
+  {
+    final GenericIndexed indexed = GenericIndexed.read(buffer, STRATEGY, builder.getFileMapper());
+    final ComplexColumnPartSupplier supplier = new ComplexColumnPartSupplier(getTypeName(), indexed);
+    builder.setComplexColumnSupplier(supplier);
+  }
+
+  // support large columns
+  @Override
+  public GenericColumnSerializer getSerializer(SegmentWriteOutMedium segmentWriteOutMedium, String column)
+  {
+    final ObjectStrategy strategy = this.getObjectStrategy();
+    return LargeColumnSupportedComplexColumnSerializer.create(segmentWriteOutMedium, column, strategy);
+  }
+}
diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchJsonSerializer.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchJsonSerializer.java
new file mode 100644
index
000000000000..ecf0d36fb09e --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchJsonSerializer.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.druid.query.aggregation.datasketches.kll;
+
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.JsonSerializer;
+import com.fasterxml.jackson.databind.SerializerProvider;
+import org.apache.datasketches.kll.KllDoublesSketch;
+
+import java.io.IOException;
+
+/**
+ * Jackson serializer that writes a {@link KllDoublesSketch} as a binary value
+ * containing the sketch's byte-array form.
+ */
+public class KllDoublesSketchJsonSerializer extends JsonSerializer
+{
+
+  /**
+   * Writes {@code sketch.toByteArray()} to {@code generator} through {@code writeBinary}.
+   *
+   * @throws IOException propagated from the generator
+   */
+  @Override
+  public void serialize(final KllDoublesSketch sketch, final JsonGenerator generator, final SerializerProvider provider)
+      throws IOException
+  {
+    final byte[] serialized = sketch.toByteArray();
+    generator.writeBinary(serialized);
+  }
+
+}
diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeAggregator.java
new file mode 100644
index 000000000000..b3212d8e47e0
--- /dev/null
+++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeAggregator.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.kll;
+
+import org.apache.datasketches.kll.KllDoublesSketch;
+import org.apache.druid.segment.ColumnValueSelector;
+
+/**
+ * Heap aggregator that folds selector values into a union {@link KllDoublesSketch}
+ * created with {@code KllDoublesSketch.newHeapInstance(k)}.
+ */
+public class KllDoublesSketchMergeAggregator extends KllSketchMergeAggregator
+{
+  public KllDoublesSketchMergeAggregator(final ColumnValueSelector selector, final int k)
+  {
+    super(selector, KllDoublesSketch.newHeapInstance(k));
+  }
+
+  @Override
+  public synchronized void aggregate()
+  {
+    updateUnion(selector, union);
+  }
+
+  /**
+   * Folds the selector's current object into {@code union}: a {@link KllDoublesSketch} is
+   * merged, null is ignored, and anything else is added via {@code selector.getDouble()}.
+   */
+  static void updateUnion(ColumnValueSelector selector, KllDoublesSketch union)
+  {
+    final Object current = selector.getObject();
+    if (current instanceof KllDoublesSketch) {
+      union.merge((KllDoublesSketch) current);
+    } else if (current != null) {
+      union.update(selector.getDouble());
+    }
+  }
+}
diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeAggregatorFactory.java
new file mode 100644
index 000000000000..dbcd34b26c6c
--- /dev/null
+++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeAggregatorFactory.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.kll;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.datasketches.kll.KllDoublesSketch;
+import org.apache.druid.query.aggregation.Aggregator;
+import org.apache.druid.query.aggregation.AggregatorUtil;
+import org.apache.druid.query.aggregation.BufferAggregator;
+import org.apache.druid.segment.ColumnSelectorFactory;
+import org.apache.druid.segment.ColumnValueSelector;
+import org.apache.druid.segment.NilColumnValueSelector;
+
+import javax.annotation.Nullable;
+
+/**
+ * Variant of {@link KllDoublesSketchAggregatorFactory} that uses {@code name} as both the
+ * output name and the field name, and always produces merge aggregators
+ * (or no-op aggregators when the column selector is a {@link NilColumnValueSelector}).
+ */
+public class KllDoublesSketchMergeAggregatorFactory extends KllDoublesSketchAggregatorFactory
+{
+
+  @JsonCreator
+  public KllDoublesSketchMergeAggregatorFactory(
+      @JsonProperty("name") final String name,
+      @JsonProperty("k") @Nullable final Integer k,
+      @JsonProperty("maxStreamLength") @Nullable final Long maxStreamLength
+  )
+  {
+    super(name, name, k, maxStreamLength, AggregatorUtil.KLL_DOUBLES_SKETCH_MERGE_CACHE_TYPE_ID);
+  }
+
+  /** Convenience constructor that leaves {@code maxStreamLength} null. */
+  @VisibleForTesting
+  KllDoublesSketchMergeAggregatorFactory(
+      final String name,
+      @Nullable final Integer k
+  )
+  {
+    this(name, k, null);
+  }
+
+  /**
+   * Returns a merge aggregator over the field's column, or a no-op aggregator holding
+   * {@link KllDoublesSketchOperations#EMPTY_SKETCH} when the selector is nil.
+   */
+  @Override
+  public Aggregator factorize(final ColumnSelectorFactory metricFactory)
+  {
+    final ColumnValueSelector columnSelector = metricFactory.makeColumnValueSelector(getFieldName());
+    if (!(columnSelector instanceof NilColumnValueSelector)) {
+      return getMergeAggregator(columnSelector);
+    }
+    return new KllSketchNoOpAggregator(KllDoublesSketchOperations.EMPTY_SKETCH);
+  }
+
+  /**
+   * Buffer counterpart of {@link #factorize}: a merge buffer aggregator, or a no-op buffer
+   * aggregator holding the empty sketch when the selector is nil.
+   */
+  @Override
+  public BufferAggregator factorizeBuffered(final ColumnSelectorFactory metricFactory)
+  {
+    final ColumnValueSelector columnSelector = metricFactory.makeColumnValueSelector(getFieldName());
+    if (!(columnSelector instanceof NilColumnValueSelector)) {
+      return getMergeBufferAggregator(columnSelector);
+    }
+    return new KllSketchNoOpBufferAggregator(KllDoublesSketchOperations.EMPTY_SKETCH);
+  }
+
+}
diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeBufferAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeBufferAggregator.java
new file mode 100644
index 000000000000..be371f4bef9b
--- /dev/null
+++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeBufferAggregator.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.kll;
+
+import org.apache.datasketches.kll.KllDoublesSketch;
+import org.apache.druid.segment.ColumnValueSelector;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Buffer aggregator that folds selector values into the union sketch its
+ * {@link KllDoublesSketchMergeBufferAggregatorHelper} returns for a buffer position.
+ */
+public class KllDoublesSketchMergeBufferAggregator extends KllSketchMergeBufferAggregator
+{
+  /**
+   * @param selector            selector passed on to the superclass
+   * @param k                   first argument of the helper constructor
+   * @param maxIntermediateSize second argument of the helper constructor
+   */
+  public KllDoublesSketchMergeBufferAggregator(
+      final ColumnValueSelector selector,
+      final int k,
+      final int maxIntermediateSize
+  )
+  {
+    super(selector, new KllDoublesSketchMergeBufferAggregatorHelper(k, maxIntermediateSize));
+  }
+
+  /**
+   * Delegates to {@link KllDoublesSketchMergeAggregator#updateUnion} with the sketch at {@code position}.
+   */
+  @Override
+  public void aggregate(final ByteBuffer buffer, final int position)
+  {
+    final KllDoublesSketch union = helper.getSketchAtPosition(buffer, position);
+    KllDoublesSketchMergeAggregator.updateUnion(selector, union);
+  }
+}
diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeBufferAggregatorHelper.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeBufferAggregatorHelper.java
new file mode 100644
index 000000000000..fbc64e2709e1
--- /dev/null
+++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeBufferAggregatorHelper.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.kll;
+
+import org.apache.datasketches.kll.KllDoublesSketch;
+import org.apache.datasketches.memory.MemoryRequestServer;
+import org.apache.datasketches.memory.WritableMemory;
+
+/**
+ * {@link KllDoublesSketch}-specific half of the merge buffer aggregator helper: supplies the
+ * two factory calls that the superclass declares for creating and re-wrapping a union
+ * sketch over a {@link WritableMemory} region.
+ */
+public class KllDoublesSketchMergeBufferAggregatorHelper extends KllSketchMergeBufferAggregatorHelper
+{
+  /**
+   * @param k                   forwarded unchanged to the superclass constructor
+   * @param maxIntermediateSize forwarded unchanged to the superclass constructor
+   */
+  public KllDoublesSketchMergeBufferAggregatorHelper(
+      final int k,
+      final int maxIntermediateSize
+  )
+  {
+    super(k, maxIntermediateSize);
+  }
+
+  /**
+   * Creates a new sketch inside {@code mem}.
+   *
+   * @param k         sketch parameter requested by the caller
+   * @param mem       memory region that will hold the sketch
+   * @param reqServer memory request server handed to the sketch
+   * @return the newly created sketch
+   */
+  @Override
+  KllDoublesSketch newDirectInstance(int k, WritableMemory mem, MemoryRequestServer reqServer)
+  {
+    // Pass k through: the two-argument overload ignores the requested k and
+    // would build every buffer-backed union with the library's default.
+    return KllDoublesSketch.newDirectInstance(k, mem, reqServer);
+  }
+
+  /**
+   * Wraps a sketch that already lives in {@code mem}.
+   *
+   * @param mem       memory region containing the sketch
+   * @param reqServer memory request server handed to the sketch
+   * @return a writable sketch view over {@code mem}
+   */
+  @Override
+  KllDoublesSketch writableWrap(WritableMemory mem, MemoryRequestServer reqServer)
+  {
+    return KllDoublesSketch.writableWrap(mem, reqServer);
+  }
+
+}
diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeVectorAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeVectorAggregator.java
new file mode 100644
index 000000000000..b8718306b4a3
--- /dev/null
+++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeVectorAggregator.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.kll;
+
+import org.apache.datasketches.kll.KllDoublesSketch;
+import org.apache.druid.query.aggregation.VectorAggregator;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+
+import javax.annotation.Nullable;
+import java.nio.ByteBuffer;
+
+/**
+ * Vectorized aggregator that merges the non-null {@link KllDoublesSketch} objects of a vector
+ * into union sketches obtained from a {@link KllDoublesSketchMergeBufferAggregatorHelper}.
+ */
+public class KllDoublesSketchMergeVectorAggregator implements VectorAggregator
+{
+  private final VectorObjectSelector selector;
+  private final KllDoublesSketchMergeBufferAggregatorHelper helper;
+
+  /**
+   * @param selector            source of the object vector
+   * @param k                   first argument of the helper constructor
+   * @param maxIntermediateSize second argument of the helper constructor
+   */
+  public KllDoublesSketchMergeVectorAggregator(
+      final VectorObjectSelector selector,
+      final int k,
+      final int maxIntermediateSize
+  )
+  {
+    this.selector = selector;
+    this.helper = new KllDoublesSketchMergeBufferAggregatorHelper(k, maxIntermediateSize);
+  }
+
+  @Override
+  public void init(ByteBuffer buf, int position)
+  {
+    helper.init(buf, position);
+  }
+
+  /**
+   * Merges rows {@code [startRow, endRow)} into the single union at {@code position}; null entries are skipped.
+   */
+  @Override
+  public void aggregate(final ByteBuffer buf, final int position, final int startRow, final int endRow)
+  {
+    final Object[] sketches = selector.getObjectVector();
+
+    final KllDoublesSketch union = helper.getSketchAtPosition(buf, position);
+
+    for (int row = startRow; row < endRow; row++) {
+      final Object candidate = sketches[row];
+      if (candidate == null) {
+        continue;
+      }
+      union.merge((KllDoublesSketch) candidate);
+    }
+  }
+
+  /**
+   * Merges one row per entry of {@code positions}: entry {@code i} uses row {@code rows[i]}
+   * (or {@code i} when {@code rows} is null) and the union at {@code positions[i] + positionOffset}.
+   */
+  @Override
+  public void aggregate(
+      final ByteBuffer buf,
+      final int numRows,
+      final int[] positions,
+      @Nullable final int[] rows,
+      final int positionOffset
+  )
+  {
+    final Object[] sketches = selector.getObjectVector();
+
+    for (int i = 0; i < numRows; i++) {
+      final int row = rows == null ? i : rows[i];
+      final KllDoublesSketch incoming = (KllDoublesSketch) sketches[row];
+
+      if (incoming == null) {
+        continue;
+      }
+      helper.getSketchAtPosition(buf, positions[i] + positionOffset).merge(incoming);
+    }
+  }
+
+  @Nullable
+  @Override
+  public Object get(ByteBuffer buf, int position)
+  {
+    return helper.get(buf, position);
+  }
+
+  @Override
+  public void close()
+  {
+    helper.clear();
+  }
+
+  @Override
+  public void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, ByteBuffer newBuffer)
+  {
+    helper.relocate(oldPosition, newPosition, oldBuffer, newBuffer);
+  }
+}
diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchObjectStrategy.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchObjectStrategy.java
new file mode 100644
index 000000000000..97e670a625a5
--- /dev/null
+++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchObjectStrategy.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.kll;
+
+import it.unimi.dsi.fastutil.bytes.ByteArrays;
+import org.apache.datasketches.kll.KllDoublesSketch;
+import org.apache.datasketches.memory.Memory;
+import org.apache.druid.segment.data.ObjectStrategy;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+/**
+ * {@link ObjectStrategy} for {@link KllDoublesSketch}: compares through the aggregator
+ * factory's comparator and maps empty or null sketches to a zero-length byte array.
+ */
+public class KllDoublesSketchObjectStrategy implements ObjectStrategy
+{
+
+  @Override
+  public int compare(final KllDoublesSketch s1, final KllDoublesSketch s2)
+  {
+    return KllDoublesSketchAggregatorFactory.COMPARATOR.compare(s1, s2);
+  }
+
+  /**
+   * Returns the shared empty sketch for {@code numBytes == 0}; otherwise wraps the
+   * {@code numBytes} bytes starting at the buffer's current position, read as little-endian.
+   */
+  @Override
+  public KllDoublesSketch fromByteBuffer(final ByteBuffer buffer, final int numBytes)
+  {
+    if (numBytes != 0) {
+      final Memory whole = Memory.wrap(buffer, ByteOrder.LITTLE_ENDIAN);
+      return KllDoublesSketch.wrap(whole.region(buffer.position(), numBytes));
+    }
+    return KllDoublesSketchOperations.EMPTY_SKETCH;
+  }
+
+  @Override
+  public Class getClazz()
+  {
+    return KllDoublesSketch.class;
+  }
+
+  /**
+   * Returns {@code sketch.toByteArray()}, or an empty array when the sketch is null or empty.
+   */
+  @Override
+  public byte[] toBytes(final KllDoublesSketch sketch)
+  {
+    final boolean hasData = sketch != null && !sketch.isEmpty();
+    return hasData ? sketch.toByteArray() : ByteArrays.EMPTY_ARRAY;
+  }
+
+}
diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchOperations.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchOperations.java
new file mode 100644
index 000000000000..57cb51747101
--- /dev/null
+++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchOperations.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.kll;
+
+import org.apache.datasketches.kll.KllDoublesSketch;
+import org.apache.datasketches.memory.Memory;
+import org.apache.druid.java.util.common.ISE;
+import org.apache.druid.java.util.common.StringUtils;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Static helpers for turning serialized input (base64 string or byte array) into a
+ * {@link KllDoublesSketch}, plus a shared empty sketch instance.
+ */
+public class KllDoublesSketchOperations
+{
+
+  /** Sketch created once with {@code KllDoublesSketch.newHeapInstance()} and never updated here. */
+  public static final KllDoublesSketch EMPTY_SKETCH = KllDoublesSketch.newHeapInstance();
+
+  /**
+   * Converts {@code serializedSketch} to a sketch.
+   *
+   * @param serializedSketch a base64 {@link String}, a {@code byte[]}, or an existing
+   *                         {@link KllDoublesSketch} (returned as is)
+   * @return the resulting sketch
+   * @throws ISE if the argument is null or of any other type
+   */
+  public static KllDoublesSketch deserialize(final Object serializedSketch)
+  {
+    if (serializedSketch instanceof String) {
+      return deserializeFromBase64EncodedString((String) serializedSketch);
+    } else if (serializedSketch instanceof byte[]) {
+      return deserializeFromByteArray((byte[]) serializedSketch);
+    } else if (serializedSketch instanceof KllDoublesSketch) {
+      return (KllDoublesSketch) serializedSketch;
+    }
+    // Name the KLL type in the message; the previous text referred to the quantiles
+    // DoublesSketch, which is a different sketch family.
+    throw new ISE(
+        "Object is not of a type that can be deserialized to a KllDoublesSketch: %s",
+        serializedSketch == null ? "null" : serializedSketch.getClass()
+    );
+  }
+
+  /**
+   * Decodes {@code str} as base64 (bytes taken as UTF-8) and wraps the result as a sketch.
+   */
+  public static KllDoublesSketch deserializeFromBase64EncodedString(final String str)
+  {
+    return deserializeFromByteArray(StringUtils.decodeBase64(str.getBytes(StandardCharsets.UTF_8)));
+  }
+
+  /**
+   * Wraps {@code data} as a sketch via {@code KllDoublesSketch.wrap(Memory.wrap(data))}.
+   */
+  public static KllDoublesSketch deserializeFromByteArray(final byte[] data)
+  {
+    return KllDoublesSketch.wrap(Memory.wrap(data));
+  }
+
+}
diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToCDFPostAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToCDFPostAggregator.java
new file mode 100644
index 000000000000..63aa868c343d
--- /dev/null
+++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToCDFPostAggregator.java
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.kll;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.base.Preconditions;
+import org.apache.datasketches.kll.KllDoublesSketch;
+import org.apache.druid.java.util.common.IAE;
+import org.apache.druid.query.aggregation.AggregatorFactory;
+import org.apache.druid.query.aggregation.PostAggregator;
+import org.apache.druid.query.aggregation.post.PostAggregatorIds;
+import org.apache.druid.query.cache.CacheKeyBuilder;
+import org.apache.druid.segment.ColumnInspector;
+import org.apache.druid.segment.column.ColumnType;
+
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Post-aggregator that evaluates {@code getCDF(splitPoints)} on the {@link KllDoublesSketch}
+ * produced by another post-aggregator.
+ */
+public class KllDoublesSketchToCDFPostAggregator implements PostAggregator
+{
+
+  private final String name;
+  private final PostAggregator field;
+  private final double[] splitPoints;
+
+  /**
+   * All three arguments are required; a null argument fails the corresponding
+   * {@code Preconditions.checkNotNull} check.
+   */
+  @JsonCreator
+  public KllDoublesSketchToCDFPostAggregator(
+      @JsonProperty("name") final String name,
+      @JsonProperty("field") final PostAggregator field,
+      @JsonProperty("splitPoints") final double[] splitPoints)
+  {
+    this.name = Preconditions.checkNotNull(name, "name is null");
+    this.field = Preconditions.checkNotNull(field, "field is null");
+    this.splitPoints = Preconditions.checkNotNull(splitPoints, "array of split points is null");
+  }
+
+  /**
+   * Returns the sketch's CDF at the split points, or an array of
+   * {@code splitPoints.length + 1} NaN values when the sketch is empty.
+   */
+  @Override
+  public Object compute(final Map combinedAggregators)
+  {
+    final KllDoublesSketch sketch = (KllDoublesSketch) field.compute(combinedAggregators);
+    if (!sketch.isEmpty()) {
+      return sketch.getCDF(splitPoints);
+    }
+    final double[] allNaN = new double[splitPoints.length + 1];
+    Arrays.fill(allNaN, Double.NaN);
+    return allNaN;
+  }
+
+  @Override
+  @JsonProperty
+  public String getName()
+  {
+    return name;
+  }
+
+  @Override
+  public ColumnType getType(ColumnInspector signature)
+  {
+    return ColumnType.DOUBLE_ARRAY;
+  }
+
+  @JsonProperty
+  public PostAggregator getField()
+  {
+    return field;
+  }
+
+  @JsonProperty
+  public double[] getSplitPoints()
+  {
+    return splitPoints;
+  }
+
+  /** Always throws: no ordering is defined for the results of this post-aggregator. */
+  @Override
+  public Comparator getComparator()
+  {
+    throw new IAE("Comparing histograms is not supported");
+  }
+
+  @Override
+  public Set getDependentFields()
+  {
+    return field.getDependentFields();
+  }
+
+  @Override
+  public String toString()
+  {
+    final StringBuilder text = new StringBuilder(getClass().getSimpleName());
+    text.append("{");
+    text.append("name='").append(name).append('\'');
+    text.append(", field=").append(field);
+    text.append(", splitPoints=").append(Arrays.toString(splitPoints));
+    text.append("}");
+    return text.toString();
+  }
+
+  /** Equal when the other object has the same class, name, split points and field. */
+  @Override
+  public boolean equals(final Object o)
+  {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    final KllDoublesSketchToCDFPostAggregator other = (KllDoublesSketchToCDFPostAggregator) o;
+    return name.equals(other.name)
+           && Arrays.equals(splitPoints, other.splitPoints)
+           && field.equals(other.field);
+  }
+
+  @Override
+  public int hashCode()
+  {
+    final int nameAndField = name.hashCode() * 31 + field.hashCode();
+    return nameAndField * 31 + Arrays.hashCode(splitPoints);
+  }
+
+  /**
+   * Cache key: the CDF type id, then the field's cacheable form, then every split point in order.
+   */
+  @Override
+  public byte[] getCacheKey()
+  {
+    final CacheKeyBuilder keyBuilder = new CacheKeyBuilder(
+        PostAggregatorIds.KLL_DOUBLES_SKETCH_TO_CDF_CACHE_TYPE_ID);
+    keyBuilder.appendCacheable(field);
+    for (int i = 0; i < splitPoints.length; i++) {
+      keyBuilder.appendDouble(splitPoints[i]);
+    }
+    return keyBuilder.build();
+  }
+
+  @Override
+  public PostAggregator decorate(final Map map)
+  {
+    return this;
+  }
+
+}
diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToHistogramPostAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToHistogramPostAggregator.java
new file mode 100644
index 000000000000..9ab047752665
--- /dev/null
+++
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToHistogramPostAggregator.java @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import org.apache.datasketches.kll.KllDoublesSketch; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.post.PostAggregatorIds; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.column.ColumnType; + +import javax.annotation.Nullable; +import java.util.Arrays; +import java.util.Comparator; +import java.util.Map; +import java.util.Set; + +public class KllDoublesSketchToHistogramPostAggregator implements PostAggregator +{ + static final int DEFAULT_NUM_BINS = 10; + + private final String 
name; + private final PostAggregator field; + private final double[] splitPoints; + private final Integer numBins; + + @JsonCreator + public KllDoublesSketchToHistogramPostAggregator( + @JsonProperty("name") final String name, + @JsonProperty("field") final PostAggregator field, + @JsonProperty("splitPoints") @Nullable final double[] splitPoints, + @JsonProperty("numBins") @Nullable final Integer numBins) + { + this.name = Preconditions.checkNotNull(name, "name is null"); + this.field = Preconditions.checkNotNull(field, "field is null"); + this.splitPoints = splitPoints; + this.numBins = numBins; + if (splitPoints != null && numBins != null) { + throw new IAE("Cannot accept both 'splitPoints' and 'numBins'"); + } + } + + @Override + public Object compute(final Map combinedAggregators) + { + final KllDoublesSketch sketch = (KllDoublesSketch) field.compute(combinedAggregators); + final int numBins = splitPoints != null ? splitPoints.length + 1 : + (this.numBins != null ? this.numBins.intValue() : DEFAULT_NUM_BINS); + if (numBins < 2) { + throw new IAE("at least 2 bins expected"); + } + if (sketch.isEmpty()) { + final double[] histogram = new double[numBins]; + Arrays.fill(histogram, Double.NaN); + return histogram; + } + final double[] histogram = sketch.getPMF(splitPoints != null ? 
splitPoints : + equallySpacedPoints(numBins, sketch.getMinValue(), sketch.getMaxValue())); + for (int i = 0; i < histogram.length; i++) { + histogram[i] *= sketch.getN(); // scale fractions to counts + } + return histogram; + } + + // returns num-1 points that split the interval [min, max] into num equally-spaced intervals + // num must be at least 2 + private static double[] equallySpacedPoints(final int num, final double min, final double max) + { + final double[] points = new double[num - 1]; + final double delta = (max - min) / num; + for (int i = 0; i < num - 1; i++) { + points[i] = min + delta * (i + 1); + } + return points; + } + + @Override + @JsonProperty + public String getName() + { + return name; + } + + /** + * The result type is an array of doubles holding one value per histogram bin. + * @param signature not used by this implementation + */ + @Override + public ColumnType getType(ColumnInspector signature) + { + return ColumnType.DOUBLE_ARRAY; + } + + @JsonProperty + public PostAggregator getField() + { + return field; + } + + @JsonProperty + @JsonInclude(JsonInclude.Include.NON_NULL) + public double[] getSplitPoints() + { + return splitPoints; + } + + @JsonProperty + @JsonInclude(JsonInclude.Include.NON_NULL) + public Integer getNumBins() + { + return numBins; + } + + @Override + public Comparator getComparator() + { + throw new IAE("Comparing histograms is not supported"); + } + + @Override + public Set getDependentFields() + { + return field.getDependentFields(); + } + + @Override + public String toString() + { + return getClass().getSimpleName() + "{" + + "name='" + name + '\'' + + ", field=" + field + + ", splitPoints=" + Arrays.toString(splitPoints) + + ", numBins=" + numBins + + "}"; + } + + @Override + public boolean equals(final Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + final KllDoublesSketchToHistogramPostAggregator that = (KllDoublesSketchToHistogramPostAggregator) o; + if (!name.equals(that.name)) { + return false; + } + if
(!Arrays.equals(splitPoints, that.splitPoints)) { + return false; + } + if (!field.equals(that.field)) { + return false; + } + if (numBins == null && that.numBins == null) { + return true; + } + if (numBins != null && numBins.equals(that.numBins)) { + return true; + } + return false; + } + + @Override + public int hashCode() + { + int hashCode = name.hashCode() * 31 + field.hashCode(); + hashCode = hashCode * 31 + Arrays.hashCode(splitPoints); + if (numBins != null) { + hashCode = hashCode * 31 + numBins.hashCode(); + } + return hashCode; + } + + @Override + public byte[] getCacheKey() + { + final CacheKeyBuilder builder = new CacheKeyBuilder( + PostAggregatorIds.KLL_DOUBLES_SKETCH_TO_HISTOGRAM_CACHE_TYPE_ID).appendCacheable(field); + if (splitPoints != null) { + for (final double value : splitPoints) { + builder.appendDouble(value); + } + } + if (numBins != null) { + builder.appendInt(numBins); + } + return builder.build(); + } + + @Override + public PostAggregator decorate(final Map map) + { + return this; + } + +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToQuantilePostAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToQuantilePostAggregator.java new file mode 100644 index 000000000000..d2840d1fcdc4 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToQuantilePostAggregator.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import com.google.common.primitives.Doubles; +import org.apache.datasketches.kll.KllDoublesSketch; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.post.PostAggregatorIds; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.column.ColumnType; + +import java.util.Comparator; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +public class KllDoublesSketchToQuantilePostAggregator implements PostAggregator +{ + + private final String name; + private final PostAggregator field; + private final double fraction; + + @JsonCreator + public KllDoublesSketchToQuantilePostAggregator( + @JsonProperty("name") final String name, + @JsonProperty("field") final PostAggregator field, + @JsonProperty("fraction") final double fraction) + { + this.name = Preconditions.checkNotNull(name, "name is null"); + this.field = Preconditions.checkNotNull(field, "field is null"); + this.fraction = fraction; + } + + @Override + @JsonProperty + public String getName() + { + return name; + } + + @Override + public ColumnType getType(ColumnInspector signature) + { + return ColumnType.DOUBLE; + } + + @JsonProperty + public PostAggregator 
getField() + { + return field; + } + + @JsonProperty + public double getFraction() + { + return fraction; + } + + @Override + public Object compute(final Map combinedAggregators) + { + final KllDoublesSketch sketch = (KllDoublesSketch) field.compute(combinedAggregators); + return sketch.getQuantile(fraction); + } + + @Override + public Comparator getComparator() + { + return Doubles::compare; + } + + @Override + public byte[] getCacheKey() + { + return new CacheKeyBuilder(PostAggregatorIds.KLL_DOUBLES_SKETCH_TO_QUANTILE_CACHE_TYPE_ID) + .appendCacheable(field).appendDouble(fraction).build(); + } + + @Override + public PostAggregator decorate(final Map map) + { + return this; + } + + @Override + public Set getDependentFields() + { + return field.getDependentFields(); + } + + @Override + public String toString() + { + return getClass().getSimpleName() + "{" + + "name='" + name + '\'' + + ", field=" + field + + ", fraction=" + fraction + + "}"; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + KllDoublesSketchToQuantilePostAggregator that = (KllDoublesSketchToQuantilePostAggregator) o; + return Double.compare(that.fraction, fraction) == 0 && + name.equals(that.name) && + field.equals(that.field); + } + + @Override + public int hashCode() + { + return Objects.hash(name, field, fraction); + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToQuantilesPostAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToQuantilesPostAggregator.java new file mode 100644 index 000000000000..9862b08b1f30 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToQuantilesPostAggregator.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import org.apache.datasketches.kll.KllDoublesSketch; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.post.PostAggregatorIds; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.column.ColumnType; + +import java.util.Arrays; +import java.util.Comparator; +import java.util.Map; +import java.util.Set; + +public class KllDoublesSketchToQuantilesPostAggregator implements PostAggregator +{ + + private final String name; + private final PostAggregator field; + private final double[] fractions; + + @JsonCreator + public KllDoublesSketchToQuantilesPostAggregator( + @JsonProperty("name") final String name, + @JsonProperty("field") final PostAggregator field, + @JsonProperty("fractions") final double[] fractions) + { + this.name = 
Preconditions.checkNotNull(name, "name is null"); + this.field = Preconditions.checkNotNull(field, "field is null"); + this.fractions = Preconditions.checkNotNull(fractions, "array of fractions is null"); + } + + @Override + @JsonProperty + public String getName() + { + return name; + } + + @Override + public ColumnType getType(ColumnInspector signature) + { + return ColumnType.DOUBLE_ARRAY; + } + + @JsonProperty + public PostAggregator getField() + { + return field; + } + + @JsonProperty + public double[] getFractions() + { + return fractions; + } + + @Override + public Object compute(final Map combinedAggregators) + { + final KllDoublesSketch sketch = (KllDoublesSketch) field.compute(combinedAggregators); + if (sketch.isEmpty()) { + final double[] quantiles = new double[fractions.length]; + Arrays.fill(quantiles, Double.NaN); + return quantiles; + } + return sketch.getQuantiles(fractions); + } + + @Override + public Comparator getComparator() + { + throw new IAE("Comparing arrays of quantiles is not supported"); + } + + @Override + public Set getDependentFields() + { + return field.getDependentFields(); + } + + @Override + public String toString() + { + return getClass().getSimpleName() + "{" + + "name='" + name + '\'' + + ", field=" + field + + ", fractions=" + Arrays.toString(fractions) + + "}"; + } + + @Override + public boolean equals(final Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + final KllDoublesSketchToQuantilesPostAggregator that = (KllDoublesSketchToQuantilesPostAggregator) o; + if (!name.equals(that.name)) { + return false; + } + if (!Arrays.equals(fractions, that.fractions)) { + return false; + } + return field.equals(that.field); + } + + @Override + public int hashCode() + { + return (name.hashCode() * 31 + field.hashCode()) * 31 + Arrays.hashCode(fractions); + } + + @Override + public byte[] getCacheKey() + { + final CacheKeyBuilder builder = new CacheKeyBuilder( + 
PostAggregatorIds.KLL_DOUBLES_SKETCH_TO_QUANTILES_CACHE_TYPE_ID).appendCacheable(field); + for (final double value : fractions) { + builder.appendDouble(value); + } + return builder.build(); + } + + @Override + public PostAggregator decorate(final Map map) + { + return this; + } + +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToRankPostAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToRankPostAggregator.java new file mode 100644 index 000000000000..eaf5b1389fa3 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToRankPostAggregator.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import com.google.common.primitives.Doubles; +import org.apache.datasketches.kll.KllDoublesSketch; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.post.PostAggregatorIds; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.column.ColumnType; + +import java.util.Comparator; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +public class KllDoublesSketchToRankPostAggregator implements PostAggregator +{ + + private final String name; + private final PostAggregator field; + private final double value; + + @JsonCreator + public KllDoublesSketchToRankPostAggregator( + @JsonProperty("name") final String name, + @JsonProperty("field") final PostAggregator field, + @JsonProperty("value") final double value) + { + this.name = Preconditions.checkNotNull(name, "name is null"); + this.field = Preconditions.checkNotNull(field, "field is null"); + this.value = value; + } + + @Override + @JsonProperty + public String getName() + { + return name; + } + + @Override + public ColumnType getType(ColumnInspector signature) + { + return ColumnType.DOUBLE; + } + + @JsonProperty + public PostAggregator getField() + { + return field; + } + + @JsonProperty + public double getValue() + { + return value; + } + + @Override + public Object compute(final Map combinedAggregators) + { + final KllDoublesSketch sketch = (KllDoublesSketch) field.compute(combinedAggregators); + return sketch.getRank(value); + } + + @Override + public Comparator getComparator() + { + return Doubles::compare; + } + + @Override + public byte[] getCacheKey() + { + return new 
CacheKeyBuilder(PostAggregatorIds.KLL_DOUBLES_SKETCH_TO_RANK_CACHE_TYPE_ID) + .appendCacheable(field).appendDouble(value).build(); + } + + @Override + public PostAggregator decorate(final Map map) + { + return this; + } + + @Override + public Set getDependentFields() + { + return field.getDependentFields(); + } + + @Override + public String toString() + { + return getClass().getSimpleName() + "{" + + "name='" + name + '\'' + + ", field=" + field + + ", value=" + value + + "}"; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + KllDoublesSketchToRankPostAggregator that = (KllDoublesSketchToRankPostAggregator) o; + return Double.compare(that.value, value) == 0 && + name.equals(that.name) && + field.equals(that.field); + } + + @Override + public int hashCode() + { + return Objects.hash(name, field, value); + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToStringPostAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToStringPostAggregator.java new file mode 100644 index 000000000000..6fdce557e2ea --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToStringPostAggregator.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import org.apache.datasketches.kll.KllDoublesSketch; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.post.PostAggregatorIds; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.column.ColumnType; + +import java.util.Comparator; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +public class KllDoublesSketchToStringPostAggregator implements PostAggregator +{ + + private final String name; + private final PostAggregator field; + + @JsonCreator + public KllDoublesSketchToStringPostAggregator( + @JsonProperty("name") final String name, + @JsonProperty("field") final PostAggregator field) + { + this.name = Preconditions.checkNotNull(name, "name is null"); + this.field = Preconditions.checkNotNull(field, "field is null"); + } + + @Override + @JsonProperty + public String getName() + { + return name; + } + + @Override + public ColumnType getType(ColumnInspector signature) + { + return ColumnType.STRING; + } + + @JsonProperty + public PostAggregator getField() + { + return field; + } + + @Override + public Object compute(final Map combinedAggregators) + { + final 
KllDoublesSketch sketch = (KllDoublesSketch) field.compute(combinedAggregators); + return sketch.toString(); + } + + @Override + public Comparator getComparator() + { + throw new IAE("Comparing sketch summaries is not supported"); + } + + @Override + public byte[] getCacheKey() + { + final CacheKeyBuilder builder = new CacheKeyBuilder( + PostAggregatorIds.KLL_DOUBLES_SKETCH_TO_STRING_CACHE_TYPE_ID).appendCacheable(field); + return builder.build(); + } + + @Override + public PostAggregator decorate(final Map map) + { + return this; + } + + @Override + public Set getDependentFields() + { + return field.getDependentFields(); + } + + @Override + public String toString() + { + return this.getClass().getSimpleName() + "{" + + "name='" + name + '\'' + + ", field=" + field + + "}"; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + KllDoublesSketchToStringPostAggregator that = (KllDoublesSketchToStringPostAggregator) o; + return name.equals(that.name) && + field.equals(that.field); + } + + @Override + public int hashCode() + { + return Objects.hash(name, field); + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactory.java new file mode 100644 index 000000000000..72ae74ea83d6 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactory.java @@ -0,0 +1,234 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.datasketches.kll.KllFloatsSketch; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.AggregatorFactoryNotMergeableException; +import org.apache.druid.query.aggregation.AggregatorUtil; +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.segment.ColumnProcessors; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.VectorColumnProcessorFactory; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorObjectSelector; +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +public class KllFloatsSketchAggregatorFactory extends KllSketchAggregatorFactory +{ + public static final Comparator COMPARATOR = + 
Comparator.nullsFirst(Comparator.comparingLong(KllFloatsSketch::getN)); + + @JsonCreator + public KllFloatsSketchAggregatorFactory( + @JsonProperty("name") final String name, + @JsonProperty("fieldName") final String fieldName, + @JsonProperty("k") @Nullable final Integer k, + @JsonProperty("maxStreamLength") @Nullable final Long maxStreamLength + ) + { + this(name, fieldName, k, maxStreamLength, AggregatorUtil.KLL_FLOATS_SKETCH_BUILD_CACHE_TYPE_ID); + } + + KllFloatsSketchAggregatorFactory( + final String name, + final String fieldName, + @Nullable final Integer k, + @Nullable final Long maxStreamLength, + final byte cacheTypeId + ) + { + super( + name, + fieldName, + k, + maxStreamLength, + cacheTypeId + ); + } + + @Override + public Comparator getComparator() + { + return COMPARATOR; + } + + @Override + public List getRequiredColumns() + { + return Collections.singletonList( + new KllFloatsSketchAggregatorFactory( + getFieldName(), + getFieldName(), + getK(), + getMaxStreamLength() + ) + ); + } + + @Override + public AggregatorFactory getMergingFactory(final AggregatorFactory other) + throws AggregatorFactoryNotMergeableException + { + if (other.getName().equals(this.getName()) && other instanceof KllFloatsSketchAggregatorFactory) { + // KllSketch supports merging with different k. + // The result will have effective k between the specified k and the minimum k from all input sketches + // to achieve higher accuracy as much as possible. 
+ return new KllFloatsSketchMergeAggregatorFactory( + getName(), + Math.max(getK(), ((KllFloatsSketchAggregatorFactory) other).getK()), + getMaxStreamLength() + ); + } else { + throw new AggregatorFactoryNotMergeableException(this, other); + } + } + + @Override + public AggregatorFactory getCombiningFactory() + { + return new KllFloatsSketchMergeAggregatorFactory(getName(), getK(), getMaxStreamLength()); + } + + @Override + KllFloatsSketch getEmptySketch() + { + return KllFloatsSketchOperations.EMPTY_SKETCH; + } + + @Override + KllFloatsSketch newHeapInstance(final int k) + { + return KllFloatsSketch.newHeapInstance(k); + } + + @Override + Class getSketchClass() + { + return KllFloatsSketch.class; + } + + @Override + int getMaxSerializedSizeBytes(final int k, final long n) + { + return KllFloatsSketch.getMaxSerializedSizeBytes(k, n, true); + } + + @Override + KllSketchBuildAggregator getBuildAggregator(final ColumnValueSelector selector) + { + return new KllFloatsSketchBuildAggregator(selector, getK()); + } + + @Override + KllSketchMergeAggregator getMergeAggregator(final ColumnValueSelector selector) + { + return new KllFloatsSketchMergeAggregator(selector, getK()); + } + + @Override + KllFloatsSketchBuildBufferAggregator getBuildBufferAggregator(ColumnValueSelector selector) + { + return new KllFloatsSketchBuildBufferAggregator(selector, getK(), getMaxIntermediateSizeWithNulls()); + } + + @Override + KllFloatsSketchMergeBufferAggregator getMergeBufferAggregator(ColumnValueSelector selector) + { + return new KllFloatsSketchMergeBufferAggregator(selector, getK(), getMaxIntermediateSizeWithNulls()); + } + + @Override + public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFactory) + { + return ColumnProcessors.makeVectorProcessor( + getFieldName(), + new VectorColumnProcessorFactory() + { + @Override + public VectorAggregator makeSingleValueDimensionProcessor( + ColumnCapabilities capabilities, + SingleValueDimensionVectorSelector selector + ) 
+ { + return new KllSketchNoOpBufferAggregator(getEmptySketch()); + } + + @Override + public VectorAggregator makeMultiValueDimensionProcessor( + ColumnCapabilities capabilities, + MultiValueDimensionVectorSelector selector + ) + { + return new KllSketchNoOpBufferAggregator(getEmptySketch()); + } + + @Override + public VectorAggregator makeFloatProcessor(ColumnCapabilities capabilities, VectorValueSelector selector) + { + return new KllFloatsSketchBuildVectorAggregator(selector, getK(), getMaxIntermediateSizeWithNulls()); + } + + @Override + public VectorAggregator makeDoubleProcessor(ColumnCapabilities capabilities, VectorValueSelector selector) + { + return new KllFloatsSketchBuildVectorAggregator(selector, getK(), getMaxIntermediateSizeWithNulls()); + } + + @Override + public VectorAggregator makeLongProcessor(ColumnCapabilities capabilities, VectorValueSelector selector) + { + return new KllFloatsSketchBuildVectorAggregator(selector, getK(), getMaxIntermediateSizeWithNulls()); + } + + @Override + public VectorAggregator makeObjectProcessor(ColumnCapabilities capabilities, VectorObjectSelector selector) + { + return new KllFloatsSketchMergeVectorAggregator(selector, getK(), getMaxIntermediateSizeWithNulls()); + } + }, + selectorFactory + ); + } + + @Override + public Object deserialize(final Object object) + { + return KllFloatsSketchOperations.deserialize(object); + } + + @Override + public ColumnType getIntermediateType() + { + return KllSketchModule.FLOATS_TYPE; + } + +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchBuildAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchBuildAggregator.java new file mode 100644 index 000000000000..cadb5a72b1d8 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchBuildAggregator.java @@ -0,0 +1,40 
@@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import org.apache.datasketches.kll.KllFloatsSketch; +import org.apache.druid.segment.ColumnValueSelector; + +public class KllFloatsSketchBuildAggregator extends KllSketchBuildAggregator +{ + public KllFloatsSketchBuildAggregator(final ColumnValueSelector valueSelector, final int size) + { + super(valueSelector, KllFloatsSketch.newHeapInstance(size)); + } + + @Override + public synchronized void aggregate() + { + if (valueSelector.isNull()) { + return; + } + sketch.update(valueSelector.getFloat()); + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchBuildBufferAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchBuildBufferAggregator.java new file mode 100644 index 000000000000..7c8733faa171 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchBuildBufferAggregator.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import org.apache.datasketches.kll.KllFloatsSketch; +import org.apache.druid.segment.ColumnValueSelector; + +import java.nio.ByteBuffer; + +public class KllFloatsSketchBuildBufferAggregator + extends KllSketchBuildBufferAggregator +{ + public KllFloatsSketchBuildBufferAggregator( + final ColumnValueSelector valueSelector, + final int size, + final int maxIntermediateSize + ) + { + super(valueSelector, new KllFloatsSketchBuildBufferAggregatorHelper(size, maxIntermediateSize)); + } + + @Override + public void aggregate(final ByteBuffer buffer, final int position) + { + if (selector.isNull()) { + return; + } + + final KllFloatsSketch sketch = helper.getSketchAtPosition(buffer, position); + sketch.update(selector.getFloat()); + } + +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchBuildBufferAggregatorHelper.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchBuildBufferAggregatorHelper.java new file mode 100644 index 000000000000..360d864f962d --- /dev/null +++ 
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchBuildBufferAggregatorHelper.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import org.apache.datasketches.kll.KllFloatsSketch; +import org.apache.datasketches.memory.MemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; + +public class KllFloatsSketchBuildBufferAggregatorHelper + extends KllSketchBuildBufferAggregatorHelper +{ + public KllFloatsSketchBuildBufferAggregatorHelper(final int size, final int maxIntermediateSize) + { + super(size, maxIntermediateSize); + } + + @Override + KllFloatsSketch newDirectInstance(final int k, final WritableMemory mem, final MemoryRequestServer reqServer) + { + return KllFloatsSketch.newDirectInstance(mem, reqServer); + } + + @Override + KllFloatsSketch writableWrap(final WritableMemory mem, final MemoryRequestServer reqServer) + { + return KllFloatsSketch.writableWrap(mem, reqServer); + } + +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchBuildVectorAggregator.java 
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchBuildVectorAggregator.java new file mode 100644 index 000000000000..32dfa4880d55 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchBuildVectorAggregator.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import org.apache.datasketches.kll.KllFloatsSketch; +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class KllFloatsSketchBuildVectorAggregator implements VectorAggregator +{ + private final VectorValueSelector selector; + private final KllFloatsSketchBuildBufferAggregatorHelper helper; + + KllFloatsSketchBuildVectorAggregator( + final VectorValueSelector selector, + final int size, + final int maxIntermediateSize + ) + { + this.selector = selector; + this.helper = new KllFloatsSketchBuildBufferAggregatorHelper(size, maxIntermediateSize); + } + + @Override + public void init(final ByteBuffer buf, final int position) + { + helper.init(buf, position); + } + + @Override + public void aggregate(final ByteBuffer buf, final int position, final int startRow, final int endRow) + { + final float[] floats = selector.getFloatVector(); + final boolean[] nulls = selector.getNullVector(); + + final KllFloatsSketch sketch = helper.getSketchAtPosition(buf, position); + + for (int i = startRow; i < endRow; i++) { + if (nulls == null || !nulls[i]) { + sketch.update(floats[i]); + } + } + } + + @Override + public void aggregate( + final ByteBuffer buf, + final int numRows, + final int[] positions, + @Nullable final int[] rows, + final int positionOffset + ) + { + final float[] floats = selector.getFloatVector(); + final boolean[] nulls = selector.getNullVector(); + + for (int i = 0; i < numRows; i++) { + final int idx = rows != null ? 
rows[i] : i; + + if (nulls == null || !nulls[idx]) { + final int position = positions[i] + positionOffset; + helper.getSketchAtPosition(buf, position).update(floats[idx]); + } + } + } + + @Override + public Object get(final ByteBuffer buf, final int position) + { + return helper.get(buf, position); + } + + @Override + public void relocate(final int oldPosition, final int newPosition, final ByteBuffer oldBuf, final ByteBuffer newBuf) + { + helper.relocate(oldPosition, newPosition, oldBuf, newBuf); + } + + @Override + public void close() + { + helper.clear(); + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchComplexMetricSerde.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchComplexMetricSerde.java new file mode 100644 index 000000000000..4a71befe0c66 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchComplexMetricSerde.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.google.common.primitives.Floats; +import org.apache.datasketches.kll.KllFloatsSketch; +import org.apache.datasketches.memory.Memory; +import org.apache.druid.data.input.InputRow; +import org.apache.druid.segment.GenericColumnSerializer; +import org.apache.druid.segment.column.ColumnBuilder; +import org.apache.druid.segment.data.GenericIndexed; +import org.apache.druid.segment.data.ObjectStrategy; +import org.apache.druid.segment.serde.ComplexColumnPartSupplier; +import org.apache.druid.segment.serde.ComplexMetricExtractor; +import org.apache.druid.segment.serde.ComplexMetricSerde; +import org.apache.druid.segment.serde.LargeColumnSupportedComplexColumnSerializer; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; + +import java.nio.ByteBuffer; + +public class KllFloatsSketchComplexMetricSerde extends ComplexMetricSerde +{ + + private static final KllFloatsSketchObjectStrategy STRATEGY = new KllFloatsSketchObjectStrategy(); + + @Override + public String getTypeName() + { + return KllSketchModule.FLOATS_SKETCH; + } + + @Override + public ObjectStrategy getObjectStrategy() + { + return STRATEGY; + } + + @Override + public ComplexMetricExtractor getExtractor() + { + return new ComplexMetricExtractor() + { + private static final int MIN_K = 8; // package one input value into the smallest sketch + + @Override + public Class extractedClass() + { + return KllFloatsSketch.class; + } + + @Override + public Object extractValue(final InputRow inputRow, final String metricName) + { + final Object object = inputRow.getRaw(metricName); + if (object instanceof String) { // everything is a string during ingestion + final String objectString = (String) object; + // Autodetection of the input format: empty string, number, or base64 encoded sketch + // A serialized KllFloatsSketch, as currently implemented, always has 0 in the first 5 bits. 
+ // This is not a digit in base64 + final Float floatValue; + if (objectString.isEmpty()) { + return KllFloatsSketchOperations.EMPTY_SKETCH; + } else if ((floatValue = Floats.tryParse(objectString)) != null) { + final KllFloatsSketch sketch = KllFloatsSketch.newHeapInstance(MIN_K); + sketch.update(floatValue); + return sketch; + } + } else if (object instanceof Number) { // this is for reindexing + final KllFloatsSketch sketch = KllFloatsSketch.newHeapInstance(MIN_K); + sketch.update(((Number) object).floatValue()); + return sketch; + } + + if (object == null || object instanceof KllFloatsSketch || object instanceof Memory) { + return object; + } + return KllFloatsSketchOperations.deserialize(object); + } + }; + } + + @Override + public void deserializeColumn(final ByteBuffer buffer, final ColumnBuilder builder) + { + final GenericIndexed column = GenericIndexed.read(buffer, STRATEGY, builder.getFileMapper()); + builder.setComplexColumnSupplier(new ComplexColumnPartSupplier(getTypeName(), column)); + } + + // support large columns + @Override + public GenericColumnSerializer getSerializer(SegmentWriteOutMedium segmentWriteOutMedium, String column) + { + return LargeColumnSupportedComplexColumnSerializer.create(segmentWriteOutMedium, column, this.getObjectStrategy()); + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchJsonSerializer.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchJsonSerializer.java new file mode 100644 index 000000000000..47223bbd0796 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchJsonSerializer.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonSerializer; +import com.fasterxml.jackson.databind.SerializerProvider; +import org.apache.datasketches.kll.KllFloatsSketch; + +import java.io.IOException; + +public class KllFloatsSketchJsonSerializer extends JsonSerializer +{ + + @Override + public void serialize(final KllFloatsSketch sketch, final JsonGenerator generator, final SerializerProvider provider) + throws IOException + { + generator.writeBinary(sketch.toByteArray()); + } + +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeAggregator.java new file mode 100644 index 000000000000..211a3e811b84 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeAggregator.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import org.apache.datasketches.kll.KllFloatsSketch; +import org.apache.druid.segment.ColumnValueSelector; + +public class KllFloatsSketchMergeAggregator extends KllSketchMergeAggregator +{ + public KllFloatsSketchMergeAggregator(final ColumnValueSelector selector, final int k) + { + super(selector, KllFloatsSketch.newHeapInstance(k)); + } + + @Override + public synchronized void aggregate() + { + updateUnion(selector, union); + } + + static void updateUnion(ColumnValueSelector selector, KllFloatsSketch union) + { + final Object object = selector.getObject(); + if (object == null) { + return; + } + if (object instanceof KllFloatsSketch) { + union.merge((KllFloatsSketch) object); + } else { + union.update(selector.getFloat()); + } + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeAggregatorFactory.java new file mode 100644 index 000000000000..e1b0c2d6244c --- /dev/null +++ 
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeAggregatorFactory.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.annotations.VisibleForTesting; +import org.apache.datasketches.kll.KllFloatsSketch; +import org.apache.druid.query.aggregation.Aggregator; +import org.apache.druid.query.aggregation.AggregatorUtil; +import org.apache.druid.query.aggregation.BufferAggregator; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.NilColumnValueSelector; + +import javax.annotation.Nullable; + +public class KllFloatsSketchMergeAggregatorFactory extends KllFloatsSketchAggregatorFactory +{ + + @JsonCreator + public KllFloatsSketchMergeAggregatorFactory( + @JsonProperty("name") final String name, + @JsonProperty("k") @Nullable final Integer k, + @JsonProperty("maxStreamLength") @Nullable final Long maxStreamLength + ) + { + super(name, name, k, 
maxStreamLength, AggregatorUtil.KLL_FLOATS_SKETCH_MERGE_CACHE_TYPE_ID); + } + + @VisibleForTesting + KllFloatsSketchMergeAggregatorFactory( + final String name, + @Nullable final Integer k + ) + { + this(name, k, null); + } + + @Override + public Aggregator factorize(final ColumnSelectorFactory metricFactory) + { + final ColumnValueSelector selector = metricFactory.makeColumnValueSelector(getFieldName()); + if (selector instanceof NilColumnValueSelector) { + return new KllSketchNoOpAggregator(KllFloatsSketchOperations.EMPTY_SKETCH); + } + return getMergeAggregator(selector); + } + + @Override + public BufferAggregator factorizeBuffered(final ColumnSelectorFactory metricFactory) + { + final ColumnValueSelector selector = metricFactory.makeColumnValueSelector(getFieldName()); + if (selector instanceof NilColumnValueSelector) { + return new KllSketchNoOpBufferAggregator(KllFloatsSketchOperations.EMPTY_SKETCH); + } + return getMergeBufferAggregator(selector); + } + +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeBufferAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeBufferAggregator.java new file mode 100644 index 000000000000..4ea770c60dc0 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeBufferAggregator.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import org.apache.datasketches.kll.KllFloatsSketch; +import org.apache.druid.segment.ColumnValueSelector; + +import java.nio.ByteBuffer; + +public class KllFloatsSketchMergeBufferAggregator extends KllSketchMergeBufferAggregator +{ + public KllFloatsSketchMergeBufferAggregator( + final ColumnValueSelector selector, + final int k, + final int maxIntermediateSize + ) + { + super(selector, new KllFloatsSketchMergeBufferAggregatorHelper(k, maxIntermediateSize)); + } + + @Override + public void aggregate(final ByteBuffer buffer, final int position) + { + KllFloatsSketchMergeAggregator.updateUnion(selector, helper.getSketchAtPosition(buffer, position)); + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeBufferAggregatorHelper.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeBufferAggregatorHelper.java new file mode 100644 index 000000000000..e5ef68c6268d --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeBufferAggregatorHelper.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import org.apache.datasketches.kll.KllFloatsSketch; +import org.apache.datasketches.memory.MemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; + +public class KllFloatsSketchMergeBufferAggregatorHelper extends KllSketchMergeBufferAggregatorHelper +{ + public KllFloatsSketchMergeBufferAggregatorHelper( + final int k, + final int maxIntermediateSize + ) + { + super(k, maxIntermediateSize); + } + + @Override + KllFloatsSketch newDirectInstance(final int k, final WritableMemory mem, final MemoryRequestServer reqServer) + { + return KllFloatsSketch.newDirectInstance(mem, reqServer); + } + + @Override + KllFloatsSketch writableWrap(final WritableMemory mem, final MemoryRequestServer reqServer) + { + return KllFloatsSketch.writableWrap(mem, reqServer); + } + +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeVectorAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeVectorAggregator.java new file mode 100644 index 000000000000..d9f7f4f81e62 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeVectorAggregator.java @@ -0,0 +1,105 @@ +/* + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import org.apache.datasketches.kll.KllFloatsSketch; +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.segment.vector.VectorObjectSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class KllFloatsSketchMergeVectorAggregator implements VectorAggregator +{ + private final VectorObjectSelector selector; + private final KllFloatsSketchMergeBufferAggregatorHelper helper; + + public KllFloatsSketchMergeVectorAggregator( + final VectorObjectSelector selector, + final int k, + final int maxIntermediateSize + ) + { + this.selector = selector; + this.helper = new KllFloatsSketchMergeBufferAggregatorHelper(k, maxIntermediateSize); + } + + @Override + public void init(ByteBuffer buf, int position) + { + helper.init(buf, position); + } + + @Override + public void aggregate(final ByteBuffer buf, final int position, final int startRow, final int endRow) + { + final Object[] vector = selector.getObjectVector(); + + final KllFloatsSketch union = helper.getSketchAtPosition(buf, position); + + for (int i = startRow; i < endRow; i++) { + final KllFloatsSketch sketch = 
(KllFloatsSketch) vector[i]; + if (sketch != null) { + union.merge(sketch); + } + } + } + + @Override + public void aggregate( + final ByteBuffer buf, + final int numRows, + final int[] positions, + @Nullable final int[] rows, + final int positionOffset + ) + { + final Object[] vector = selector.getObjectVector(); + + for (int i = 0; i < numRows; i++) { + final KllFloatsSketch sketch = (KllFloatsSketch) vector[rows != null ? rows[i] : i]; + + if (sketch != null) { + final int position = positions[i] + positionOffset; + final KllFloatsSketch union = helper.getSketchAtPosition(buf, position); + union.merge(sketch); + } + } + } + + @Nullable + @Override + public Object get(ByteBuffer buf, int position) + { + return helper.get(buf, position); + } + + @Override + public void close() + { + helper.clear(); + } + + @Override + public void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, ByteBuffer newBuffer) + { + helper.relocate(oldPosition, newPosition, oldBuffer, newBuffer); + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchObjectStrategy.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchObjectStrategy.java new file mode 100644 index 000000000000..ff177a2f54f1 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchObjectStrategy.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import it.unimi.dsi.fastutil.bytes.ByteArrays; +import org.apache.datasketches.kll.KllFloatsSketch; +import org.apache.datasketches.memory.Memory; +import org.apache.druid.segment.data.ObjectStrategy; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +public class KllFloatsSketchObjectStrategy implements ObjectStrategy +{ + + @Override + public int compare(final KllFloatsSketch s1, final KllFloatsSketch s2) + { + return KllFloatsSketchAggregatorFactory.COMPARATOR.compare(s1, s2); + } + + @Override + public KllFloatsSketch fromByteBuffer(final ByteBuffer buffer, final int numBytes) + { + if (numBytes == 0) { + return KllFloatsSketchOperations.EMPTY_SKETCH; + } + return KllFloatsSketch.wrap(Memory.wrap(buffer, ByteOrder.LITTLE_ENDIAN).region(buffer.position(), numBytes)); + } + + @Override + public Class getClazz() + { + return KllFloatsSketch.class; + } + + @Override + public byte[] toBytes(final KllFloatsSketch sketch) + { + if (sketch == null || sketch.isEmpty()) { + return ByteArrays.EMPTY_ARRAY; + } + return sketch.toByteArray(); + } + +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchOperations.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchOperations.java new file mode 100644 index 000000000000..e32b67b2549a --- /dev/null +++ 
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchOperations.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import org.apache.datasketches.kll.KllFloatsSketch; +import org.apache.datasketches.memory.Memory; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.StringUtils; + +import java.nio.charset.StandardCharsets; + +public class KllFloatsSketchOperations +{ + + public static final KllFloatsSketch EMPTY_SKETCH = KllFloatsSketch.newHeapInstance(); + + public static KllFloatsSketch deserialize(final Object serializedSketch) + { + if (serializedSketch instanceof String) { + return deserializeFromBase64EncodedString((String) serializedSketch); + } else if (serializedSketch instanceof byte[]) { + return deserializeFromByteArray((byte[]) serializedSketch); + } else if (serializedSketch instanceof KllFloatsSketch) { + return (KllFloatsSketch) serializedSketch; + } + throw new ISE( + "Object is not of a type that can be deserialized to a KllFloatsSketch: %s", + serializedSketch == null ? 
"null" : serializedSketch.getClass() + ); + } + + public static KllFloatsSketch deserializeFromBase64EncodedString(final String str) + { + return deserializeFromByteArray(StringUtils.decodeBase64(str.getBytes(StandardCharsets.UTF_8))); + } + + public static KllFloatsSketch deserializeFromByteArray(final byte[] data) + { + return KllFloatsSketch.wrap(Memory.wrap(data)); + } + +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToCDFPostAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToCDFPostAggregator.java new file mode 100644 index 000000000000..a5d98ae79dc9 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToCDFPostAggregator.java @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import org.apache.datasketches.kll.KllFloatsSketch; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.post.PostAggregatorIds; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.column.ColumnType; + +import java.util.Arrays; +import java.util.Comparator; +import java.util.Map; +import java.util.Set; + +public class KllFloatsSketchToCDFPostAggregator implements PostAggregator +{ + + private final String name; + private final PostAggregator field; + private final float[] splitPoints; + + @JsonCreator + public KllFloatsSketchToCDFPostAggregator( + @JsonProperty("name") final String name, + @JsonProperty("field") final PostAggregator field, + @JsonProperty("splitPoints") final float[] splitPoints) + { + this.name = Preconditions.checkNotNull(name, "name is null"); + this.field = Preconditions.checkNotNull(field, "field is null"); + this.splitPoints = Preconditions.checkNotNull(splitPoints, "array of split points is null"); + } + + @Override + public Object compute(final Map combinedAggregators) + { + final KllFloatsSketch sketch = (KllFloatsSketch) field.compute(combinedAggregators); + if (sketch.isEmpty()) { + final double[] cdf = new double[splitPoints.length + 1]; + Arrays.fill(cdf, Double.NaN); + return cdf; + } + return sketch.getCDF(splitPoints); + } + + @Override + @JsonProperty + public String getName() + { + return name; + } + + @Override + public ColumnType getType(ColumnInspector signature) + { + return ColumnType.DOUBLE_ARRAY; + } + + @JsonProperty + public PostAggregator getField() + { + 
return field; + } + + @JsonProperty + public float[] getSplitPoints() + { + return splitPoints; + } + + @Override + public Comparator getComparator() + { + throw new IAE("Comparing histograms is not supported"); + } + + @Override + public Set getDependentFields() + { + return field.getDependentFields(); + } + + @Override + public String toString() + { + return getClass().getSimpleName() + "{" + + "name='" + name + '\'' + + ", field=" + field + + ", splitPoints=" + Arrays.toString(splitPoints) + + "}"; + } + + @Override + public boolean equals(final Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + final KllFloatsSketchToCDFPostAggregator that = (KllFloatsSketchToCDFPostAggregator) o; + if (!name.equals(that.name)) { + return false; + } + if (!Arrays.equals(splitPoints, that.splitPoints)) { + return false; + } + return field.equals(that.field); + } + + @Override + public int hashCode() + { + int hashCode = name.hashCode() * 31 + field.hashCode(); + hashCode = hashCode * 31 + Arrays.hashCode(splitPoints); + return hashCode; + } + + @Override + public byte[] getCacheKey() + { + final CacheKeyBuilder builder = new CacheKeyBuilder( + PostAggregatorIds.KLL_FLOATS_SKETCH_TO_CDF_CACHE_TYPE_ID).appendCacheable(field); + for (final float value : splitPoints) { + builder.appendFloat(value); + } + return builder.build(); + } + + @Override + public PostAggregator decorate(final Map map) + { + return this; + } + +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToHistogramPostAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToHistogramPostAggregator.java new file mode 100644 index 000000000000..fa1761bfcce7 --- /dev/null +++ 
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToHistogramPostAggregator.java @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import org.apache.datasketches.kll.KllFloatsSketch; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.post.PostAggregatorIds; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.column.ColumnType; + +import javax.annotation.Nullable; +import java.util.Arrays; +import java.util.Comparator; +import java.util.Map; +import java.util.Set; + +public class KllFloatsSketchToHistogramPostAggregator implements PostAggregator +{ + static final int DEFAULT_NUM_BINS = 10; + + private final String name; + 
private final PostAggregator field; + private final float[] splitPoints; + private final Integer numBins; + + @JsonCreator + public KllFloatsSketchToHistogramPostAggregator( + @JsonProperty("name") final String name, + @JsonProperty("field") final PostAggregator field, + @JsonProperty("splitPoints") @Nullable final float[] splitPoints, + @JsonProperty("numBins") @Nullable final Integer numBins) + { + this.name = Preconditions.checkNotNull(name, "name is null"); + this.field = Preconditions.checkNotNull(field, "field is null"); + this.splitPoints = splitPoints; + this.numBins = numBins; + if (splitPoints != null && numBins != null) { + throw new IAE("Cannot accept both 'splitPoints' and 'numBins'"); + } + } + + @Override + public Object compute(final Map combinedAggregators) + { + final KllFloatsSketch sketch = (KllFloatsSketch) field.compute(combinedAggregators); + final int numBins = splitPoints != null ? splitPoints.length + 1 : + (this.numBins != null ? this.numBins.intValue() : DEFAULT_NUM_BINS); + if (numBins < 2) { + throw new IAE("at least 2 bins expected"); + } + if (sketch.isEmpty()) { + final double[] histogram = new double[numBins]; + Arrays.fill(histogram, Double.NaN); + return histogram; + } + final double[] histogram = sketch.getPMF(splitPoints != null ? 
splitPoints : + equallySpacedPoints(numBins, sketch.getMinValue(), sketch.getMaxValue())); + for (int i = 0; i < histogram.length; i++) { + histogram[i] *= sketch.getN(); // scale fractions to counts + } + return histogram; + } + + // retuns num-1 points that split the interval [min, max] into num equally-spaced intervals + // num must be at least 2 + private static float[] equallySpacedPoints(final int num, final float min, final float max) + { + final float[] points = new float[num - 1]; + final float delta = (max - min) / num; + for (int i = 0; i < num - 1; i++) { + points[i] = min + delta * (i + 1); + } + return points; + } + + @Override + @JsonProperty + public String getName() + { + return name; + } + + /** + * actual type is {@link KllFloatsSketch} + * @param signature + */ + @Override + public ColumnType getType(ColumnInspector signature) + { + return ColumnType.DOUBLE_ARRAY; + } + + @JsonProperty + public PostAggregator getField() + { + return field; + } + + @JsonProperty + @JsonInclude(JsonInclude.Include.NON_NULL) + public float[] getSplitPoints() + { + return splitPoints; + } + + @JsonProperty + @JsonInclude(JsonInclude.Include.NON_NULL) + public Integer getNumBins() + { + return numBins; + } + + @Override + public Comparator getComparator() + { + throw new IAE("Comparing histograms is not supported"); + } + + @Override + public Set getDependentFields() + { + return field.getDependentFields(); + } + + @Override + public String toString() + { + return getClass().getSimpleName() + "{" + + "name='" + name + '\'' + + ", field=" + field + + ", splitPoints=" + Arrays.toString(splitPoints) + + ", numBins=" + numBins + + "}"; + } + + @Override + public boolean equals(final Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + final KllFloatsSketchToHistogramPostAggregator that = (KllFloatsSketchToHistogramPostAggregator) o; + if (!name.equals(that.name)) { + return false; + } + if 
(!Arrays.equals(splitPoints, that.splitPoints)) { + return false; + } + if (!field.equals(that.field)) { + return false; + } + if (numBins == null && that.numBins == null) { + return true; + } + if (numBins != null && numBins.equals(that.numBins)) { + return true; + } + return false; + } + + @Override + public int hashCode() + { + int hashCode = name.hashCode() * 31 + field.hashCode(); + hashCode = hashCode * 31 + Arrays.hashCode(splitPoints); + if (numBins != null) { + hashCode = hashCode * 31 + numBins.hashCode(); + } + return hashCode; + } + + @Override + public byte[] getCacheKey() + { + final CacheKeyBuilder builder = new CacheKeyBuilder( + PostAggregatorIds.KLL_FLOATS_SKETCH_TO_HISTOGRAM_CACHE_TYPE_ID).appendCacheable(field); + if (splitPoints != null) { + for (final float value : splitPoints) { + builder.appendFloat(value); + } + } + if (numBins != null) { + builder.appendInt(numBins); + } + return builder.build(); + } + + @Override + public PostAggregator decorate(final Map map) + { + return this; + } + +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToQuantilePostAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToQuantilePostAggregator.java new file mode 100644 index 000000000000..a6ab5f0e5d80 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToQuantilePostAggregator.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import com.google.common.primitives.Doubles; +import org.apache.datasketches.kll.KllFloatsSketch; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.post.PostAggregatorIds; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.column.ColumnType; + +import java.util.Comparator; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +public class KllFloatsSketchToQuantilePostAggregator implements PostAggregator +{ + + private final String name; + private final PostAggregator field; + private final double fraction; + + @JsonCreator + public KllFloatsSketchToQuantilePostAggregator( + @JsonProperty("name") final String name, + @JsonProperty("field") final PostAggregator field, + @JsonProperty("fraction") final double fraction) + { + this.name = Preconditions.checkNotNull(name, "name is null"); + this.field = Preconditions.checkNotNull(field, "field is null"); + this.fraction = fraction; + } + + @Override + @JsonProperty + public String getName() + { + return name; + } + + @Override + public ColumnType getType(ColumnInspector signature) + { + return ColumnType.FLOAT; + } + + @JsonProperty + public PostAggregator 
getField() + { + return field; + } + + @JsonProperty + public double getFraction() + { + return fraction; + } + + @Override + public Object compute(final Map combinedAggregators) + { + final KllFloatsSketch sketch = (KllFloatsSketch) field.compute(combinedAggregators); + return sketch.getQuantile(fraction); + } + + @Override + public Comparator getComparator() + { + return Doubles::compare; + } + + @Override + public byte[] getCacheKey() + { + return new CacheKeyBuilder(PostAggregatorIds.KLL_FLOATS_SKETCH_TO_QUANTILE_CACHE_TYPE_ID) + .appendCacheable(field).appendDouble(fraction).build(); + } + + @Override + public PostAggregator decorate(final Map map) + { + return this; + } + + @Override + public Set getDependentFields() + { + return field.getDependentFields(); + } + + @Override + public String toString() + { + return getClass().getSimpleName() + "{" + + "name='" + name + '\'' + + ", field=" + field + + ", fraction=" + fraction + + "}"; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + KllFloatsSketchToQuantilePostAggregator that = (KllFloatsSketchToQuantilePostAggregator) o; + return Double.compare(that.fraction, fraction) == 0 && + name.equals(that.name) && + field.equals(that.field); + } + + @Override + public int hashCode() + { + return Objects.hash(name, field, fraction); + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToQuantilesPostAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToQuantilesPostAggregator.java new file mode 100644 index 000000000000..8fac44daf77d --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToQuantilesPostAggregator.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import org.apache.datasketches.kll.KllFloatsSketch; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.post.PostAggregatorIds; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.column.ColumnType; + +import java.util.Arrays; +import java.util.Comparator; +import java.util.Map; +import java.util.Set; + +public class KllFloatsSketchToQuantilesPostAggregator implements PostAggregator +{ + + private final String name; + private final PostAggregator field; + private final double[] fractions; + + @JsonCreator + public KllFloatsSketchToQuantilesPostAggregator( + @JsonProperty("name") final String name, + @JsonProperty("field") final PostAggregator field, + @JsonProperty("fractions") final double[] fractions) + { + this.name = 
Preconditions.checkNotNull(name, "name is null"); + this.field = Preconditions.checkNotNull(field, "field is null"); + this.fractions = Preconditions.checkNotNull(fractions, "array of fractions is null"); + } + + @Override + @JsonProperty + public String getName() + { + return name; + } + + @Override + public ColumnType getType(ColumnInspector signature) + { + return ColumnType.FLOAT_ARRAY; + } + + @JsonProperty + public PostAggregator getField() + { + return field; + } + + @JsonProperty + public double[] getFractions() + { + return fractions; + } + + @Override + public Object compute(final Map combinedAggregators) + { + final KllFloatsSketch sketch = (KllFloatsSketch) field.compute(combinedAggregators); + if (sketch.isEmpty()) { + final float[] quantiles = new float[fractions.length]; + Arrays.fill(quantiles, Float.NaN); + return quantiles; + } + return sketch.getQuantiles(fractions); + } + + @Override + public Comparator getComparator() + { + throw new IAE("Comparing arrays of quantiles is not supported"); + } + + @Override + public Set getDependentFields() + { + return field.getDependentFields(); + } + + @Override + public String toString() + { + return getClass().getSimpleName() + "{" + + "name='" + name + '\'' + + ", field=" + field + + ", fractions=" + Arrays.toString(fractions) + + "}"; + } + + @Override + public boolean equals(final Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + final KllFloatsSketchToQuantilesPostAggregator that = (KllFloatsSketchToQuantilesPostAggregator) o; + if (!name.equals(that.name)) { + return false; + } + if (!Arrays.equals(fractions, that.fractions)) { + return false; + } + return field.equals(that.field); + } + + @Override + public int hashCode() + { + return (name.hashCode() * 31 + field.hashCode()) * 31 + Arrays.hashCode(fractions); + } + + @Override + public byte[] getCacheKey() + { + final CacheKeyBuilder builder = new CacheKeyBuilder( + 
PostAggregatorIds.KLL_FLOATS_SKETCH_TO_QUANTILES_CACHE_TYPE_ID).appendCacheable(field); + for (final double value : fractions) { + builder.appendDouble(value); + } + return builder.build(); + } + + @Override + public PostAggregator decorate(final Map map) + { + return this; + } + +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToRankPostAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToRankPostAggregator.java new file mode 100644 index 000000000000..7fb9600502de --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToRankPostAggregator.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import com.google.common.primitives.Doubles; +import org.apache.datasketches.kll.KllFloatsSketch; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.post.PostAggregatorIds; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.column.ColumnType; + +import java.util.Comparator; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +public class KllFloatsSketchToRankPostAggregator implements PostAggregator +{ + + private final String name; + private final PostAggregator field; + private final float value; + + @JsonCreator + public KllFloatsSketchToRankPostAggregator( + @JsonProperty("name") final String name, + @JsonProperty("field") final PostAggregator field, + @JsonProperty("value") final float value) + { + this.name = Preconditions.checkNotNull(name, "name is null"); + this.field = Preconditions.checkNotNull(field, "field is null"); + this.value = value; + } + + @Override + @JsonProperty + public String getName() + { + return name; + } + + @Override + public ColumnType getType(ColumnInspector signature) + { + return ColumnType.DOUBLE; + } + + @JsonProperty + public PostAggregator getField() + { + return field; + } + + @JsonProperty + public float getValue() + { + return value; + } + + @Override + public Object compute(final Map combinedAggregators) + { + final KllFloatsSketch sketch = (KllFloatsSketch) field.compute(combinedAggregators); + return sketch.getRank(value); + } + + @Override + public Comparator getComparator() + { + return Doubles::compare; + } + + @Override + public byte[] getCacheKey() + { + return new 
CacheKeyBuilder(PostAggregatorIds.KLL_FLOATS_SKETCH_TO_RANK_CACHE_TYPE_ID) + .appendCacheable(field).appendFloat(value).build(); + } + + @Override + public PostAggregator decorate(final Map map) + { + return this; + } + + @Override + public Set getDependentFields() + { + return field.getDependentFields(); + } + + @Override + public String toString() + { + return getClass().getSimpleName() + "{" + + "name='" + name + '\'' + + ", field=" + field + + ", value=" + value + + "}"; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + KllFloatsSketchToRankPostAggregator that = (KllFloatsSketchToRankPostAggregator) o; + return Float.compare(that.value, value) == 0 && + name.equals(that.name) && + field.equals(that.field); + } + + @Override + public int hashCode() + { + return Objects.hash(name, field, value); + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToStringPostAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToStringPostAggregator.java new file mode 100644 index 000000000000..e5aeb0a42feb --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToStringPostAggregator.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import org.apache.datasketches.kll.KllFloatsSketch; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.post.PostAggregatorIds; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.column.ColumnType; + +import java.util.Comparator; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +public class KllFloatsSketchToStringPostAggregator implements PostAggregator +{ + + private final String name; + private final PostAggregator field; + + @JsonCreator + public KllFloatsSketchToStringPostAggregator( + @JsonProperty("name") final String name, + @JsonProperty("field") final PostAggregator field) + { + this.name = Preconditions.checkNotNull(name, "name is null"); + this.field = Preconditions.checkNotNull(field, "field is null"); + } + + @Override + @JsonProperty + public String getName() + { + return name; + } + + @Override + public ColumnType getType(ColumnInspector signature) + { + return ColumnType.STRING; + } + + @JsonProperty + public PostAggregator getField() + { + return field; + } + + @Override + public Object compute(final Map combinedAggregators) + { + final 
KllFloatsSketch sketch = (KllFloatsSketch) field.compute(combinedAggregators); + return sketch.toString(); + } + + @Override + public Comparator getComparator() + { + throw new IAE("Comparing sketch summaries is not supported"); + } + + @Override + public byte[] getCacheKey() + { + final CacheKeyBuilder builder = new CacheKeyBuilder( + PostAggregatorIds.KLL_FLOATS_SKETCH_TO_STRING_CACHE_TYPE_ID).appendCacheable(field); + return builder.build(); + } + + @Override + public PostAggregator decorate(final Map map) + { + return this; + } + + @Override + public Set getDependentFields() + { + return field.getDependentFields(); + } + + @Override + public String toString() + { + return this.getClass().getSimpleName() + "{" + + "name='" + name + '\'' + + ", field=" + field + + "}"; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + KllFloatsSketchToStringPostAggregator that = (KllFloatsSketchToStringPostAggregator) o; + return name.equals(that.name) && + field.equals(that.field); + } + + @Override + public int hashCode() + { + return Objects.hash(name, field); + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchAggregatorFactory.java new file mode 100644 index 000000000000..df181cc7dd96 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchAggregatorFactory.java @@ -0,0 +1,279 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.datasketches.kll.KllSketch; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.aggregation.AggregateCombiner; +import org.apache.druid.query.aggregation.Aggregator; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.BufferAggregator; +import org.apache.druid.query.aggregation.ObjectAggregateCombiner; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.NilColumnValueSelector; +import org.apache.druid.segment.column.ColumnType; + +import javax.annotation.Nullable; +import java.util.Collections; +import java.util.List; +import java.util.Objects; + +abstract class KllSketchAggregatorFactory extends AggregatorFactory +{ + public static final int DEFAULT_K = 200; + + // Used for sketch size estimation. 
+ public static final long DEFAULT_MAX_STREAM_LENGTH = 1_000_000_000; + + private final String name; + private final String fieldName; + private final int k; + private final long maxStreamLength; + private final byte cacheTypeId; + + KllSketchAggregatorFactory( + final String name, + final String fieldName, + @Nullable final Integer k, + @Nullable final Long maxStreamLength, + final byte cacheTypeId + ) + { + if (name == null) { + throw new IAE("Must have a valid, non-null aggregator name"); + } + this.name = name; + if (fieldName == null) { + throw new IAE("Parameter fieldName must be specified"); + } + this.fieldName = fieldName; + this.k = k == null ? DEFAULT_K : k; + this.maxStreamLength = maxStreamLength == null ? DEFAULT_MAX_STREAM_LENGTH : maxStreamLength; + this.cacheTypeId = cacheTypeId; + } + + @Override + public Aggregator factorize(final ColumnSelectorFactory metricFactory) + { + if (metricFactory.getColumnCapabilities(fieldName) != null + && metricFactory.getColumnCapabilities(fieldName).isNumeric()) { + final ColumnValueSelector selector = metricFactory.makeColumnValueSelector(fieldName); + if (selector instanceof NilColumnValueSelector) { + return new KllSketchNoOpAggregator(getEmptySketch()); + } + return getBuildAggregator(selector); + } + final ColumnValueSelector selector = metricFactory.makeColumnValueSelector(fieldName); + if (selector instanceof NilColumnValueSelector) { + return new KllSketchNoOpAggregator(getEmptySketch()); + } + return getMergeAggregator(selector); + } + + @Override + public BufferAggregator factorizeBuffered(final ColumnSelectorFactory metricFactory) + { + if (metricFactory.getColumnCapabilities(fieldName) != null + && metricFactory.getColumnCapabilities(fieldName).isNumeric()) { + final ColumnValueSelector selector = metricFactory.makeColumnValueSelector(fieldName); + if (selector instanceof NilColumnValueSelector) { + return new KllSketchNoOpBufferAggregator(getEmptySketch()); + } + return 
getBuildBufferAggregator(selector); + } + final ColumnValueSelector selector = metricFactory.makeColumnValueSelector(fieldName); + if (selector instanceof NilColumnValueSelector) { + return new KllSketchNoOpBufferAggregator(getEmptySketch()); + } + return getMergeBufferAggregator(selector); + } + + @Override + public boolean canVectorize(ColumnInspector columnInspector) + { + return true; + } + + @Override + public Object combine(final Object lhs, final Object rhs) + { + final SketchType sketch = newHeapInstance(k); + sketch.merge((SketchType) lhs); + sketch.merge((SketchType) rhs); + return sketch; + } + + @Override + public AggregateCombiner makeAggregateCombiner() + { + return new ObjectAggregateCombiner() + { + private final SketchType union = newHeapInstance(k); + + @Override + public void reset(final ColumnValueSelector selector) + { + union.reset(); + fold(selector); + } + + @Override + public void fold(final ColumnValueSelector selector) + { + final SketchType sketch = (SketchType) selector.getObject(); + union.merge(sketch); + } + + @Nullable + @Override + public SketchType getObject() + { + return union; + } + + @Override + public Class classOfObject() + { + return getSketchClass(); + } + }; + } + + @Override + @JsonProperty + public String getName() + { + return name; + } + + @JsonProperty + public String getFieldName() + { + return fieldName; + } + + @JsonProperty + public int getK() + { + return k; + } + + @JsonProperty + public long getMaxStreamLength() + { + return maxStreamLength; + } + + @Override + public List requiredFields() + { + return Collections.singletonList(fieldName); + } + + @Override + public int guessAggregatorHeapFootprint(long rows) + { + return getMaxSerializedSizeBytes(k, rows); + } + + // Quantiles sketches never stop growing, but they do so very slowly. 
+ // This size must suffice for overwhelming majority of sketches, + // but some sketches may request more memory on heap and move there + @Override + public int getMaxIntermediateSize() + { + return getMaxSerializedSizeBytes(k, maxStreamLength); + } + + @Nullable + @Override + public Object finalizeComputation(@Nullable final Object object) + { + return object == null ? null : ((SketchType) object).getN(); + } + + @Override + public ColumnType getResultType() + { + return ColumnType.LONG; + } + + @Override + public byte[] getCacheKey() + { + // maxStreamLength is not included in the cache key as it does nothing with query result. + return new CacheKeyBuilder(cacheTypeId).appendString(name).appendString(fieldName).appendInt(k).build(); + } + + @Override + public boolean equals(final Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + KllSketchAggregatorFactory that = (KllSketchAggregatorFactory) o; + return k == that.k + && maxStreamLength == that.maxStreamLength + && name.equals(that.name) + && fieldName.equals(that.fieldName); + } + + @Override + public int hashCode() + { + return Objects.hash(name, fieldName, k, maxStreamLength); // no need to use cacheTypeId here + } + + @Override + public String toString() + { + return getClass().getSimpleName() + "{" + + "name=" + name + + ", fieldName=" + fieldName + + ", k=" + k + + "}"; + } + + abstract SketchType getEmptySketch(); + + abstract SketchType newHeapInstance(int k); + + abstract Class getSketchClass(); + + abstract int getMaxSerializedSizeBytes(int k, long n); + + abstract KllSketchBuildAggregator getBuildAggregator( + ColumnValueSelector selector); + + abstract KllSketchMergeAggregator getMergeAggregator(ColumnValueSelector selector); + + abstract KllSketchBuildBufferAggregator + getBuildBufferAggregator(ColumnValueSelector selector); + + abstract KllSketchMergeBufferAggregator + getMergeBufferAggregator(ColumnValueSelector selector); +} 
diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchBuildAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchBuildAggregator.java new file mode 100644 index 000000000000..66f3a1671fc5 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchBuildAggregator.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import org.apache.datasketches.kll.KllSketch; +import org.apache.druid.query.aggregation.Aggregator; +import org.apache.druid.segment.ColumnValueSelector; + +import javax.annotation.Nullable; + +abstract class KllSketchBuildAggregator implements Aggregator +{ + protected final ColumnValueSelector valueSelector; + @Nullable protected SketchType sketch; + + KllSketchBuildAggregator(final ColumnValueSelector valueSelector, final SketchType sketch) + { + this.valueSelector = valueSelector; + this.sketch = sketch; + } + + @Override + public synchronized Object get() + { + return sketch; + } + + @Override + public float getFloat() + { + throw new UnsupportedOperationException("Not implemented"); + } + + @Override + public long getLong() + { + throw new UnsupportedOperationException("Not implemented"); + } + + @Override + public synchronized void close() + { + sketch = null; + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchBuildBufferAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchBuildBufferAggregator.java new file mode 100644 index 000000000000..1809b2111bb2 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchBuildBufferAggregator.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import org.apache.datasketches.kll.KllSketch; +import org.apache.druid.query.aggregation.BufferAggregator; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnValueSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +abstract class KllSketchBuildBufferAggregator implements BufferAggregator +{ + + protected final ColumnValueSelector selector; + protected final KllSketchBuildBufferAggregatorHelper helper; + + public KllSketchBuildBufferAggregator( + final ColumnValueSelector valueSelector, + final KllSketchBuildBufferAggregatorHelper helper + ) + { + this.selector = valueSelector; + this.helper = helper; + } + + @Override + public void init(ByteBuffer buf, int position) + { + helper.init(buf, position); + } + + @Nullable + @Override + public Object get(ByteBuffer buf, int position) + { + return helper.get(buf, position); + } + + @Override + public float getFloat(final ByteBuffer buffer, final int position) + { + throw new UnsupportedOperationException("Not implemented"); + } + + @Override + public long getLong(final ByteBuffer buffer, final int position) + { + throw new UnsupportedOperationException("Not implemented"); + } + + @Override + public void close() + { + helper.clear(); + } + + // A small number of sketches may run out of the given memory, request more memory on heap and move there. + // In that case we need to reuse the object from the cache as opposed to wrapping the new buffer. 
+ @Override + public void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, ByteBuffer newBuffer) + { + helper.relocate(oldPosition, newPosition, oldBuffer, newBuffer); + } + + @Override + public void inspectRuntimeShape(final RuntimeShapeInspector inspector) + { + inspector.visit("selector", selector); + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchBuildBufferAggregatorHelper.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchBuildBufferAggregatorHelper.java new file mode 100644 index 000000000000..f0d0ea889b83 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchBuildBufferAggregatorHelper.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import it.unimi.dsi.fastutil.ints.Int2ObjectMap; +import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; +import org.apache.datasketches.kll.KllSketch; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.MemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.IdentityHashMap; + +abstract class KllSketchBuildBufferAggregatorHelper +{ + private static final MemoryRequestServer MEM_REQ_SERVER = new DefaultMemoryRequestServer(); + private final int size; + private final int maxIntermediateSize; + private final IdentityHashMap memCache = new IdentityHashMap<>(); + private final IdentityHashMap> sketches = new IdentityHashMap<>(); + + public KllSketchBuildBufferAggregatorHelper(final int size, final int maxIntermediateSize) + { + this.size = size; + this.maxIntermediateSize = maxIntermediateSize; + } + + public void init(final ByteBuffer buffer, final int position) + { + final WritableMemory mem = getMemory(buffer); + final WritableMemory region = mem.writableRegion(position, maxIntermediateSize); + final SketchType sketch = newDirectInstance(size, region, MEM_REQ_SERVER); + putSketch(buffer, position, sketch); + } + + public SketchType get(final ByteBuffer buffer, final int position) + { + return sketches.get(buffer).get(position); + } + + // A small number of sketches may run out of the given memory, request more memory on heap and move there. + // In that case we need to reuse the object from the cache as opposed to wrapping the new buffer. 
+ public void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, ByteBuffer newBuffer) + { + SketchType sketch = sketches.get(oldBuffer).get(oldPosition); + final WritableMemory oldRegion = getMemory(oldBuffer).writableRegion(oldPosition, maxIntermediateSize); + if (sketch.isSameResource(oldRegion)) { // sketch was not relocated on heap + final WritableMemory newRegion = getMemory(newBuffer).writableRegion(newPosition, maxIntermediateSize); + sketch = writableWrap(newRegion, MEM_REQ_SERVER); + } + putSketch(newBuffer, newPosition, sketch); + + final Int2ObjectMap map = sketches.get(oldBuffer); + map.remove(oldPosition); + if (map.isEmpty()) { + sketches.remove(oldBuffer); + memCache.remove(oldBuffer); + } + } + + public void clear() + { + sketches.clear(); + memCache.clear(); + } + + /** + * Retrieves the sketch at a particular position. + */ + public SketchType getSketchAtPosition(final ByteBuffer buf, final int position) + { + return sketches.get(buf).get(position); + } + + private WritableMemory getMemory(final ByteBuffer buffer) + { + return memCache.computeIfAbsent(buffer, + buf -> WritableMemory.writableWrap(buf, ByteOrder.LITTLE_ENDIAN, MEM_REQ_SERVER)); + } + + private void putSketch(final ByteBuffer buffer, final int position, final SketchType sketch) + { + Int2ObjectMap map = sketches.computeIfAbsent(buffer, buf -> new Int2ObjectOpenHashMap<>()); + map.put(position, sketch); + } + + abstract SketchType newDirectInstance(int k, WritableMemory mem, MemoryRequestServer reqServer); + + abstract SketchType writableWrap(WritableMemory mem, MemoryRequestServer reqServer); +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeAggregator.java new file mode 100644 index 000000000000..1ba0d0a13652 --- /dev/null +++ 
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeAggregator.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import org.apache.datasketches.kll.KllSketch; +import org.apache.druid.query.aggregation.Aggregator; +import org.apache.druid.segment.ColumnValueSelector; + +import javax.annotation.Nullable; + +abstract class KllSketchMergeAggregator implements Aggregator +{ + protected final ColumnValueSelector selector; + @Nullable protected SketchType union; + + public KllSketchMergeAggregator(final ColumnValueSelector selector, final SketchType sketch) + { + this.selector = selector; + union = sketch; + } + + @Override + public synchronized Object get() + { + return union; + } + + @Override + public float getFloat() + { + throw new UnsupportedOperationException("Not implemented"); + } + + @Override + public long getLong() + { + throw new UnsupportedOperationException("Not implemented"); + } + + @Override + public synchronized void close() + { + union = null; + } +} diff --git 
a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeAggregatorFactory.java new file mode 100644 index 000000000000..86b9b507e0ae --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeAggregatorFactory.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import org.apache.datasketches.kll.KllSketch; +import org.apache.druid.query.aggregation.Aggregator; +import org.apache.druid.query.aggregation.BufferAggregator; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.NilColumnValueSelector; + +import javax.annotation.Nullable; + +abstract class KllSketchMergeAggregatorFactory + extends KllSketchAggregatorFactory +{ + public KllSketchMergeAggregatorFactory( + final String name, + final String fieldName, + @Nullable final Integer k, + @Nullable final Long maxStreamLength, + final byte cacheTypeId + ) + { + super( + name, + fieldName, + k, + maxStreamLength, + cacheTypeId + ); + } + + @Override + public Aggregator factorize(final ColumnSelectorFactory metricFactory) + { + final ColumnValueSelector selector = metricFactory.makeColumnValueSelector(getFieldName()); + if (selector instanceof NilColumnValueSelector) { + return new KllSketchNoOpAggregator(getEmptySketch()); + } + return getMergeAggregator(selector); + } + + @Override + public BufferAggregator factorizeBuffered(final ColumnSelectorFactory metricFactory) + { + final ColumnValueSelector selector = metricFactory.makeColumnValueSelector(getFieldName()); + if (selector instanceof NilColumnValueSelector) { + return new KllSketchNoOpBufferAggregator(getEmptySketch()); + } + return getMergeBufferAggregator(selector); + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeBufferAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeBufferAggregator.java new file mode 100644 index 000000000000..cc04b7fa10ca --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeBufferAggregator.java @@ -0,0 
+1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import org.apache.datasketches.kll.KllSketch; +import org.apache.druid.query.aggregation.BufferAggregator; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnValueSelector; + +import java.nio.ByteBuffer; + +abstract class KllSketchMergeBufferAggregator implements BufferAggregator +{ + protected final ColumnValueSelector selector; + protected final KllSketchMergeBufferAggregatorHelper helper; + + public KllSketchMergeBufferAggregator( + final ColumnValueSelector selector, + final KllSketchMergeBufferAggregatorHelper helper + ) + { + this.selector = selector; + this.helper = helper; + } + + @Override + public void init(final ByteBuffer buffer, final int position) + { + helper.init(buffer, position); + } + + @Override + public Object get(final ByteBuffer buffer, final int position) + { + return helper.getSketchAtPosition(buffer, position); + } + + @Override + public float getFloat(final ByteBuffer buffer, final int position) + { + throw new UnsupportedOperationException("Not implemented"); + } + + @Override + public long 
getLong(final ByteBuffer buffer, final int position) + { + throw new UnsupportedOperationException("Not implemented"); + } + + @Override + public synchronized void close() + { + helper.clear(); + } + + // A small number of sketches may run out of the given memory, request more memory on heap and move there. + // In that case we need to reuse the object from the cache as opposed to wrapping the new buffer. + @Override + public synchronized void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, ByteBuffer newBuffer) + { + helper.relocate(oldPosition, newPosition, oldBuffer, newBuffer); + } + + @Override + public void inspectRuntimeShape(final RuntimeShapeInspector inspector) + { + inspector.visit("selector", selector); + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeBufferAggregatorHelper.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeBufferAggregatorHelper.java new file mode 100644 index 000000000000..fd54caad48d9 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeBufferAggregatorHelper.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import it.unimi.dsi.fastutil.ints.Int2ObjectMap; +import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; +import org.apache.datasketches.kll.KllSketch; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.MemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.IdentityHashMap; + +abstract class KllSketchMergeBufferAggregatorHelper +{ + private static final MemoryRequestServer MEM_REQ_SERVER = new DefaultMemoryRequestServer(); + private final int k; + private final int maxIntermediateSize; + private final IdentityHashMap memCache = new IdentityHashMap<>(); + private final IdentityHashMap> unions = new IdentityHashMap<>(); + + KllSketchMergeBufferAggregatorHelper( + final int k, + final int maxIntermediateSize + ) + { + this.k = k; + this.maxIntermediateSize = maxIntermediateSize; + } + + public void init(final ByteBuffer buffer, final int position) + { + final WritableMemory mem = getMemory(buffer); + final WritableMemory region = mem.writableRegion(position, maxIntermediateSize); + final SketchType union = newDirectInstance(k, region, MEM_REQ_SERVER); + putUnion(buffer, position, union); + } + + public Object get(final ByteBuffer buffer, final int position) + { + return unions.get(buffer).get(position); + } + + public void clear() + { + unions.clear(); + memCache.clear(); + } + + // A small number of sketches may run out of the given memory, request more memory on heap and move there. + // In that case we need to reuse the object from the cache as opposed to wrapping the new buffer. 
+ public void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, ByteBuffer newBuffer) + { + SketchType union = unions.get(oldBuffer).get(oldPosition); + final WritableMemory oldMem = getMemory(oldBuffer).writableRegion(oldPosition, maxIntermediateSize); + if (union.isSameResource(oldMem)) { // union was not relocated on heap + final WritableMemory newMem = getMemory(newBuffer).writableRegion(newPosition, maxIntermediateSize); + union = writableWrap(newMem, MEM_REQ_SERVER); + } + putUnion(newBuffer, newPosition, union); + + Int2ObjectMap map = unions.get(oldBuffer); + map.remove(oldPosition); + if (map.isEmpty()) { + unions.remove(oldBuffer); + memCache.remove(oldBuffer); + } + } + + /** + * Retrieves the sketch at a particular position. + */ + public SketchType getSketchAtPosition(final ByteBuffer buf, final int position) + { + return unions.get(buf).get(position); + } + + private WritableMemory getMemory(final ByteBuffer buffer) + { + return memCache.computeIfAbsent(buffer, + buf -> WritableMemory.writableWrap(buf, ByteOrder.LITTLE_ENDIAN, MEM_REQ_SERVER)); + } + + private void putUnion(final ByteBuffer buffer, final int position, final SketchType union) + { + Int2ObjectMap map = unions.computeIfAbsent(buffer, buf -> new Int2ObjectOpenHashMap<>()); + map.put(position, union); + } + + abstract SketchType newDirectInstance(int k, WritableMemory mem, MemoryRequestServer reqServer); + + abstract SketchType writableWrap(WritableMemory mem, MemoryRequestServer reqServer); +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchModule.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchModule.java new file mode 100644 index 000000000000..0876b3865c07 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchModule.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.databind.Module; +import com.fasterxml.jackson.databind.jsontype.NamedType; +import com.fasterxml.jackson.databind.module.SimpleModule; +import com.google.common.annotations.VisibleForTesting; +import com.google.inject.Binder; +import org.apache.datasketches.kll.KllDoublesSketch; +import org.apache.datasketches.kll.KllFloatsSketch; +import org.apache.druid.initialization.DruidModule; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.serde.ComplexMetrics; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +public class KllSketchModule implements DruidModule +{ + public static final String DOUBLES_SKETCH = "KllDoublesSketch"; + public static final String DOUBLES_SKETCH_MERGE = "KllDoublesSketchMerge"; + public static final ColumnType DOUBLES_TYPE = ColumnType.ofComplex(DOUBLES_SKETCH); + public static final ColumnType DOUBLES_MERGE_TYPE = ColumnType.ofComplex(DOUBLES_SKETCH_MERGE); + + public static final String DOUBLES_SKETCH_HISTOGRAM_POST_AGG = "KllDoublesSketchToHistogram"; + public static final String 
DOUBLES_SKETCH_QUANTILE_POST_AGG = "KllDoublesSketchToQuantile"; + public static final String DOUBLES_SKETCH_QUANTILES_POST_AGG = "KllDoublesSketchToQuantiles"; + public static final String DOUBLES_SKETCH_RANK_POST_AGG = "KllDoublesSketchToRank"; + public static final String DOUBLES_SKETCH_CDF_POST_AGG = "KllDoublesSketchToCDF"; + public static final String DOUBLES_SKETCH_TO_STRING_POST_AGG = "KllDoublesSketchToString"; + + public static final String FLOATS_SKETCH = "KllFloatsSketch"; + public static final String FLOATS_SKETCH_MERGE = "KllFloatsSketchMerge"; + public static final ColumnType FLOATS_TYPE = ColumnType.ofComplex(FLOATS_SKETCH); + public static final ColumnType FLOATS_MERGE_TYPE = ColumnType.ofComplex(FLOATS_SKETCH_MERGE); + + public static final String FLOATS_SKETCH_HISTOGRAM_POST_AGG = "KllFloatsSketchToHistogram"; + public static final String FLOATS_SKETCH_QUANTILE_POST_AGG = "KllFloatsSketchToQuantile"; + public static final String FLOATS_SKETCH_QUANTILES_POST_AGG = "KllFloatsSketchToQuantiles"; + public static final String FLOATS_SKETCH_RANK_POST_AGG = "KllFloatsSketchToRank"; + public static final String FLOATS_SKETCH_CDF_POST_AGG = "KllFloatsSketchToCDF"; + public static final String FLOATS_SKETCH_TO_STRING_POST_AGG = "KllFloatsSketchToString"; + + @Override + public void configure(final Binder binder) + { + registerSerde(); + } + + @Override + public List getJacksonModules() + { + return Collections.unmodifiableList(Arrays.asList( + new SimpleModule("KllDoublesSketchModule") + .registerSubtypes( + new NamedType(KllDoublesSketchAggregatorFactory.class, DOUBLES_SKETCH), + new NamedType(KllDoublesSketchMergeAggregatorFactory.class, DOUBLES_SKETCH_MERGE), + new NamedType(KllDoublesSketchToHistogramPostAggregator.class, DOUBLES_SKETCH_HISTOGRAM_POST_AGG), + new NamedType(KllDoublesSketchToQuantilePostAggregator.class, DOUBLES_SKETCH_QUANTILE_POST_AGG), + new NamedType(KllDoublesSketchToQuantilesPostAggregator.class, 
DOUBLES_SKETCH_QUANTILES_POST_AGG), + new NamedType(KllDoublesSketchToRankPostAggregator.class, DOUBLES_SKETCH_RANK_POST_AGG), + new NamedType(KllDoublesSketchToCDFPostAggregator.class, DOUBLES_SKETCH_CDF_POST_AGG), + new NamedType(KllDoublesSketchToStringPostAggregator.class, DOUBLES_SKETCH_TO_STRING_POST_AGG) + ).addSerializer(KllDoublesSketch.class, new KllDoublesSketchJsonSerializer()), + new SimpleModule("KllFloatsSketchModule") + .registerSubtypes( + new NamedType(KllFloatsSketchAggregatorFactory.class, FLOATS_SKETCH), + new NamedType(KllFloatsSketchMergeAggregatorFactory.class, FLOATS_SKETCH_MERGE), + new NamedType(KllFloatsSketchToHistogramPostAggregator.class, FLOATS_SKETCH_HISTOGRAM_POST_AGG), + new NamedType(KllFloatsSketchToQuantilePostAggregator.class, FLOATS_SKETCH_QUANTILE_POST_AGG), + new NamedType(KllFloatsSketchToQuantilesPostAggregator.class, FLOATS_SKETCH_QUANTILES_POST_AGG), + new NamedType(KllFloatsSketchToRankPostAggregator.class, FLOATS_SKETCH_RANK_POST_AGG), + new NamedType(KllFloatsSketchToCDFPostAggregator.class, FLOATS_SKETCH_CDF_POST_AGG), + new NamedType(KllFloatsSketchToStringPostAggregator.class, FLOATS_SKETCH_TO_STRING_POST_AGG) + ).addSerializer(KllFloatsSketch.class, new KllFloatsSketchJsonSerializer()) + )); + } + + @VisibleForTesting + public static void registerSerde() + { + ComplexMetrics.registerSerde(DOUBLES_SKETCH, new KllDoublesSketchComplexMetricSerde()); + ComplexMetrics.registerSerde(FLOATS_SKETCH, new KllFloatsSketchComplexMetricSerde()); + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchNoOpAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchNoOpAggregator.java new file mode 100644 index 000000000000..9934a4631d84 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchNoOpAggregator.java @@ -0,0 +1,60 @@ +/* 
+ * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import org.apache.druid.query.aggregation.Aggregator; + +public class KllSketchNoOpAggregator implements Aggregator +{ + private final SketchType emptySketch; + + KllSketchNoOpAggregator(final SketchType emptySketch) + { + this.emptySketch = emptySketch; + } + + @Override + public Object get() + { + return emptySketch; + } + + @Override + public void aggregate() + { + } + + @Override + public void close() + { + } + + @Override + public float getFloat() + { + throw new UnsupportedOperationException("Not implemented"); + } + + @Override + public long getLong() + { + throw new UnsupportedOperationException("Not implemented"); + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchNoOpBufferAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchNoOpBufferAggregator.java new file mode 100644 index 000000000000..9a812e62d255 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchNoOpBufferAggregator.java 
@@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import org.apache.druid.query.aggregation.BufferAggregator; +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; +/** + * Implements both {@link BufferAggregator} and {@link VectorAggregator} without ever touching the buffer: init, every + * aggregate overload, relocate, close and inspectRuntimeShape have empty bodies, and get returns the sketch object that + * was handed to the constructor. + * + * NOTE(review): {@code SketchType} is not declared anywhere in the visible text; presumably it is a class type + * parameter that was lost when this patch was extracted - confirm against the real source file. + */ +public class KllSketchNoOpBufferAggregator implements BufferAggregator, VectorAggregator +{ + + private final SketchType emptySketch; + + KllSketchNoOpBufferAggregator(final SketchType emptySketch) + { + this.emptySketch = emptySketch; + } + + @Override + public void init(final ByteBuffer buf, final int position) + { + // Nothing to do. + } + + @Override + public void aggregate(final ByteBuffer buf, final int position) + { + // Nothing to do. + } + + @Override + public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) + { + // Nothing to do. + } + + @Override + public void aggregate( + ByteBuffer buf, + int numRows, + int[] positions, + @Nullable int[] rows, + int positionOffset + ) + { + // Nothing to do. 
+ } + /** + * Returns the sketch object given to the constructor; {@code buf} and {@code position} are not read. + */ + @Override + public Object get(final ByteBuffer buf, final int position) + { + return emptySketch; + } + /** + * Not supported; always throws {@link UnsupportedOperationException}. + */ + @Override + public float getFloat(final ByteBuffer buf, final int position) + { + throw new UnsupportedOperationException("Not implemented"); + } + /** + * Not supported; always throws {@link UnsupportedOperationException}. + */ + @Override + public long getLong(final ByteBuffer buf, final int position) + { + throw new UnsupportedOperationException("Not implemented"); + } + + @Override + public void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, ByteBuffer newBuffer) + { + // Nothing to do. + } + + @Override + public void close() + { + // Nothing to do. + } + + @Override + public void inspectRuntimeShape(final RuntimeShapeInspector inspector) + { + // Nothing to do. + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/package-info.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/package-info.java new file mode 100644 index 000000000000..742471e21dac --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +@EverythingIsNonnullByDefault +package org.apache.druid.query.aggregation.datasketches.kll; + +import org.apache.druid.annotations.EverythingIsNonnullByDefault; diff --git a/extensions-core/datasketches/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule b/extensions-core/datasketches/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule index e429bf180cce..d5d6dafb9e75 100644 --- a/extensions-core/datasketches/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule +++ b/extensions-core/datasketches/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule @@ -18,3 +18,4 @@ org.apache.druid.query.aggregation.datasketches.theta.oldapi.OldApiSketchModule org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchModule org.apache.druid.query.aggregation.datasketches.tuple.ArrayOfDoublesSketchModule org.apache.druid.query.aggregation.datasketches.hll.HllSketchModule +org.apache.druid.query.aggregation.datasketches.kll.KllSketchModule diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/GenerateTestData.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/GenerateTestData.java new file mode 100644 index 000000000000..64b8e9ce514d --- /dev/null +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/GenerateTestData.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import org.apache.datasketches.kll.KllDoublesSketch; +import org.apache.datasketches.kll.KllFloatsSketch; +import org.apache.druid.java.util.common.StringUtils; + +import java.io.BufferedWriter; +import java.nio.charset.StandardCharsets; +import java.nio.file.FileSystems; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Random; +import java.util.concurrent.ThreadLocalRandom; + +/** This is used for generating test data for {@link KllDoublesSketchAggregatorTest} */ +public class GenerateTestData +{ + + public static void main(String[] args) throws Exception + { + final Path doublesBuildPath = FileSystems.getDefault().getPath("kll_doubles_sketch_build_data.tsv"); + final Path doublesSketchPath = FileSystems.getDefault().getPath("kll_doubles_sketch_data.tsv"); + final Path floatsBuildPath = FileSystems.getDefault().getPath("kll_floats_sketch_build_data.tsv"); + final Path floatsSketchPath = FileSystems.getDefault().getPath("kll_floats_sketch_data.tsv"); + final BufferedWriter doublesBuildData = Files.newBufferedWriter(doublesBuildPath, StandardCharsets.UTF_8); + final BufferedWriter doublesSketchData = Files.newBufferedWriter(doublesSketchPath, StandardCharsets.UTF_8); + final BufferedWriter floatsBuildData = Files.newBufferedWriter(floatsBuildPath, StandardCharsets.UTF_8); + final BufferedWriter floatsSketchData = Files.newBufferedWriter(floatsSketchPath, StandardCharsets.UTF_8); + Random rand = ThreadLocalRandom.current(); + int sequenceNumber = 0; + 
for (int i = 0; i < 20; i++) { + int product = rand.nextInt(10); + KllDoublesSketch doublesSketch = KllDoublesSketch.newHeapInstance(); + KllFloatsSketch floatsSketch = KllFloatsSketch.newHeapInstance(); + for (int j = 0; j < 20; j++) { + double value = rand.nextDouble(); + doublesBuildData.write("2016010101"); + doublesBuildData.write('\t'); + doublesBuildData.write(Integer.toString(sequenceNumber)); // dimension with unique numbers for ingesting raw data + doublesBuildData.write('\t'); + doublesBuildData.write(Integer.toString(product)); // product dimension + doublesBuildData.write('\t'); + doublesBuildData.write(Double.toString(value)); + floatsBuildData.write("2016010101"); + floatsBuildData.write('\t'); + floatsBuildData.write(Integer.toString(sequenceNumber)); // dimension with unique numbers for ingesting raw data + floatsBuildData.write('\t'); + floatsBuildData.write(Integer.toString(product)); // product dimension + floatsBuildData.write('\t'); + floatsBuildData.write(Float.toString((float) value)); + if (rand.nextFloat() > 0.1) { // 10% nulls in this field + doublesBuildData.write('\t'); + doublesBuildData.write(Double.toString(value * 5 + 5)); + floatsBuildData.write('\t'); + floatsBuildData.write(Float.toString((float) (value * 5 + 5))); + } + doublesBuildData.newLine(); + floatsBuildData.newLine(); + doublesSketch.update(value); + floatsSketch.update((float) value); + sequenceNumber++; + } + doublesSketchData.write("2016010101"); + doublesSketchData.write('\t'); + doublesSketchData.write(Integer.toString(product)); // product dimension + doublesSketchData.write('\t'); + doublesSketchData.write(StringUtils.encodeBase64String(doublesSketch.toByteArray())); + doublesSketchData.newLine(); + floatsSketchData.write("2016010101"); + floatsSketchData.write('\t'); + floatsSketchData.write(Integer.toString(product)); // product dimension + floatsSketchData.write('\t'); + floatsSketchData.write(StringUtils.encodeBase64String(floatsSketch.toByteArray())); + 
floatsSketchData.newLine(); + } + doublesBuildData.close(); + doublesSketchData.close(); + floatsBuildData.close(); + floatsSketchData.close(); + } + +} diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactoryTest.java new file mode 100644 index 000000000000..2cfe561ab59c --- /dev/null +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactoryTest.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.jsontype.NamedType; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.query.Druids; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.aggregation.post.FinalizingFieldAccessPostAggregator; +import org.apache.druid.query.timeseries.TimeseriesQuery; +import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; +/** + * Unit tests for {@link KllDoublesSketchAggregatorFactory}: equals/hashCode, JSON round trip, default parameters, + * pinned size estimates and the result array signature of a timeseries query that uses the factory. + */ +public class KllDoublesSketchAggregatorFactoryTest +{ + @Test + public void testEquals() + { + EqualsVerifier.forClass(KllDoublesSketchAggregatorFactory.class) + .withNonnullFields("name", "fieldName") + .withIgnoredFields("cacheTypeId") + .usingGetClass() + .verify(); + } + /** + * Serializes a factory to JSON with the DOUBLES_SKETCH subtype registered, reads it back as an + * {@link AggregatorFactory} and expects an object equal to the original. + */ + @Test + public void testSerde() throws IOException + { + final ObjectMapper mapper = new DefaultObjectMapper(); + mapper.registerSubtypes(new NamedType(KllDoublesSketchAggregatorFactory.class, KllSketchModule.DOUBLES_SKETCH)); + final KllDoublesSketchAggregatorFactory factory = new KllDoublesSketchAggregatorFactory( + "myFactory", + "myField", + 1024, + 1000L + ); + final byte[] json = mapper.writeValueAsBytes(factory); + final KllDoublesSketchAggregatorFactory fromJson = (KllDoublesSketchAggregatorFactory) mapper.readValue( + json, + AggregatorFactory.class + ); + Assert.assertEquals(factory, fromJson); + } + /** + * Passing null as the third and fourth constructor arguments must make getK() return DEFAULT_K and + * getMaxStreamLength() return DEFAULT_MAX_STREAM_LENGTH. + */ + @Test + public void testDefaultParams() + { + final KllDoublesSketchAggregatorFactory factory = 
new KllDoublesSketchAggregatorFactory( + "myFactory", + "myField", + null, + null + ); + + Assert.assertEquals(KllDoublesSketchAggregatorFactory.DEFAULT_K, factory.getK()); + Assert.assertEquals(KllDoublesSketchAggregatorFactory.DEFAULT_MAX_STREAM_LENGTH, factory.getMaxStreamLength()); + } + /** + * Pins the values returned by guessAggregatorHeapFootprint for the inputs 1, 100, 1000 and 1_000_000_000_000 on a + * factory built with 200 and null as its last two constructor arguments. + */ + @Test + public void testGuessAggregatorHeapFootprint() + { + KllDoublesSketchAggregatorFactory factory = new KllDoublesSketchAggregatorFactory( + "myFactory", + "myField", + 200, + null + ); + Assert.assertEquals(1644, factory.guessAggregatorHeapFootprint(1)); + Assert.assertEquals(1644, factory.guessAggregatorHeapFootprint(100)); + Assert.assertEquals(3428, factory.guessAggregatorHeapFootprint(1000)); + Assert.assertEquals(6388, factory.guessAggregatorHeapFootprint(1_000_000_000_000L)); + } + /** + * Pins getMaxIntermediateSize: 5708 when the last constructor argument is null and 6388 when it is + * 1_000_000_000_000L, with 200 as the third argument in both cases. + */ + @Test + public void testMaxIntermediateSize() + { + KllDoublesSketchAggregatorFactory factory = new KllDoublesSketchAggregatorFactory( + "myFactory", + "myField", + 200, + null + ); + Assert.assertEquals(5708, factory.getMaxIntermediateSize()); + + factory = new KllDoublesSketchAggregatorFactory( + "myFactory", + "myField", + 200, + 1_000_000_000_000L + ); + Assert.assertEquals(6388, factory.getMaxIntermediateSize()); + } + /** + * Builds a timeseries query with a count aggregator, a KLL doubles build aggregator and a KLL doubles merge + * aggregator, plus a field-access and a finalizing field-access post-aggregator on each sketch, then compares the + * result array signature reported by {@link TimeseriesQueryQueryToolChest} with the expected column types. + */ + @Test + public void testResultArraySignature() + { + final TimeseriesQuery query = + Druids.newTimeseriesQueryBuilder() + .dataSource("dummy") + .intervals("2000/3000") + .granularity(Granularities.HOUR) + .aggregators( + new CountAggregatorFactory("count"), + new KllDoublesSketchAggregatorFactory("doublesSketch", "col", 8, 1000000000L), + new KllDoublesSketchMergeAggregatorFactory("doublesSketchMerge", 8) + ) + .postAggregators( + new FieldAccessPostAggregator("doublesSketch-access", "doublesSketch"), + new FinalizingFieldAccessPostAggregator("doublesSketch-finalize", "doublesSketch"), + new FieldAccessPostAggregator("doublesSketchMerge-access", "doublesSketchMerge"), + new FinalizingFieldAccessPostAggregator("doublesSketchMerge-finalize", "doublesSketchMerge") 
+ ) + .build(); + + Assert.assertEquals( + RowSignature.builder() + .addTimeColumn() + .add("count", ColumnType.LONG) + .add("doublesSketch", null) + .add("doublesSketchMerge", null) + .add("doublesSketch-access", KllSketchModule.DOUBLES_TYPE) + .add("doublesSketch-finalize", ColumnType.LONG) + .add("doublesSketchMerge-access", KllSketchModule.DOUBLES_TYPE) + .add("doublesSketchMerge-finalize", ColumnType.LONG) + .build(), + new TimeseriesQueryQueryToolChest().resultArraySignature(query) + ); + } +} diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorTest.java new file mode 100644 index 000000000000..cd85d287a160 --- /dev/null +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorTest.java @@ -0,0 +1,632 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableMap; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.query.QueryContexts; +import org.apache.druid.query.aggregation.AggregationTestHelper; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.groupby.GroupByQueryConfig; +import org.apache.druid.query.groupby.GroupByQueryRunnerTest; +import org.apache.druid.query.groupby.ResultRow; +import org.apache.druid.query.groupby.strategy.GroupByStrategySelector; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.junit.After; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +@RunWith(Parameterized.class) +public class KllDoublesSketchAggregatorTest extends InitializedNullHandlingTest +{ + private final GroupByQueryConfig config; + private final AggregationTestHelper helper; + private final AggregationTestHelper timeSeriesHelper; + + @Rule + public final TemporaryFolder tempFolder = new TemporaryFolder(); + + public KllDoublesSketchAggregatorTest(final GroupByQueryConfig config, final String vectorize) + { + this.config = config; + KllSketchModule.registerSerde(); + KllSketchModule module = new KllSketchModule(); + helper = AggregationTestHelper.createGroupByQueryAggregationTestHelper( + module.getJacksonModules(), + config, + tempFolder + ).withQueryContext(ImmutableMap.of(QueryContexts.VECTORIZE_KEY, vectorize)); + 
timeSeriesHelper = AggregationTestHelper.createTimeseriesQueryAggregationTestHelper( + module.getJacksonModules(), + tempFolder + ).withQueryContext(ImmutableMap.of(QueryContexts.VECTORIZE_KEY, vectorize)); + } + + @Parameterized.Parameters(name = "groupByConfig = {0}, vectorize = {1}") + public static Collection constructorFeeder() + { + final List constructors = new ArrayList<>(); + for (GroupByQueryConfig config : GroupByQueryRunnerTest.testConfigs()) { + for (String vectorize : new String[]{"false", "true", "force"}) { + constructors.add(new Object[]{config, vectorize}); + } + } + return constructors; + } + + @After + public void teardown() throws IOException + { + helper.close(); + } + + // this is to test Json properties and equals + @Test + public void serializeDeserializeFactoryWithFieldName() throws Exception + { + ObjectMapper objectMapper = new DefaultObjectMapper(); + new KllSketchModule().getJacksonModules().forEach(objectMapper::registerModule); + KllDoublesSketchAggregatorFactory factory = + new KllDoublesSketchAggregatorFactory("name", "filedName", 200, 1000000000L); + + AggregatorFactory other = objectMapper.readValue( + objectMapper.writeValueAsString(factory), + AggregatorFactory.class + ); + + Assert.assertEquals(factory, other); + } + + // this is to test Json properties and equals for the combining factory + @Test + public void serializeDeserializeCombiningFactoryWithFieldName() throws Exception + { + ObjectMapper objectMapper = new DefaultObjectMapper(); + new KllSketchModule().getJacksonModules().forEach(objectMapper::registerModule); + KllDoublesSketchAggregatorFactory factory = new KllDoublesSketchMergeAggregatorFactory("name", 200); + + AggregatorFactory other = objectMapper.readValue( + objectMapper.writeValueAsString(factory), + AggregatorFactory.class + ); + + Assert.assertEquals(factory, other); + } + + @Test + public void ingestingSketches() throws Exception + { + Sequence seq = helper.createIndexAndRunQueryOnSegment( + new 
File(this.getClass().getClassLoader().getResource("kll/kll_doubles_sketch_data.tsv").getFile()), + String.join( + "\n", + "{", + " \"type\": \"string\",", + " \"parseSpec\": {", + " \"format\": \"tsv\",", + " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", + " \"dimensionsSpec\": {", + " \"dimensions\": [\"product\"],", + " \"dimensionExclusions\": [],", + " \"spatialDimensions\": []", + " },", + " \"columns\": [\"timestamp\", \"product\", \"sketch\"]", + " }", + "}" + ), + String.join( + "\n", + "[", + " {\"type\": \"KllDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"k\": 200},", + " {\"type\": \"KllDoublesSketch\", \"name\": \"non_existent_sketch\", \"fieldName\": \"non_existent_sketch\", \"k\": 200}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimensions\": [],", + " \"aggregations\": [", + " {\"type\": \"KllDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"k\": 200},", + " {\"type\": \"KllDoublesSketch\", \"name\": \"non_existent_sketch\", \"fieldName\": \"non_existent_sketch\", \"k\": 200}", + " ],", + " \"postAggregations\": [", + " {\"type\": \"KllDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", + " {\"type\": \"KllDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + List results = seq.toList(); + Assert.assertEquals(1, results.size()); + ResultRow row = results.get(0); + + Object nonExistentSketchObject = row.get(1); + Assert.assertTrue(nonExistentSketchObject instanceof Long); + long nonExistentSketchValue = 
(long) nonExistentSketchObject; + Assert.assertEquals(0, nonExistentSketchValue); + + Object sketchObject = row.get(0); + Assert.assertTrue(sketchObject instanceof Long); + long sketchValue = (long) sketchObject; + Assert.assertEquals(400, sketchValue); + + // post agg + Object quantilesObject = row.get(2); + Assert.assertTrue(quantilesObject instanceof double[]); + double[] quantiles = (double[]) quantilesObject; + Assert.assertEquals(0, quantiles[0], 0.05); // min value + Assert.assertEquals(0.5, quantiles[1], 0.05); // median value + Assert.assertEquals(1, quantiles[2], 0.05); // max value + + // post agg + Object histogramObject = row.get(3); + Assert.assertTrue(histogramObject instanceof double[]); + double[] histogram = (double[]) histogramObject; + for (final double bin : histogram) { + // 400 items uniformly distributed into 4 bins + Assert.assertEquals(100, bin, 100 * 0.2); + } + } + + @Test + public void buildingSketchesAtIngestionTime() throws Exception + { + Sequence seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("kll/kll_doubles_sketch_build_data.tsv").getFile()), + String.join( + "\n", + "{", + " \"type\": \"string\",", + " \"parseSpec\": {", + " \"format\": \"tsv\",", + " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", + " \"dimensionsSpec\": {", + " \"dimensions\": [\"product\"],", + " \"dimensionExclusions\": [ \"sequenceNumber\"],", + " \"spatialDimensions\": []", + " },", + " \"columns\": [\"timestamp\", \"sequenceNumber\", \"product\", \"value\", \"valueWithNulls\"]", + " }", + "}" + ), + "[{\"type\": \"KllDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 200}," + + "{\"type\": \"KllDoublesSketch\", \"name\": \"sketchWithNulls\", \"fieldName\": \"valueWithNulls\", \"k\": 200}]", + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": 
\"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimensions\": [],", + " \"aggregations\": [", + " {\"type\": \"KllDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"k\": 200},", + " {\"type\": \"KllDoublesSketch\", \"name\": \"sketchWithNulls\", \"fieldName\": \"sketchWithNulls\", \"k\": 200},", + " {\"type\": \"KllDoublesSketch\", \"name\": \"non_existent_sketch\", \"fieldName\": \"non_existent_sketch\", \"k\": 200}", + " ],", + " \"postAggregations\": [", + " {\"type\": \"KllDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", + " {\"type\": \"KllDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", + " {\"type\": \"KllDoublesSketchToQuantiles\", \"name\": \"quantilesWithNulls\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}},", + " {\"type\": \"KllDoublesSketchToHistogram\", \"name\": \"histogramWithNulls\", \"splitPoints\": [6.25, 7.5, 8.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + List results = seq.toList(); + Assert.assertEquals(1, results.size()); + ResultRow row = results.get(0); + + Object sketchObject = row.get(0); + Assert.assertTrue(sketchObject instanceof Long); + long sketchValue = (long) sketchObject; + Assert.assertEquals(400, sketchValue); + + Object sketchObjectWithNulls = row.get(1); + Assert.assertTrue(sketchObjectWithNulls instanceof Long); + long sketchValueWithNulls = (long) sketchObjectWithNulls; + Assert.assertEquals(355, sketchValueWithNulls); + + // post agg + Object quantilesObject = row.get(3); + Assert.assertTrue(quantilesObject instanceof double[]); + double[] quantiles = (double[]) quantilesObject; + 
Assert.assertEquals(0, quantiles[0], 0.05); // min value + Assert.assertEquals(0.5, quantiles[1], 0.05); // median value + Assert.assertEquals(1, quantiles[2], 0.05); // max value + + // post agg + Object histogramObject = row.get(4); + Assert.assertTrue(histogramObject instanceof double[]); + double[] histogram = (double[]) histogramObject; + Assert.assertEquals(4, histogram.length); + for (final double bin : histogram) { + Assert.assertEquals(100, bin, 100 * 0.2); // 400 items uniformly distributed into 4 bins + } + + // post agg with nulls + Object quantilesObjectWithNulls = row.get(5); + Assert.assertTrue(quantilesObjectWithNulls instanceof double[]); + double[] quantilesWithNulls = (double[]) quantilesObjectWithNulls; + Assert.assertEquals(5.0, quantilesWithNulls[0], 0.05); // min value + Assert.assertEquals(7.5, quantilesWithNulls[1], 0.07); // median value + Assert.assertEquals(10.0, quantilesWithNulls[2], 0.05); // max value + + // post agg with nulls + Object histogramObjectWithNulls = row.get(6); + Assert.assertTrue(histogramObjectWithNulls instanceof double[]); + double[] histogramWithNulls = (double[]) histogramObjectWithNulls; + Assert.assertEquals(4, histogramWithNulls.length); + for (final double bin : histogramWithNulls) { + Assert.assertEquals(100, bin, 50); // distribution is skewed due to nulls + } + } + + @Test + public void buildingSketchesAtQueryTime() throws Exception + { + Sequence seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("kll/kll_doubles_sketch_build_data.tsv").getFile()), + String.join( + "\n", + "{", + " \"type\": \"string\",", + " \"parseSpec\": {", + " \"format\": \"tsv\",", + " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", + " \"dimensionsSpec\": {", + " \"dimensions\": [\"sequenceNumber\", \"product\"],", + " \"dimensionExclusions\": [],", + " \"spatialDimensions\": []", + " },", + " \"columns\": [\"timestamp\", \"sequenceNumber\", 
\"product\", \"value\", \"valueWithNulls\"]", + " }", + "}" + ), + "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}," + + "{\"type\": \"doubleSum\", \"name\": \"valueWithNulls\", \"fieldName\": \"valueWithNulls\"}]", + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimensions\": [],", + " \"aggregations\": [", + " {\"type\": \"KllDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 200},", + " {\"type\": \"KllDoublesSketch\", \"name\": \"sketchWithNulls\", \"fieldName\": \"valueWithNulls\", \"k\": 200}", + " ],", + " \"postAggregations\": [", + " {\"type\": \"KllDoublesSketchToQuantile\", \"name\": \"quantile\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", + " {\"type\": \"KllDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", + " {\"type\": \"KllDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", + " {\"type\": \"KllDoublesSketchToQuantile\", \"name\": \"quantileWithNulls\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}},", + " {\"type\": \"KllDoublesSketchToQuantiles\", \"name\": \"quantilesWithNulls\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}},", + " {\"type\": \"KllDoublesSketchToHistogram\", \"name\": \"histogramWithNulls\", \"splitPoints\": [6.25, 7.5, 8.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + List results = seq.toList(); + Assert.assertEquals(1, 
results.size()); + ResultRow row = results.get(0); + + Object sketchObject = row.get(0); + Assert.assertTrue(sketchObject instanceof Long); + long sketchValue = (long) sketchObject; + Assert.assertEquals(400, sketchValue); + + Object sketchObjectWithNulls = row.get(1); + Assert.assertTrue(sketchObjectWithNulls instanceof Long); + long sketchValueWithNulls = (long) sketchObjectWithNulls; + Assert.assertEquals(NullHandling.replaceWithDefault() ? 400 : 377, sketchValueWithNulls); + + // post agg + Object quantileObject = row.get(2); + Assert.assertTrue(quantileObject instanceof Double); + Assert.assertEquals(0.5, (double) quantileObject, 0.05); // median value + + // post agg + Object quantilesObject = row.get(3); + Assert.assertTrue(quantilesObject instanceof double[]); + double[] quantiles = (double[]) quantilesObject; + Assert.assertEquals(0, quantiles[0], 0.05); // min value + Assert.assertEquals(0.5, quantiles[1], 0.05); // median value + Assert.assertEquals(1, quantiles[2], 0.05); // max value + + // post agg + Object histogramObject = row.get(4); + Assert.assertTrue(histogramObject instanceof double[]); + double[] histogram = (double[]) histogramObject; + for (final double bin : histogram) { + Assert.assertEquals(100, bin, 100 * 0.2); // 400 items uniformly + // distributed into 4 bins + } + + // post agg with nulls + Object quantileObjectWithNulls = row.get(5); + Assert.assertTrue(quantileObjectWithNulls instanceof Double); + Assert.assertEquals( + NullHandling.replaceWithDefault() ? 7.2 : 7.5, + (double) quantileObjectWithNulls, + 0.1 + ); // median value + + // post agg with nulls + Object quantilesObjectWithNulls = row.get(6); + Assert.assertTrue(quantilesObjectWithNulls instanceof double[]); + double[] quantilesWithNulls = (double[]) quantilesObjectWithNulls; + Assert.assertEquals(NullHandling.replaceWithDefault() ? 0.0 : 5.0, quantilesWithNulls[0], 0.05); // min value + Assert.assertEquals(NullHandling.replaceWithDefault() ? 
7.2 : 7.5, quantilesWithNulls[1], 0.1); // median value + Assert.assertEquals(10.0, quantilesWithNulls[2], 0.05); // max value + + // post agg with nulls + Object histogramObjectWithNulls = row.get(7); + Assert.assertTrue(histogramObjectWithNulls instanceof double[]); + double[] histogramWithNulls = (double[]) histogramObjectWithNulls; + for (final double bin : histogramWithNulls) { + Assert.assertEquals(100, bin, 80); // distribution is skewed due to nulls/0s + // distributed into 4 bins + } + } + + @Test + public void queryingDataWithFieldNameValueAsFloatInsteadOfSketch() throws Exception + { + Sequence seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("kll/kll_doubles_sketch_build_data.tsv").getFile()), + String.join( + "\n", + "{", + " \"type\": \"string\",", + " \"parseSpec\": {", + " \"format\": \"tsv\",", + " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", + " \"dimensionsSpec\": {", + " \"dimensions\": [\"sequenceNumber\", \"product\"],", + " \"dimensionExclusions\": [],", + " \"spatialDimensions\": []", + " },", + " \"columns\": [\"timestamp\", \"sequenceNumber\", \"product\", \"value\"]", + " }", + "}" + ), + "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]", + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimensions\": [],", + " \"aggregations\": [", + " {\"type\": \"KllDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 200}", + " ],", + " \"postAggregations\": [", + " {\"type\": \"KllDoublesSketchToQuantile\", \"name\": \"quantile\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", + " {\"type\": \"KllDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", 
\"fieldName\": \"sketch\"}},", + " {\"type\": \"KllDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + List results = seq.toList(); + Assert.assertEquals(1, results.size()); + ResultRow row = results.get(0); + + Object sketchObject = row.get(0); + Assert.assertTrue(sketchObject instanceof Long); + long sketchValue = (long) sketchObject; + Assert.assertEquals(400, sketchValue); + + // post agg + Object quantileObject = row.get(1); + Assert.assertTrue(quantileObject instanceof Double); + Assert.assertEquals(0.5, (double) quantileObject, 0.05); // median value + + // post agg + Object quantilesObject = row.get(2); + Assert.assertTrue(quantilesObject instanceof double[]); + double[] quantiles = (double[]) quantilesObject; + Assert.assertEquals(0, quantiles[0], 0.05); // min value + Assert.assertEquals(0.5, quantiles[1], 0.05); // median value + Assert.assertEquals(1, quantiles[2], 0.05); // max value + + // post agg + Object histogramObject = row.get(3); + Assert.assertTrue(histogramObject instanceof double[]); + double[] histogram = (double[]) histogramObject; + for (final double bin : histogram) { + Assert.assertEquals(100, bin, 100 * 0.2); // 400 items uniformly + // distributed into 4 bins + } + } + + @Test + public void timeSeriesQueryInputAsFloat() throws Exception + { + Sequence seq = timeSeriesHelper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("kll/kll_doubles_sketch_build_data.tsv").getFile()), + String.join( + "\n", + "{", + " \"type\": \"string\",", + " \"parseSpec\": {", + " \"format\": \"tsv\",", + " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", + " \"dimensionsSpec\": {", + " \"dimensions\": [\"sequenceNumber\", \"product\"],", + " \"dimensionExclusions\": [],", + " 
\"spatialDimensions\": []", + " },", + " \"columns\": [\"timestamp\", \"sequenceNumber\", \"product\", \"value\"]", + " }", + "}" + ), + "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]", + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"timeseries\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"aggregations\": [", + " {\"type\": \"KllDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 200}", + " ],", + " \"postAggregations\": [", + " {\"type\": \"KllDoublesSketchToQuantile\", \"name\": \"quantile1\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", + " {\"type\": \"KllDoublesSketchToQuantiles\", \"name\": \"quantiles1\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", + " {\"type\": \"KllDoublesSketchToHistogram\", \"name\": \"histogram1\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + List results = seq.toList(); + Assert.assertEquals(1, results.size()); + } + + @Test + public void testSuccessWhenMaxStreamLengthHit() throws Exception + { + if (GroupByStrategySelector.STRATEGY_V1.equals(config.getDefaultStrategy())) { + helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("kll/kll_doubles_sketch_build_data.tsv").getFile()), + String.join( + "\n", + "{", + " \"type\": \"string\",", + " \"parseSpec\": {", + " \"format\": \"tsv\",", + " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", + " \"dimensionsSpec\": {", + " \"dimensions\": [\"sequenceNumber\", \"product\"],", + " \"dimensionExclusions\": [],", + " \"spatialDimensions\": []", + " },", + " \"columns\": [\"timestamp\", \"sequenceNumber\", 
\"product\", \"value\"]", + " }", + "}" + ), + "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]", + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimensions\": [],", + " \"aggregations\": [", + " {\"type\": \"KllDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 200, \"maxStreamLength\": 10}", + " ],", + " \"postAggregations\": [", + " {\"type\": \"KllDoublesSketchToQuantile\", \"name\": \"quantile\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", + " {\"type\": \"KllDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", + " {\"type\": \"KllDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + } else { + Sequence seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("kll/kll_doubles_sketch_build_data.tsv").getFile()), + String.join( + "\n", + "{", + " \"type\": \"string\",", + " \"parseSpec\": {", + " \"format\": \"tsv\",", + " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", + " \"dimensionsSpec\": {", + " \"dimensions\": [\"sequenceNumber\", \"product\"],", + " \"dimensionExclusions\": [],", + " \"spatialDimensions\": []", + " },", + " \"columns\": [\"timestamp\", \"sequenceNumber\", \"product\", \"value\"]", + " }", + "}" + ), + "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]", + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " 
\"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimensions\": [],", + " \"aggregations\": [", + " {\"type\": \"KllDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 200, \"maxStreamLength\": 10}", + " ],", + " \"postAggregations\": [", + " {\"type\": \"KllDoublesSketchToQuantile\", \"name\": \"quantile\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", + " {\"type\": \"KllDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", + " {\"type\": \"KllDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + seq.toList(); + } + } + +} diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchComplexMetricSerdeTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchComplexMetricSerdeTest.java new file mode 100644 index 000000000000..3628c5e6212f --- /dev/null +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchComplexMetricSerdeTest.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.apache.datasketches.kll.KllDoublesSketch; +import org.apache.druid.data.input.MapBasedInputRow; +import org.apache.druid.segment.serde.ComplexMetricExtractor; +import org.junit.Assert; +import org.junit.Test; + +public class KllDoublesSketchComplexMetricSerdeTest +{ + @Test + public void testExtractorOnEmptyString() + { + final KllDoublesSketchComplexMetricSerde serde = new KllDoublesSketchComplexMetricSerde(); + final ComplexMetricExtractor extractor = serde.getExtractor(); + final KllDoublesSketch sketch = (KllDoublesSketch) extractor.extractValue( + new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of("foo", "")), + "foo" + ); + Assert.assertEquals(0, sketch.getNumRetained()); + } + + @Test + public void testExtractorOnPositiveNumber() + { + final KllDoublesSketchComplexMetricSerde serde = new KllDoublesSketchComplexMetricSerde(); + final ComplexMetricExtractor extractor = serde.getExtractor(); + final KllDoublesSketch sketch = (KllDoublesSketch) extractor.extractValue( + new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of("foo", "777")), + "foo" + ); + Assert.assertEquals(1, sketch.getNumRetained()); + Assert.assertEquals(777d, sketch.getMaxValue(), 0.01d); + } + + @Test + public void testExtractorOnNegativeNumber() + { + final KllDoublesSketchComplexMetricSerde serde = new KllDoublesSketchComplexMetricSerde(); + final ComplexMetricExtractor extractor 
= serde.getExtractor(); + final KllDoublesSketch sketch = (KllDoublesSketch) extractor.extractValue( + new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of("foo", "-133")), + "foo" + ); + Assert.assertEquals(1, sketch.getNumRetained()); + Assert.assertEquals(-133d, sketch.getMaxValue(), 0.01d); + } + + @Test + public void testExtractorOnDecimalNumber() + { + final KllDoublesSketchComplexMetricSerde serde = new KllDoublesSketchComplexMetricSerde(); + final ComplexMetricExtractor extractor = serde.getExtractor(); + final KllDoublesSketch sketch = (KllDoublesSketch) extractor.extractValue( + new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of("foo", "3.1")), + "foo" + ); + Assert.assertEquals(1, sketch.getNumRetained()); + Assert.assertEquals(3.1d, sketch.getMaxValue(), 0.01d); + } + + @Test + public void testExtractorOnLeadingDecimalNumber() + { + final KllDoublesSketchComplexMetricSerde serde = new KllDoublesSketchComplexMetricSerde(); + final ComplexMetricExtractor extractor = serde.getExtractor(); + final KllDoublesSketch sketch = (KllDoublesSketch) extractor.extractValue( + new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of("foo", ".1")), + "foo" + ); + Assert.assertEquals(1, sketch.getNumRetained()); + Assert.assertEquals(0.1d, sketch.getMaxValue(), 0.01d); + } +} diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeAggregatorFactoryTest.java new file mode 100644 index 000000000000..50afb9ab18c6 --- /dev/null +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeAggregatorFactoryTest.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.jsontype.NamedType; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; + +public class KllDoublesSketchMergeAggregatorFactoryTest +{ + @Test + public void testEquals() + { + EqualsVerifier.forClass(KllDoublesSketchMergeAggregatorFactory.class) + .withNonnullFields("name", "fieldName") + .withIgnoredFields("cacheTypeId") + .usingGetClass() + .verify(); + } + + @Test + public void testSerde() throws IOException + { + final ObjectMapper mapper = new DefaultObjectMapper(); + mapper.registerSubtypes( + new NamedType(KllDoublesSketchMergeAggregatorFactory.class, KllSketchModule.DOUBLES_SKETCH_MERGE) + ); + final KllDoublesSketchMergeAggregatorFactory factory = new KllDoublesSketchMergeAggregatorFactory( + "myFactory", + 1024, + 1000L + ); + final byte[] json = mapper.writeValueAsBytes(factory); + final KllDoublesSketchMergeAggregatorFactory fromJson = (KllDoublesSketchMergeAggregatorFactory) mapper.readValue( + json, + 
AggregatorFactory.class + ); + Assert.assertEquals(factory, fromJson); + } +} diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToCDFPostAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToCDFPostAggregatorTest.java new file mode 100644 index 000000000000..70b189b638b2 --- /dev/null +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToCDFPostAggregatorTest.java @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.core.JsonProcessingException; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.query.Druids; +import org.apache.druid.query.aggregation.Aggregator; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.TestDoubleColumnSelectorImpl; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.timeseries.TimeseriesQuery; +import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +import java.util.HashMap; +import java.util.Map; + +public class KllDoublesSketchToCDFPostAggregatorTest +{ + @Rule + public ExpectedException expectedException = ExpectedException.none(); + + @Test + public void testSerde() throws JsonProcessingException + { + final PostAggregator there = new KllDoublesSketchToCDFPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch"), + new double[]{0.25, 0.75} + ); + DefaultObjectMapper mapper = new DefaultObjectMapper(); + KllDoublesSketchToCDFPostAggregator andBackAgain = mapper.readValue( + mapper.writeValueAsString(there), + KllDoublesSketchToCDFPostAggregator.class + ); + + Assert.assertEquals(there, andBackAgain); + Assert.assertArrayEquals(there.getCacheKey(), andBackAgain.getCacheKey()); + } + + @Test + public void testToString() + { + final PostAggregator postAgg = new KllDoublesSketchToCDFPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch"), + new double[]{0.25, 0.75} + ); + + 
Assert.assertEquals( + "KllDoublesSketchToCDFPostAggregator{name='post', field=FieldAccessPostAggregator{name='field1', fieldName='sketch'}, splitPoints=[0.25, 0.75]}", + postAgg.toString() + ); + } + + @Test + public void testComparator() + { + expectedException.expect(IAE.class); + expectedException.expectMessage("Comparing histograms is not supported"); + final PostAggregator postAgg = new KllDoublesSketchToCDFPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch"), + new double[]{0.25, 0.75} + ); + postAgg.getComparator(); + } + + @Test + public void testEqualsAndHashCode() + { + EqualsVerifier.forClass(KllDoublesSketchToCDFPostAggregator.class) + .withNonnullFields("name", "field", "splitPoints") + .usingGetClass() + .verify(); + } + + @Test + public void emptySketch() + { + final TestDoubleColumnSelectorImpl selector = new TestDoubleColumnSelectorImpl(null); + final Aggregator agg = new KllDoublesSketchBuildAggregator(selector, 8); + + final Map fields = new HashMap<>(); + fields.put("sketch", agg.get()); + + final PostAggregator postAgg = new KllDoublesSketchToCDFPostAggregator( + "cdf", + new FieldAccessPostAggregator("field", "sketch"), + new double[] {4} + ); + + final double[] histogram = (double[]) postAgg.compute(fields); + Assert.assertNotNull(histogram); + Assert.assertEquals(2, histogram.length); + Assert.assertTrue(Double.isNaN(histogram[0])); + Assert.assertTrue(Double.isNaN(histogram[1])); + } + + @Test + public void normalCase() + { + final double[] values = new double[] {1, 2, 3, 4, 5, 6}; + final TestDoubleColumnSelectorImpl selector = new TestDoubleColumnSelectorImpl(values); + + final Aggregator agg = new KllDoublesSketchBuildAggregator(selector, 8); + //noinspection ForLoopReplaceableByForEach + for (int i = 0; i < values.length; i++) { + agg.aggregate(); + selector.increment(); + } + + final Map fields = new HashMap<>(); + fields.put("sketch", agg.get()); + + final PostAggregator postAgg = new 
KllDoublesSketchToCDFPostAggregator( + "cdf", + new FieldAccessPostAggregator("field", "sketch"), + new double[] {4} // half of the distribution is below 4 + ); + + final double[] cdf = (double[]) postAgg.compute(fields); + Assert.assertNotNull(cdf); + Assert.assertEquals(2, cdf.length); + Assert.assertEquals(0.5, cdf[0], 0); + Assert.assertEquals(1.0, cdf[1], 0); + } + + @Test + public void testResultArraySignature() + { + final TimeseriesQuery query = + Druids.newTimeseriesQueryBuilder() + .dataSource("dummy") + .intervals("2000/3000") + .granularity(Granularities.HOUR) + .aggregators( + new KllDoublesSketchAggregatorFactory("sketch", "col", 8, 1000000L) + ) + .postAggregators( + new KllDoublesSketchToCDFPostAggregator( + "a", + new FieldAccessPostAggregator("field", "sketch"), + new double[] {4} + ) + ) + .build(); + + Assert.assertEquals( + RowSignature.builder() + .addTimeColumn() + .add("sketch", null) + .add("a", ColumnType.DOUBLE_ARRAY) + .build(), + new TimeseriesQueryQueryToolChest().resultArraySignature(query) + ); + } +} diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToHistogramPostAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToHistogramPostAggregatorTest.java new file mode 100644 index 000000000000..e3f55b4fb793 --- /dev/null +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToHistogramPostAggregatorTest.java @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.core.JsonProcessingException; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.query.Druids; +import org.apache.druid.query.aggregation.Aggregator; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.TestDoubleColumnSelectorImpl; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.timeseries.TimeseriesQuery; +import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +import java.util.HashMap; +import java.util.Map; + +public class KllDoublesSketchToHistogramPostAggregatorTest +{ + @Rule + public ExpectedException expectedException = ExpectedException.none(); + + @Test + public void testSerde() throws JsonProcessingException + { + final PostAggregator there = new KllDoublesSketchToHistogramPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch"), + new double[]{0.25, 0.75}, + null + ); + DefaultObjectMapper mapper = new DefaultObjectMapper(); + KllDoublesSketchToHistogramPostAggregator andBackAgain = 
mapper.readValue( + mapper.writeValueAsString(there), + KllDoublesSketchToHistogramPostAggregator.class + ); + + Assert.assertEquals(there, andBackAgain); + Assert.assertArrayEquals(there.getCacheKey(), andBackAgain.getCacheKey()); + } + + @Test + public void testToString() + { + final PostAggregator postAgg = new KllDoublesSketchToHistogramPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch"), + new double[]{0.25, 0.75}, + null + ); + + Assert.assertEquals( + "KllDoublesSketchToHistogramPostAggregator{name='post', field=FieldAccessPostAggregator{name='field1', fieldName='sketch'}, splitPoints=[0.25, 0.75], numBins=null}", + postAgg.toString() + ); + } + + @Test + public void testComparator() + { + expectedException.expect(IAE.class); + expectedException.expectMessage("Comparing histograms is not supported"); + final PostAggregator postAgg = new KllDoublesSketchToHistogramPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch"), + new double[]{0.25, 0.75}, + null + ); + postAgg.getComparator(); + } + + @Test + public void testEqualsAndHashCode() + { + EqualsVerifier.forClass(KllDoublesSketchToHistogramPostAggregator.class) + .withNonnullFields("name", "field", "splitPoints") + .usingGetClass() + .verify(); + } + + @Test + public void emptySketch() + { + final TestDoubleColumnSelectorImpl selector = new TestDoubleColumnSelectorImpl(null); + final Aggregator agg = new KllDoublesSketchBuildAggregator(selector, 8); + + final Map fields = new HashMap<>(); + fields.put("sketch", agg.get()); + + final PostAggregator postAgg = new KllDoublesSketchToHistogramPostAggregator( + "histogram", + new FieldAccessPostAggregator("field", "sketch"), + new double[] {3.5}, + null + ); + + final double[] histogram = (double[]) postAgg.compute(fields); + Assert.assertNotNull(histogram); + Assert.assertEquals(2, histogram.length); + Assert.assertTrue(Double.isNaN(histogram[0])); + Assert.assertTrue(Double.isNaN(histogram[1])); + } + + @Test + 
public void splitPoints() + { + final double[] values = new double[] {1, 2, 3, 4, 5, 6}; + final TestDoubleColumnSelectorImpl selector = new TestDoubleColumnSelectorImpl(values); + + final Aggregator agg = new KllDoublesSketchBuildAggregator(selector, 8); + //noinspection ForLoopReplaceableByForEach + for (int i = 0; i < values.length; i++) { + agg.aggregate(); + selector.increment(); + } + + final Map fields = new HashMap<>(); + fields.put("sketch", agg.get()); + + final PostAggregator postAgg = new KllDoublesSketchToHistogramPostAggregator( + "histogram", + new FieldAccessPostAggregator("field", "sketch"), + new double[] {3.5}, // splits distribution into two bins of equal mass + null + ); + + final double[] histogram = (double[]) postAgg.compute(fields); + Assert.assertNotNull(histogram); + Assert.assertEquals(2, histogram.length); + Assert.assertEquals(3.0, histogram[0], 0); + Assert.assertEquals(3.0, histogram[1], 0); + } + + @Test + public void numBins() + { + final double[] values = new double[] {1, 2, 3, 4, 5, 6}; + final TestDoubleColumnSelectorImpl selector = new TestDoubleColumnSelectorImpl(values); + + final Aggregator agg = new KllDoublesSketchBuildAggregator(selector, 8); + //noinspection ForLoopReplaceableByForEach + for (int i = 0; i < values.length; i++) { + agg.aggregate(); + selector.increment(); + } + + final Map fields = new HashMap<>(); + fields.put("sketch", agg.get()); + + final PostAggregator postAgg = new KllDoublesSketchToHistogramPostAggregator( + "histogram", + new FieldAccessPostAggregator("field", "sketch"), + null, + 2 // two bins of equal mass + ); + + final double[] histogram = (double[]) postAgg.compute(fields); + Assert.assertNotNull(histogram); + Assert.assertEquals(2, histogram.length); + Assert.assertEquals(3.0, histogram[0], 0); + Assert.assertEquals(3.0, histogram[1], 0); + } + + @Test + public void testResultArraySignature() + { + final TimeseriesQuery query = + Druids.newTimeseriesQueryBuilder() + .dataSource("dummy") + 
.intervals("2000/3000") + .granularity(Granularities.HOUR) + .aggregators( + new KllDoublesSketchAggregatorFactory("sketch", "col", 8, 1000000L) + ) + .postAggregators( + new KllDoublesSketchToHistogramPostAggregator( + "a", + new FieldAccessPostAggregator("field", "sketch"), + new double[] {3.5}, + null + ) + ) + .build(); + + Assert.assertEquals( + RowSignature.builder() + .addTimeColumn() + .add("sketch", null) + .add("a", ColumnType.DOUBLE_ARRAY) + .build(), + new TimeseriesQueryQueryToolChest().resultArraySignature(query) + ); + } +} diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToQuantilePostAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToQuantilePostAggregatorTest.java new file mode 100644 index 000000000000..d130a1aabe5c --- /dev/null +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToQuantilePostAggregatorTest.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.core.JsonProcessingException; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.junit.Assert; +import org.junit.Test; + +public class KllDoublesSketchToQuantilePostAggregatorTest +{ + @Test + public void testSerde() throws JsonProcessingException + { + final PostAggregator there = new KllDoublesSketchToQuantilePostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch"), + 0.5 + ); + DefaultObjectMapper mapper = new DefaultObjectMapper(); + KllDoublesSketchToQuantilePostAggregator andBackAgain = mapper.readValue( + mapper.writeValueAsString(there), + KllDoublesSketchToQuantilePostAggregator.class + ); + + Assert.assertEquals(there, andBackAgain); + Assert.assertArrayEquals(there.getCacheKey(), andBackAgain.getCacheKey()); + } + + @Test + public void testToString() + { + final PostAggregator postAgg = new KllDoublesSketchToQuantilePostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch"), + 0.5 + ); + + Assert.assertEquals( + "KllDoublesSketchToQuantilePostAggregator{name='post', field=FieldAccessPostAggregator{name='field1', fieldName='sketch'}, fraction=0.5}", + postAgg.toString() + ); + } + + @Test + public void testEqualsAndHashCode() + { + EqualsVerifier.forClass(KllDoublesSketchToQuantilePostAggregator.class) + .withNonnullFields("name", "field", "fraction") + .usingGetClass() + .verify(); + } +} diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToQuantilesPostAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToQuantilesPostAggregatorTest.java new file mode 100644 index 
000000000000..ff8d1985f0ba --- /dev/null +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToQuantilesPostAggregatorTest.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.core.JsonProcessingException; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.query.Druids; +import org.apache.druid.query.aggregation.Aggregator; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.TestDoubleColumnSelectorImpl; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.timeseries.TimeseriesQuery; +import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +import java.util.HashMap; +import java.util.Map; + +public class KllDoublesSketchToQuantilesPostAggregatorTest +{ + @Rule + public ExpectedException expectedException = ExpectedException.none(); + + @Test + public void testSerde() throws JsonProcessingException + { + final PostAggregator there = new KllDoublesSketchToQuantilesPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch"), + new double[] {0, 0.5, 1} + ); + DefaultObjectMapper mapper = new DefaultObjectMapper(); + KllDoublesSketchToQuantilesPostAggregator andBackAgain = mapper.readValue( + mapper.writeValueAsString(there), + KllDoublesSketchToQuantilesPostAggregator.class + ); + + Assert.assertEquals(there, andBackAgain); + Assert.assertArrayEquals(there.getCacheKey(), andBackAgain.getCacheKey()); + } + + @Test + public void testToString() + { + final PostAggregator postAgg = new KllDoublesSketchToQuantilesPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch"), + new double[] 
{0, 0.5, 1} + ); + + Assert.assertEquals( + "KllDoublesSketchToQuantilesPostAggregator{name='post', field=FieldAccessPostAggregator{name='field1', fieldName='sketch'}, fractions=[0.0, 0.5, 1.0]}", + postAgg.toString() + ); + } + + @Test + public void testComparator() + { + expectedException.expect(IAE.class); + expectedException.expectMessage("Comparing arrays of quantiles is not supported"); + final PostAggregator postAgg = new KllDoublesSketchToQuantilesPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch"), + new double[] {0, 0.5, 1} + ); + postAgg.getComparator(); + } + + @Test + public void testEqualsAndHashCode() + { + EqualsVerifier.forClass(KllDoublesSketchToQuantilesPostAggregator.class) + .withNonnullFields("name", "field", "fractions") + .usingGetClass() + .verify(); + } + + @Test + public void emptySketch() + { + final TestDoubleColumnSelectorImpl selector = new TestDoubleColumnSelectorImpl(null); + final Aggregator agg = new KllDoublesSketchBuildAggregator(selector, 8); + + final Map fields = new HashMap<>(); + fields.put("sketch", agg.get()); + + final PostAggregator postAgg = new KllDoublesSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("field", "sketch"), + new double[] {0, 0.5, 1} + ); + + final double[] quantiles = (double[]) postAgg.compute(fields); + Assert.assertNotNull(quantiles); + Assert.assertEquals(3, quantiles.length); + Assert.assertTrue(Double.isNaN(quantiles[0])); + Assert.assertTrue(Double.isNaN(quantiles[1])); + Assert.assertTrue(Double.isNaN(quantiles[2])); + } + + @Test + public void normalCase() + { + final double[] values = new double[] {1, 2, 3, 4, 5}; + final TestDoubleColumnSelectorImpl selector = new TestDoubleColumnSelectorImpl(values); + + final Aggregator agg = new KllDoublesSketchBuildAggregator(selector, 8); + //noinspection ForLoopReplaceableByForEach + for (int i = 0; i < values.length; i++) { + agg.aggregate(); + selector.increment(); + } + + final Map fields = 
new HashMap<>(); + fields.put("sketch", agg.get()); + + final PostAggregator postAgg = new KllDoublesSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("field", "sketch"), + new double[] {0, 0.5, 1} + ); + + final double[] quantiles = (double[]) postAgg.compute(fields); + Assert.assertNotNull(quantiles); + Assert.assertEquals(3, quantiles.length); + Assert.assertEquals(1.0, quantiles[0], 0); + Assert.assertEquals(3.0, quantiles[1], 0); + Assert.assertEquals(5.0, quantiles[2], 0); + } + + @Test + public void testResultArraySignature() + { + final TimeseriesQuery query = + Druids.newTimeseriesQueryBuilder() + .dataSource("dummy") + .intervals("2000/3000") + .granularity(Granularities.HOUR) + .aggregators( + new KllDoublesSketchAggregatorFactory("sketch", "col", 8, 1000000L) + ) + .postAggregators( + new KllDoublesSketchToQuantilesPostAggregator( + "a", + new FieldAccessPostAggregator("field", "sketch"), + new double[] {0, 0.5, 1} + ) + ) + .build(); + + Assert.assertEquals( + RowSignature.builder() + .addTimeColumn() + .add("sketch", null) + .add("a", ColumnType.DOUBLE_ARRAY) + .build(), + new TimeseriesQueryQueryToolChest().resultArraySignature(query) + ); + } +} diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToRankPostAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToRankPostAggregatorTest.java new file mode 100644 index 000000000000..4dcb4149b82a --- /dev/null +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToRankPostAggregatorTest.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.core.JsonProcessingException; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.query.Druids; +import org.apache.druid.query.aggregation.Aggregator; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.TestDoubleColumnSelectorImpl; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.timeseries.TimeseriesQuery; +import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.junit.Assert; +import org.junit.Test; + +import java.util.HashMap; +import java.util.Map; + +public class KllDoublesSketchToRankPostAggregatorTest +{ + @Test + public void testSerde() throws JsonProcessingException + { + final PostAggregator there = new KllDoublesSketchToRankPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch"), + 0 + ); + DefaultObjectMapper mapper = new DefaultObjectMapper(); + KllDoublesSketchToRankPostAggregator andBackAgain = mapper.readValue( + mapper.writeValueAsString(there), + 
KllDoublesSketchToRankPostAggregator.class + ); + + Assert.assertEquals(there, andBackAgain); + Assert.assertArrayEquals(there.getCacheKey(), andBackAgain.getCacheKey()); + } + + @Test + public void testToString() + { + final PostAggregator postAgg = new KllDoublesSketchToRankPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch"), + 0 + ); + + Assert.assertEquals( + "KllDoublesSketchToRankPostAggregator{name='post', field=FieldAccessPostAggregator{name='field1', fieldName='sketch'}, value=0.0}", + postAgg.toString() + ); + } + + @Test + public void testEqualsAndHashCode() + { + EqualsVerifier.forClass(KllDoublesSketchToRankPostAggregator.class) + .withNonnullFields("name", "field", "value") + .usingGetClass() + .verify(); + } + + @Test + public void emptySketch() + { + final TestDoubleColumnSelectorImpl selector = new TestDoubleColumnSelectorImpl(null); + final Aggregator agg = new KllDoublesSketchBuildAggregator(selector, 8); + + final Map fields = new HashMap<>(); + fields.put("sketch", agg.get()); + + final PostAggregator postAgg = new KllDoublesSketchToRankPostAggregator( + "rank", + new FieldAccessPostAggregator("field", "sketch"), + 0 + ); + + final double rank = (double) postAgg.compute(fields); + Assert.assertTrue(Double.isNaN(rank)); + } + + @Test + public void normalCase() + { + final double[] values = new double[] {1, 2, 3, 4, 5, 6}; + final TestDoubleColumnSelectorImpl selector = new TestDoubleColumnSelectorImpl(values); + + final Aggregator agg = new KllDoublesSketchBuildAggregator(selector, 8); + //noinspection ForLoopReplaceableByForEach + for (int i = 0; i < values.length; i++) { + agg.aggregate(); + selector.increment(); + } + + final Map fields = new HashMap<>(); + fields.put("sketch", agg.get()); + + final PostAggregator postAgg = new KllDoublesSketchToRankPostAggregator( + "rank", + new FieldAccessPostAggregator("field", "sketch"), + 4 + ); + + final double rank = (double) postAgg.compute(fields); + 
Assert.assertEquals(0.5, rank, 0); + } + + @Test + public void testResultArraySignature() + { + final TimeseriesQuery query = + Druids.newTimeseriesQueryBuilder() + .dataSource("dummy") + .intervals("2000/3000") + .granularity(Granularities.HOUR) + .aggregators( + new KllDoublesSketchAggregatorFactory("sketch", "col", 8, 1000000L) + ) + .postAggregators( + new KllDoublesSketchToRankPostAggregator( + "a", + new FieldAccessPostAggregator("field", "sketch"), + 4 + ) + ) + .build(); + + Assert.assertEquals( + RowSignature.builder() + .addTimeColumn() + .add("sketch", null) + .add("a", ColumnType.DOUBLE) + .build(), + new TimeseriesQueryQueryToolChest().resultArraySignature(query) + ); + } +} diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToStringPostAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToStringPostAggregatorTest.java new file mode 100644 index 000000000000..7c4a908248fe --- /dev/null +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchToStringPostAggregatorTest.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.core.JsonProcessingException; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +public class KllDoublesSketchToStringPostAggregatorTest +{ + @Rule + public ExpectedException expectedException = ExpectedException.none(); + + @Test + public void testSerde() throws JsonProcessingException + { + final PostAggregator there = new KllDoublesSketchToStringPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch") + ); + DefaultObjectMapper mapper = new DefaultObjectMapper(); + KllDoublesSketchToStringPostAggregator andBackAgain = mapper.readValue( + mapper.writeValueAsString(there), + KllDoublesSketchToStringPostAggregator.class + ); + + Assert.assertEquals(there, andBackAgain); + Assert.assertArrayEquals(there.getCacheKey(), andBackAgain.getCacheKey()); + } + + @Test + public void testToString() + { + final PostAggregator postAgg = new KllDoublesSketchToStringPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch") + ); + + Assert.assertEquals( + "KllDoublesSketchToStringPostAggregator{name='post', field=FieldAccessPostAggregator{name='field1', fieldName='sketch'}}", + postAgg.toString() + ); + } + + @Test + public void testComparator() + { + expectedException.expect(IAE.class); + expectedException.expectMessage("Comparing sketch summaries is not supported"); + final PostAggregator postAgg = new KllDoublesSketchToStringPostAggregator( + "post", + new 
FieldAccessPostAggregator("field1", "sketch") + ); + postAgg.getComparator(); + } + + @Test + public void testEqualsAndHashCode() + { + EqualsVerifier.forClass(KllDoublesSketchToStringPostAggregator.class) + .withNonnullFields("name", "field") + .usingGetClass() + .verify(); + } +} diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactoryTest.java new file mode 100644 index 000000000000..25eae831db4c --- /dev/null +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactoryTest.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.jsontype.NamedType; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.query.Druids; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.aggregation.post.FinalizingFieldAccessPostAggregator; +import org.apache.druid.query.timeseries.TimeseriesQuery; +import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; + +public class KllFloatsSketchAggregatorFactoryTest +{ + @Test + public void testEquals() + { + EqualsVerifier.forClass(KllFloatsSketchAggregatorFactory.class) + .withNonnullFields("name", "fieldName") + .withIgnoredFields("cacheTypeId") + .usingGetClass() + .verify(); + } + + @Test + public void testSerde() throws IOException + { + final ObjectMapper mapper = new DefaultObjectMapper(); + mapper.registerSubtypes(new NamedType(KllFloatsSketchAggregatorFactory.class, KllSketchModule.FLOATS_SKETCH)); + final KllFloatsSketchAggregatorFactory factory = new KllFloatsSketchAggregatorFactory( + "myFactory", + "myField", + 1024, + 1000L + ); + final byte[] json = mapper.writeValueAsBytes(factory); + final KllFloatsSketchAggregatorFactory fromJson = (KllFloatsSketchAggregatorFactory) mapper.readValue( + json, + AggregatorFactory.class + ); + Assert.assertEquals(factory, fromJson); + } + + @Test + public void testDefaultParams() + { + final KllFloatsSketchAggregatorFactory factory = new 
KllFloatsSketchAggregatorFactory( + "myFactory", + "myField", + null, + null + ); + + Assert.assertEquals(KllFloatsSketchAggregatorFactory.DEFAULT_K, factory.getK()); + Assert.assertEquals(KllFloatsSketchAggregatorFactory.DEFAULT_MAX_STREAM_LENGTH, factory.getMaxStreamLength()); + } + + @Test + public void testGuessAggregatorHeapFootprint() + { + KllFloatsSketchAggregatorFactory factory = new KllFloatsSketchAggregatorFactory( + "myFactory", + "myField", + 200, + null + ); + Assert.assertEquals(836, factory.guessAggregatorHeapFootprint(1)); + Assert.assertEquals(836, factory.guessAggregatorHeapFootprint(100)); + Assert.assertEquals(1732, factory.guessAggregatorHeapFootprint(1000)); + Assert.assertEquals(3272, factory.guessAggregatorHeapFootprint(1_000_000_000_000L)); + } + + @Test + public void testMaxIntermediateSize() + { + KllFloatsSketchAggregatorFactory factory = new KllFloatsSketchAggregatorFactory( + "myFactory", + "myField", + 200, + null + ); + Assert.assertEquals(2912, factory.getMaxIntermediateSize()); + + factory = new KllFloatsSketchAggregatorFactory( + "myFactory", + "myField", + 200, + 1_000_000_000_000L + ); + Assert.assertEquals(3272, factory.getMaxIntermediateSize()); + } + + @Test + public void testResultArraySignature() + { + final TimeseriesQuery query = + Druids.newTimeseriesQueryBuilder() + .dataSource("dummy") + .intervals("2000/3000") + .granularity(Granularities.HOUR) + .aggregators( + new CountAggregatorFactory("count"), + new KllFloatsSketchAggregatorFactory("floatsSketch", "col", 8, 1000000000L), + new KllFloatsSketchMergeAggregatorFactory("floatsSketchMerge", 8) + ) + .postAggregators( + new FieldAccessPostAggregator("floatsSketch-access", "floatsSketch"), + new FinalizingFieldAccessPostAggregator("floatsSketch-finalize", "floatsSketch"), + new FieldAccessPostAggregator("floatsSketchMerge-access", "floatsSketchMerge"), + new FinalizingFieldAccessPostAggregator("floatsSketchMerge-finalize", "floatsSketchMerge") + ) + .build(); + + 
Assert.assertEquals( + RowSignature.builder() + .addTimeColumn() + .add("count", ColumnType.LONG) + .add("floatsSketch", null) + .add("floatsSketchMerge", null) + .add("floatsSketch-access", KllSketchModule.FLOATS_TYPE) + .add("floatsSketch-finalize", ColumnType.LONG) + .add("floatsSketchMerge-access", KllSketchModule.FLOATS_TYPE) + .add("floatsSketchMerge-finalize", ColumnType.LONG) + .build(), + new TimeseriesQueryQueryToolChest().resultArraySignature(query) + ); + } +} diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorTest.java new file mode 100644 index 000000000000..1e71d4edfa0d --- /dev/null +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorTest.java @@ -0,0 +1,632 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.druid.query.aggregation.datasketches.kll;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.ImmutableMap;
+import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.jackson.DefaultObjectMapper;
+import org.apache.druid.java.util.common.granularity.Granularities;
+import org.apache.druid.java.util.common.guava.Sequence;
+import org.apache.druid.query.QueryContexts;
+import org.apache.druid.query.aggregation.AggregationTestHelper;
+import org.apache.druid.query.aggregation.AggregatorFactory;
+import org.apache.druid.query.groupby.GroupByQueryConfig;
+import org.apache.druid.query.groupby.GroupByQueryRunnerTest;
+import org.apache.druid.query.groupby.ResultRow;
+import org.apache.druid.query.groupby.strategy.GroupByStrategySelector;
+import org.apache.druid.testing.InitializedNullHandlingTest;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+/**
+ * Tests the {@code KllFloatsSketch} aggregator and its quantile/histogram post-aggregators.
+ * Each test indexes a TSV test resource and runs a JSON-specified groupBy or timeseries query
+ * against the resulting segment. The class is parameterized over every group-by configuration
+ * returned by {@link GroupByQueryRunnerTest#testConfigs()} and over the vectorize settings
+ * {@code false}, {@code true} and {@code force}.
+ */
+@RunWith(Parameterized.class)
+public class KllFloatsSketchAggregatorTest extends InitializedNullHandlingTest
+{
+  /** Resource holding pre-built sketches in a "sketch" column. */
+  private static final String SKETCH_DATA = "kll/kll_floats_sketch_data.tsv";
+  /** Resource holding raw values in "value" / "valueWithNulls" columns. */
+  private static final String BUILD_DATA = "kll/kll_floats_sketch_build_data.tsv";
+
+  /** Split points used for the histogram over the "sketch" aggregator. */
+  private static final String UNIT_SPLITS = "[0.25, 0.5, 0.75]";
+  /** Split points used for the histogram over the "sketchWithNulls" aggregator. */
+  private static final String NULLS_SPLITS = "[6.25, 7.5, 8.75]";
+
+  /** Index-time metric spec that keeps the raw "value" column as a double sum. */
+  private static final String VALUE_SUM_METRIC =
+      "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]";
+
+  /** Parser for the tests that index only the raw "value" column of {@link #BUILD_DATA}. */
+  private static final String RAW_VALUE_PARSER = tsvParser(
+      "[\"sequenceNumber\", \"product\"]",
+      "[]",
+      "[\"timestamp\", \"sequenceNumber\", \"product\", \"value\"]"
+  );
+
+  private final GroupByQueryConfig config;
+  private final AggregationTestHelper helper;
+  private final AggregationTestHelper timeSeriesHelper;
+
+  @Rule
+  public final TemporaryFolder tempFolder = new TemporaryFolder();
+
+  public KllFloatsSketchAggregatorTest(final GroupByQueryConfig config, final String vectorize)
+  {
+    this.config = config;
+    KllSketchModule.registerSerde();
+    KllSketchModule module = new KllSketchModule();
+    helper = AggregationTestHelper.createGroupByQueryAggregationTestHelper(
+        module.getJacksonModules(),
+        config,
+        tempFolder
+    ).withQueryContext(ImmutableMap.of(QueryContexts.VECTORIZE_KEY, vectorize));
+    timeSeriesHelper = AggregationTestHelper.createTimeseriesQueryAggregationTestHelper(
+        module.getJacksonModules(),
+        tempFolder
+    ).withQueryContext(ImmutableMap.of(QueryContexts.VECTORIZE_KEY, vectorize));
+  }
+
+  /**
+   * Builds the constructor arguments: one {@code {config, vectorize}} pair per combination.
+   */
+  @Parameterized.Parameters(name = "groupByConfig = {0}, vectorize = {1}")
+  public static Collection<Object[]> constructorFeeder()
+  {
+    final List<Object[]> constructors = new ArrayList<>();
+    for (GroupByQueryConfig config : GroupByQueryRunnerTest.testConfigs()) {
+      for (String vectorize : new String[]{"false", "true", "force"}) {
+        constructors.add(new Object[]{config, vectorize});
+      }
+    }
+    return constructors;
+  }
+
+  /**
+   * Closes both helpers. The timeseries helper is closed even when closing the group-by helper
+   * throws, so neither is left open.
+   */
+  @After
+  public void teardown() throws IOException
+  {
+    try {
+      helper.close();
+    }
+    finally {
+      timeSeriesHelper.close();
+    }
+  }
+
+  // this is to test Json properties and equals
+  @Test
+  public void serializeDeserializeFactoryWithFieldName() throws Exception
+  {
+    ObjectMapper objectMapper = new DefaultObjectMapper();
+    new KllSketchModule().getJacksonModules().forEach(objectMapper::registerModule);
+    KllFloatsSketchAggregatorFactory factory =
+        new KllFloatsSketchAggregatorFactory("name", "filedName", 200, 1000000000L);
+
+    AggregatorFactory other = objectMapper.readValue(
+        objectMapper.writeValueAsString(factory),
+        AggregatorFactory.class
+    );
+
+    Assert.assertEquals(factory, other);
+  }
+
+  // this is to test Json properties and equals for the combining factory
+  @Test
+  public void serializeDeserializeCombiningFactoryWithFieldName() throws Exception
+  {
+    ObjectMapper objectMapper = new DefaultObjectMapper();
+    new KllSketchModule().getJacksonModules().forEach(objectMapper::registerModule);
+    KllFloatsSketchAggregatorFactory factory = new KllFloatsSketchMergeAggregatorFactory("name", 200);
+
+    AggregatorFactory other = objectMapper.readValue(
+        objectMapper.writeValueAsString(factory),
+        AggregatorFactory.class
+    );
+
+    Assert.assertEquals(factory, other);
+  }
+
+  @Test
+  public void ingestingSketches() throws Exception
+  {
+    final String aggregations = jsonArray(
+        kllAggregator("sketch", "sketch"),
+        kllAggregator("non_existent_sketch", "non_existent_sketch")
+    );
+    final ResultRow row = singleRow(
+        indexAndRunGroupBy(
+            SKETCH_DATA,
+            tsvParser("[\"product\"]", "[]", "[\"timestamp\", \"product\", \"sketch\"]"),
+            aggregations,
+            queryJson(
+                "groupBy",
+                aggregations,
+                jsonArray(
+                    toQuantiles("quantiles", "sketch"),
+                    toHistogram("histogram", UNIT_SPLITS, "sketch")
+                )
+            )
+        )
+    );
+
+    // The assertions index the row as: aggregations first, then post-aggregations, in query order.
+    assertCount(0, row.get(1)); // non_existent_sketch
+    assertCount(400, row.get(0));
+    assertQuantiles(0, 0.5f, 1f, 0.05, row.get(2));
+    assertHistogram(100, 100 * 0.2, row.get(3)); // 400 items uniformly distributed into 4 bins
+  }
+
+  @Test
+  public void buildingSketchesAtIngestionTime() throws Exception
+  {
+    final ResultRow row = singleRow(
+        indexAndRunGroupBy(
+            BUILD_DATA,
+            tsvParser(
+                "[\"product\"]",
+                "[ \"sequenceNumber\"]",
+                "[\"timestamp\", \"sequenceNumber\", \"product\", \"value\", \"valueWithNulls\"]"
+            ),
+            jsonArray(
+                kllAggregator("sketch", "value"),
+                kllAggregator("sketchWithNulls", "valueWithNulls")
+            ),
+            queryJson(
+                "groupBy",
+                jsonArray(
+                    kllAggregator("sketch", "sketch"),
+                    kllAggregator("sketchWithNulls", "sketchWithNulls"),
+                    kllAggregator("non_existent_sketch", "non_existent_sketch")
+                ),
+                jsonArray(
+                    toQuantiles("quantiles", "sketch"),
+                    toHistogram("histogram", UNIT_SPLITS, "sketch"),
+                    toQuantiles("quantilesWithNulls", "sketchWithNulls"),
+                    toHistogram("histogramWithNulls", NULLS_SPLITS, "sketchWithNulls")
+                )
+            )
+        )
+    );
+
+    assertCount(400, row.get(0));
+    assertCount(355, row.get(1));
+
+    // post aggs
+    assertQuantiles(0, 0.5f, 1f, 0.05, row.get(3));
+    assertHistogram(100, 100 * 0.2, row.get(4)); // 400 items uniformly distributed into 4 bins
+
+    // post aggs with nulls
+    assertQuantiles(5f, 7.5f, 10f, 0.07, row.get(5));
+    assertHistogram(100, 50, row.get(6)); // distribution is skewed due to nulls
+  }
+
+  @Test
+  public void buildingSketchesAtQueryTime() throws Exception
+  {
+    final ResultRow row = singleRow(
+        indexAndRunGroupBy(
+            BUILD_DATA,
+            tsvParser(
+                "[\"sequenceNumber\", \"product\"]",
+                "[]",
+                "[\"timestamp\", \"sequenceNumber\", \"product\", \"value\", \"valueWithNulls\"]"
+            ),
+            "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"},"
+            + "{\"type\": \"doubleSum\", \"name\": \"valueWithNulls\", \"fieldName\": \"valueWithNulls\"}]",
+            queryJson(
+                "groupBy",
+                jsonArray(
+                    kllAggregator("sketch", "value"),
+                    kllAggregator("sketchWithNulls", "valueWithNulls")
+                ),
+                jsonArray(
+                    toQuantile("quantile", "sketch"),
+                    toQuantiles("quantiles", "sketch"),
+                    toHistogram("histogram", UNIT_SPLITS, "sketch"),
+                    toQuantile("quantileWithNulls", "sketchWithNulls"),
+                    toQuantiles("quantilesWithNulls", "sketchWithNulls"),
+                    toHistogram("histogramWithNulls", NULLS_SPLITS, "sketchWithNulls")
+                )
+            )
+        )
+    );
+
+    // Expectations for the "WithNulls" column differ by null-handling mode.
+    final boolean nullsAsDefault = NullHandling.replaceWithDefault();
+    final float medianWithNulls = nullsAsDefault ? 7.2f : 7.5f;
+
+    assertCount(400, row.get(0));
+    assertCount(nullsAsDefault ? 400 : 377, row.get(1));
+
+    // post aggs
+    assertQuantile(0.5f, 0.05, row.get(2)); // median value
+    assertQuantiles(0, 0.5f, 1f, 0.05, row.get(3));
+    assertHistogram(100, 100 * 0.2, row.get(4)); // 400 items uniformly distributed into 4 bins
+
+    // post aggs with nulls
+    assertQuantile(medianWithNulls, 0.1, row.get(5)); // median value
+    assertQuantiles(nullsAsDefault ? 0 : 5f, medianWithNulls, 10f, 0.1, row.get(6));
+    assertHistogram(100, 80, row.get(7)); // distribution is skewed due to nulls/0s
+  }
+
+  @Test
+  public void queryingDataWithFieldNameValueAsFloatInsteadOfSketch() throws Exception
+  {
+    final ResultRow row = singleRow(
+        indexAndRunGroupBy(
+            BUILD_DATA,
+            RAW_VALUE_PARSER,
+            VALUE_SUM_METRIC,
+            queryJson(
+                "groupBy",
+                jsonArray(kllAggregator("sketch", "value")),
+                jsonArray(
+                    toQuantile("quantile", "sketch"),
+                    toQuantiles("quantiles", "sketch"),
+                    toHistogram("histogram", UNIT_SPLITS, "sketch")
+                )
+            )
+        )
+    );
+
+    assertCount(400, row.get(0));
+
+    // post aggs
+    assertQuantile(0.5f, 0.05, row.get(1)); // median value
+    assertQuantiles(0, 0.5f, 1f, 0.05, row.get(2));
+    assertHistogram(100, 100 * 0.2, row.get(3)); // 400 items uniformly distributed into 4 bins
+  }
+
+  @Test
+  public void timeSeriesQueryInputAsFloat() throws Exception
+  {
+    final Sequence<?> seq = timeSeriesHelper.createIndexAndRunQueryOnSegment(
+        resourceFile(BUILD_DATA),
+        RAW_VALUE_PARSER,
+        VALUE_SUM_METRIC,
+        0, // minTimestamp
+        Granularities.NONE,
+        10, // maxRowCount
+        queryJson(
+            "timeseries",
+            jsonArray(kllAggregator("sketch", "value")),
+            jsonArray(
+                toQuantile("quantile1", "sketch"),
+                toQuantiles("quantiles1", "sketch"),
+                toHistogram("histogram1", UNIT_SPLITS, "sketch")
+            )
+        )
+    );
+    Assert.assertEquals(1, seq.toList().size());
+  }
+
+  @Test
+  public void testSuccessWhenMaxStreamLengthHit() throws Exception
+  {
+    final Sequence<ResultRow> seq = indexAndRunGroupBy(
+        BUILD_DATA,
+        RAW_VALUE_PARSER,
+        VALUE_SUM_METRIC,
+        queryJson(
+            "groupBy",
+            jsonArray(
+                "{\"type\": \"KllFloatsSketch\", \"name\": \"sketch\", \"fieldName\": \"value\","
+                + " \"k\": 200, \"maxStreamLength\": 10}"
+            ),
+            jsonArray(
+                toQuantile("quantile", "sketch"),
+                toQuantiles("quantiles", "sketch"),
+                toHistogram("histogram", UNIT_SPLITS, "sketch")
+            )
+        )
+    );
+    // As before, the sequence is only materialized for strategies other than v1.
+    if (!GroupByStrategySelector.STRATEGY_V1.equals(config.getDefaultStrategy())) {
+      seq.toList();
+    }
+  }
+
+  /** Resolves a classpath test resource to a {@link File}. */
+  private static File resourceFile(final String resourceName)
+  {
+    return new File(KllFloatsSketchAggregatorTest.class.getClassLoader().getResource(resourceName).getFile());
+  }
+
+  /**
+   * Builds the TSV string-parser JSON shared by all tests.
+   *
+   * @param dimensions          JSON array of dimension names
+   * @param dimensionExclusions JSON array of excluded dimension names
+   * @param columns             JSON array of the TSV column names, in file order
+   */
+  private static String tsvParser(final String dimensions, final String dimensionExclusions, final String columns)
+  {
+    return String.join(
+        "\n",
+        "{",
+        "  \"type\": \"string\",",
+        "  \"parseSpec\": {",
+        "    \"format\": \"tsv\",",
+        "    \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},",
+        "    \"dimensionsSpec\": {",
+        "      \"dimensions\": " + dimensions + ",",
+        "      \"dimensionExclusions\": " + dimensionExclusions + ",",
+        "      \"spatialDimensions\": []",
+        "    },",
+        "    \"columns\": " + columns,
+        "  }",
+        "}"
+    );
+  }
+
+  /** Joins already-serialized JSON values into a JSON array. */
+  private static String jsonArray(final String... elements)
+  {
+    return "[" + String.join(", ", elements) + "]";
+  }
+
+  /** JSON for a {@code KllFloatsSketch} aggregator with k = 200. */
+  private static String kllAggregator(final String name, final String fieldName)
+  {
+    return "{\"type\": \"KllFloatsSketch\", \"name\": \"" + name + "\", \"fieldName\": \"" + fieldName
+           + "\", \"k\": 200}";
+  }
+
+  /** JSON for a post-aggregator of the given type that reads {@code sketchName} via fieldAccess. */
+  private static String postAggregator(
+      final String type,
+      final String name,
+      final String argument,
+      final String sketchName
+  )
+  {
+    return "{\"type\": \"" + type + "\", \"name\": \"" + name + "\", " + argument
+           + ", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"" + sketchName + "\"}}";
+  }
+
+  /** {@code KllFloatsSketchToQuantile} post-aggregator for fraction 0.5. */
+  private static String toQuantile(final String name, final String sketchName)
+  {
+    return postAggregator("KllFloatsSketchToQuantile", name, "\"fraction\": 0.5", sketchName);
+  }
+
+  /** {@code KllFloatsSketchToQuantiles} post-aggregator for fractions 0, 0.5 and 1. */
+  private static String toQuantiles(final String name, final String sketchName)
+  {
+    return postAggregator("KllFloatsSketchToQuantiles", name, "\"fractions\": [0, 0.5, 1]", sketchName);
+  }
+
+  /** {@code KllFloatsSketchToHistogram} post-aggregator for the given JSON array of split points. */
+  private static String toHistogram(final String name, final String splitPoints, final String sketchName)
+  {
+    return postAggregator("KllFloatsSketchToHistogram", name, "\"splitPoints\": " + splitPoints, sketchName);
+  }
+
+  /**
+   * Builds the query JSON over the fixed test datasource and interval. The empty
+   * {@code dimensions} entry is emitted for groupBy queries only, matching the original queries.
+   *
+   * @param queryType        "groupBy" or "timeseries"
+   * @param aggregations     JSON array of aggregators
+   * @param postAggregations JSON array of post-aggregators
+   */
+  private static String queryJson(final String queryType, final String aggregations, final String postAggregations)
+  {
+    final String dimensions = "groupBy".equals(queryType) ? "  \"dimensions\": [],\n" : "";
+    return "{\n"
+           + "  \"queryType\": \"" + queryType + "\",\n"
+           + "  \"dataSource\": \"test_datasource\",\n"
+           + "  \"granularity\": \"ALL\",\n"
+           + dimensions
+           + "  \"aggregations\": " + aggregations + ",\n"
+           + "  \"postAggregations\": " + postAggregations + ",\n"
+           + "  \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]\n"
+           + "}";
+  }
+
+  /** Indexes the given resource with the group-by helper and runs the query on the segment. */
+  private Sequence<ResultRow> indexAndRunGroupBy(
+      final String dataResource,
+      final String parserJson,
+      final String metricsJson,
+      final String queryJson
+  ) throws Exception
+  {
+    return helper.createIndexAndRunQueryOnSegment(
+        resourceFile(dataResource),
+        parserJson,
+        metricsJson,
+        0, // minTimestamp
+        Granularities.NONE,
+        10, // maxRowCount
+        queryJson
+    );
+  }
+
+  /** Materializes the sequence, asserts it holds exactly one row and returns that row. */
+  private static ResultRow singleRow(final Sequence<ResultRow> seq)
+  {
+    final List<ResultRow> results = seq.toList();
+    Assert.assertEquals(1, results.size());
+    return results.get(0);
+  }
+
+  /** Asserts that a finalized sketch aggregator value is a {@code Long} equal to {@code expected}. */
+  private static void assertCount(final long expected, final Object value)
+  {
+    Assert.assertTrue(value instanceof Long);
+    Assert.assertEquals(expected, (long) value);
+  }
+
+  /** Asserts that a ToQuantile result is a {@code Float} within {@code delta} of {@code expected}. */
+  private static void assertQuantile(final float expected, final double delta, final Object value)
+  {
+    Assert.assertTrue(value instanceof Float);
+    Assert.assertEquals(expected, (float) value, delta);
+  }
+
+  /**
+   * Asserts a ToQuantiles result for fractions 0, 0.5 and 1. Min and max are checked with a
+   * tolerance of 0.05; the median uses {@code medianDelta}.
+   */
+  private static void assertQuantiles(
+      final float min,
+      final float median,
+      final float max,
+      final double medianDelta,
+      final Object value
+  )
+  {
+    Assert.assertTrue(value instanceof float[]);
+    final float[] quantiles = (float[]) value;
+    Assert.assertEquals(min, quantiles[0], 0.05); // min value
+    Assert.assertEquals(median, quantiles[1], medianDelta); // median value
+    Assert.assertEquals(max, quantiles[2], 0.05); // max value
+  }
+
+  /** Asserts a ToHistogram result has 4 bins (3 split points), each within {@code delta} of {@code expectedPerBin}. */
+  private static void assertHistogram(final double expectedPerBin, final double delta, final Object value)
+  {
+    Assert.assertTrue(value instanceof double[]);
+    final double[] histogram = (double[]) value;
+    Assert.assertEquals(4, histogram.length);
+    for (final double bin : histogram) {
+      Assert.assertEquals(expectedPerBin, bin, delta);
+    }
+  }
+
+}
diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchComplexMetricSerdeTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchComplexMetricSerdeTest.java
new file mode 100644
index 000000000000..5ff441df1c14
--- /dev/null
+++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchComplexMetricSerdeTest.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.kll;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import org.apache.datasketches.kll.KllFloatsSketch;
+import org.apache.druid.data.input.MapBasedInputRow;
+import org.apache.druid.segment.serde.ComplexMetricExtractor;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Checks what the extractor obtained from {@link KllFloatsSketchComplexMetricSerde} produces
+ * for raw string inputs: an empty string and several numeric spellings.
+ */
+public class KllFloatsSketchComplexMetricSerdeTest
+{
+  /** Name of the single column present in every input row built by these tests. */
+  private static final String COLUMN = "foo";
+
+  @Test
+  public void testExtractorOnEmptyString()
+  {
+    Assert.assertEquals(0, extract("").getNumRetained());
+  }
+
+  @Test
+  public void testExtractorOnPositiveNumber()
+  {
+    assertHoldsOnly(777d, extract("777"));
+  }
+
+  @Test
+  public void testExtractorOnNegativeNumber()
+  {
+    assertHoldsOnly(-133d, extract("-133"));
+  }
+
+  @Test
+  public void testExtractorOnDecimalNumber()
+  {
+    assertHoldsOnly(3.1d, extract("3.1"));
+  }
+
+  @Test
+  public void testExtractorOnLeadingDecimalNumber()
+  {
+    assertHoldsOnly(0.1d, extract(".1"));
+  }
+
+  /** Runs a fresh serde's extractor over a one-column row whose value is {@code rawValue}. */
+  private static KllFloatsSketch extract(final String rawValue)
+  {
+    final ComplexMetricExtractor extractor = new KllFloatsSketchComplexMetricSerde().getExtractor();
+    return (KllFloatsSketch) extractor.extractValue(
+        new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of(COLUMN, rawValue)),
+        COLUMN
+    );
+  }
+
+  /** Asserts the sketch retains one item and that its max value is {@code expected} (within 0.01). */
+  private static void assertHoldsOnly(final double expected, final KllFloatsSketch sketch)
+  {
+    Assert.assertEquals(1, sketch.getNumRetained());
+    Assert.assertEquals(expected, sketch.getMaxValue(), 0.01d);
+  }
+}
diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeAggregatorFactoryTest.java
new file mode 100644
index 000000000000..43e0da938737
--- /dev/null
+++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeAggregatorFactoryTest.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.kll;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.jsontype.NamedType;
+import nl.jqno.equalsverifier.EqualsVerifier;
+import org.apache.druid.jackson.DefaultObjectMapper;
+import org.apache.druid.query.aggregation.AggregatorFactory;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.IOException;
+
+/**
+ * Verifies the equals/hashCode contract and the JSON round trip of
+ * {@link KllFloatsSketchMergeAggregatorFactory}.
+ */
+public class KllFloatsSketchMergeAggregatorFactoryTest
+{
+  /** Runs EqualsVerifier with "cacheTypeId" ignored and "name"/"fieldName" declared non-null. */
+  @Test
+  public void testEquals()
+  {
+    EqualsVerifier.forClass(KllFloatsSketchMergeAggregatorFactory.class)
+                  .withNonnullFields("name", "fieldName")
+                  .withIgnoredFields("cacheTypeId")
+                  .usingGetClass()
+                  .verify();
+  }
+
+  /** Serializes a factory to bytes and reads it back through the {@link AggregatorFactory} base type. */
+  @Test
+  public void testSerde() throws IOException
+  {
+    final ObjectMapper objectMapper = new DefaultObjectMapper();
+    // Only the merge subtype is registered here, under the module's type name.
+    final NamedType mergeType =
+        new NamedType(KllFloatsSketchMergeAggregatorFactory.class, KllSketchModule.FLOATS_SKETCH_MERGE);
+    objectMapper.registerSubtypes(mergeType);
+
+    final KllFloatsSketchMergeAggregatorFactory original =
+        new KllFloatsSketchMergeAggregatorFactory("myFactory", 1024, 1000L);
+
+    final byte[] serialized = objectMapper.writeValueAsBytes(original);
+    final AggregatorFactory deserialized = objectMapper.readValue(serialized, AggregatorFactory.class);
+    final KllFloatsSketchMergeAggregatorFactory roundTripped = (KllFloatsSketchMergeAggregatorFactory) deserialized;
+
+    Assert.assertEquals(original, roundTripped);
+  }
+}
diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToCDFPostAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToCDFPostAggregatorTest.java
new file mode 100644
index 000000000000..d953b0aebacc
--- /dev/null
+++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToCDFPostAggregatorTest.java
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.kll;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import nl.jqno.equalsverifier.EqualsVerifier;
+import org.apache.druid.jackson.DefaultObjectMapper;
+import org.apache.druid.java.util.common.IAE;
+import org.apache.druid.java.util.common.granularity.Granularities;
+import org.apache.druid.query.Druids;
+import org.apache.druid.query.aggregation.Aggregator;
+import org.apache.druid.query.aggregation.PostAggregator;
+import org.apache.druid.query.aggregation.TestFloatColumnSelector;
+import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator;
+import org.apache.druid.query.timeseries.TimeseriesQuery;
+import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.RowSignature;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Unit tests for {@link KllFloatsSketchToCDFPostAggregator}: JSON round trip, {@code toString},
+ * the unsupported comparator, equals/hashCode, computation over empty and populated sketches,
+ * and the reported result-array signature.
+ */
+public class KllFloatsSketchToCDFPostAggregatorTest
+{
+  @Rule
+  public ExpectedException expectedException = ExpectedException.none();
+
+  @Test
+  public void testSerde() throws JsonProcessingException
+  {
+    final PostAggregator there = quartilesPostAggregator();
+    DefaultObjectMapper mapper = new DefaultObjectMapper();
+    KllFloatsSketchToCDFPostAggregator andBackAgain = mapper.readValue(
+        mapper.writeValueAsString(there),
+        KllFloatsSketchToCDFPostAggregator.class
+    );
+
+    Assert.assertEquals(there, andBackAgain);
+    Assert.assertArrayEquals(there.getCacheKey(), andBackAgain.getCacheKey());
+  }
+
+  @Test
+  public void testToString()
+  {
+    final PostAggregator postAgg = quartilesPostAggregator();
+
+    Assert.assertEquals(
+        "KllFloatsSketchToCDFPostAggregator{name='post', field=FieldAccessPostAggregator{name='field1', fieldName='sketch'}, splitPoints=[0.25, 0.75]}",
+        postAgg.toString()
+    );
+  }
+
+  @Test
+  public void testComparator()
+  {
+    // Expectations must be registered before the call that is expected to throw.
+    expectedException.expect(IAE.class);
+    expectedException.expectMessage("Comparing histograms is not supported");
+    final PostAggregator postAgg = quartilesPostAggregator();
+    postAgg.getComparator();
+  }
+
+  @Test
+  public void testEqualsAndHashCode()
+  {
+    EqualsVerifier.forClass(KllFloatsSketchToCDFPostAggregator.class)
+                  .withNonnullFields("name", "field", "splitPoints")
+                  .usingGetClass()
+                  .verify();
+  }
+
+  @Test
+  public void emptySketch()
+  {
+    final TestFloatColumnSelector selector = new TestFloatColumnSelector(null);
+    final Aggregator agg = new KllFloatsSketchBuildAggregator(selector, 8);
+
+    final double[] cdf = computeCdfAtFour(agg);
+    Assert.assertNotNull(cdf);
+    // One split point yields two entries; both are NaN when nothing was aggregated.
+    Assert.assertEquals(2, cdf.length);
+    Assert.assertTrue(Double.isNaN(cdf[0]));
+    Assert.assertTrue(Double.isNaN(cdf[1]));
+  }
+
+  @Test
+  public void normalCase()
+  {
+    final float[] values = new float[] {1, 2, 3, 4, 5, 6};
+    final TestFloatColumnSelector selector = new TestFloatColumnSelector(values);
+
+    final Aggregator agg = new KllFloatsSketchBuildAggregator(selector, 8);
+    //noinspection ForLoopReplaceableByForEach
+    for (int i = 0; i < values.length; i++) {
+      agg.aggregate();
+      selector.increment();
+    }
+
+    // half of the distribution is below 4
+    final double[] cdf = computeCdfAtFour(agg);
+    Assert.assertNotNull(cdf);
+    Assert.assertEquals(2, cdf.length);
+    Assert.assertEquals(0.5, cdf[0], 0);
+    Assert.assertEquals(1.0, cdf[1], 0);
+  }
+
+  @Test
+  public void testResultArraySignature()
+  {
+    final TimeseriesQuery query =
+        Druids.newTimeseriesQueryBuilder()
+              .dataSource("dummy")
+              .intervals("2000/3000")
+              .granularity(Granularities.HOUR)
+              .aggregators(
+                  new KllFloatsSketchAggregatorFactory("sketch", "col", 8, 1000000L)
+              )
+              .postAggregators(
+                  new KllFloatsSketchToCDFPostAggregator(
+                      "a",
+                      new FieldAccessPostAggregator("field", "sketch"),
+                      new float[] {4}
+                  )
+              )
+              .build();
+
+    Assert.assertEquals(
+        RowSignature.builder()
+                    .addTimeColumn()
+                    .add("sketch", null)
+                    .add("a", ColumnType.DOUBLE_ARRAY)
+                    .build(),
+        new TimeseriesQueryQueryToolChest().resultArraySignature(query)
+    );
+  }
+
+  /** Post-aggregator named "post" over field "sketch" with split points 0.25 and 0.75. */
+  private static PostAggregator quartilesPostAggregator()
+  {
+    return new KllFloatsSketchToCDFPostAggregator(
+        "post",
+        new FieldAccessPostAggregator("field1", "sketch"),
+        new float[]{0.25f, 0.75f}
+    );
+  }
+
+  /**
+   * Publishes the aggregator's current value under the key "sketch" and computes a CDF with the
+   * single split point 4 over it.
+   */
+  private static double[] computeCdfAtFour(final Aggregator agg)
+  {
+    final Map<String, Object> fields = new HashMap<>();
+    fields.put("sketch", agg.get());
+
+    final PostAggregator postAgg = new KllFloatsSketchToCDFPostAggregator(
+        "cdf",
+        new FieldAccessPostAggregator("field", "sketch"),
+        new float[] {4}
+    );
+    return (double[]) postAgg.compute(fields);
+  }
+}
diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToHistogramPostAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToHistogramPostAggregatorTest.java
new file mode 100644
index 000000000000..5b970816f3fa
--- /dev/null
+++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToHistogramPostAggregatorTest.java
@@ -0,0 +1,220 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.core.JsonProcessingException; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.query.Druids; +import org.apache.druid.query.aggregation.Aggregator; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.TestFloatColumnSelector; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.timeseries.TimeseriesQuery; +import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +import java.util.HashMap; +import java.util.Map; + +public class KllFloatsSketchToHistogramPostAggregatorTest +{ + @Rule + public ExpectedException expectedException = ExpectedException.none(); + + @Test + public void testSerde() throws JsonProcessingException + { + final PostAggregator there = new KllFloatsSketchToHistogramPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch"), + new float[]{0.25f, 0.75f}, + null + ); + DefaultObjectMapper mapper = new DefaultObjectMapper(); + KllFloatsSketchToHistogramPostAggregator andBackAgain = 
mapper.readValue( + mapper.writeValueAsString(there), + KllFloatsSketchToHistogramPostAggregator.class + ); + + Assert.assertEquals(there, andBackAgain); + Assert.assertArrayEquals(there.getCacheKey(), andBackAgain.getCacheKey()); + } + + @Test + public void testToString() + { + final PostAggregator postAgg = new KllFloatsSketchToHistogramPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch"), + new float[]{0.25f, 0.75f}, + null + ); + + Assert.assertEquals( + "KllFloatsSketchToHistogramPostAggregator{name='post', field=FieldAccessPostAggregator{name='field1', fieldName='sketch'}, splitPoints=[0.25, 0.75], numBins=null}", + postAgg.toString() + ); + } + + @Test + public void testComparator() + { + expectedException.expect(IAE.class); + expectedException.expectMessage("Comparing histograms is not supported"); + final PostAggregator postAgg = new KllFloatsSketchToHistogramPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch"), + new float[]{0.25f, 0.75f}, + null + ); + postAgg.getComparator(); + } + + @Test + public void testEqualsAndHashCode() + { + EqualsVerifier.forClass(KllFloatsSketchToHistogramPostAggregator.class) + .withNonnullFields("name", "field", "splitPoints") + .usingGetClass() + .verify(); + } + + @Test + public void emptySketch() + { + final TestFloatColumnSelector selector = new TestFloatColumnSelector(null); + final Aggregator agg = new KllFloatsSketchBuildAggregator(selector, 8); + + final Map fields = new HashMap<>(); + fields.put("sketch", agg.get()); + + final PostAggregator postAgg = new KllFloatsSketchToHistogramPostAggregator( + "histogram", + new FieldAccessPostAggregator("field", "sketch"), + new float[] {3.5f}, + null + ); + + final double[] histogram = (double[]) postAgg.compute(fields); + Assert.assertNotNull(histogram); + Assert.assertEquals(2, histogram.length); + Assert.assertTrue(Double.isNaN(histogram[0])); + Assert.assertTrue(Double.isNaN(histogram[1])); + } + + @Test + public void 
splitPoints() + { + final float[] values = new float[] {1, 2, 3, 4, 5, 6}; + final TestFloatColumnSelector selector = new TestFloatColumnSelector(values); + + final Aggregator agg = new KllFloatsSketchBuildAggregator(selector, 8); + //noinspection ForLoopReplaceableByForEach + for (int i = 0; i < values.length; i++) { + agg.aggregate(); + selector.increment(); + } + + final Map fields = new HashMap<>(); + fields.put("sketch", agg.get()); + + final PostAggregator postAgg = new KllFloatsSketchToHistogramPostAggregator( + "histogram", + new FieldAccessPostAggregator("field", "sketch"), + new float[] {3.5f}, // splits distribution into two bins of equal mass + null + ); + + final double[] histogram = (double[]) postAgg.compute(fields); + Assert.assertNotNull(histogram); + Assert.assertEquals(2, histogram.length); + Assert.assertEquals(3.0, histogram[0], 0); + Assert.assertEquals(3.0, histogram[1], 0); + } + + @Test + public void numBins() + { + final float[] values = new float[] {1, 2, 3, 4, 5, 6}; + final TestFloatColumnSelector selector = new TestFloatColumnSelector(values); + + final Aggregator agg = new KllFloatsSketchBuildAggregator(selector, 8); + //noinspection ForLoopReplaceableByForEach + for (int i = 0; i < values.length; i++) { + agg.aggregate(); + selector.increment(); + } + + final Map fields = new HashMap<>(); + fields.put("sketch", agg.get()); + + final PostAggregator postAgg = new KllFloatsSketchToHistogramPostAggregator( + "histogram", + new FieldAccessPostAggregator("field", "sketch"), + null, + 2 // two bins of equal mass + ); + + final double[] histogram = (double[]) postAgg.compute(fields); + Assert.assertNotNull(histogram); + Assert.assertEquals(2, histogram.length); + Assert.assertEquals(3.0, histogram[0], 0); + Assert.assertEquals(3.0, histogram[1], 0); + } + + @Test + public void testResultArraySignature() + { + final TimeseriesQuery query = + Druids.newTimeseriesQueryBuilder() + .dataSource("dummy") + .intervals("2000/3000") + 
.granularity(Granularities.HOUR) + .aggregators( + new KllFloatsSketchAggregatorFactory("sketch", "col", 8, 1000000L) + ) + .postAggregators( + new KllFloatsSketchToHistogramPostAggregator( + "a", + new FieldAccessPostAggregator("field", "sketch"), + new float[] {3.5f}, + null + ) + ) + .build(); + + Assert.assertEquals( + RowSignature.builder() + .addTimeColumn() + .add("sketch", null) + .add("a", ColumnType.DOUBLE_ARRAY) + .build(), + new TimeseriesQueryQueryToolChest().resultArraySignature(query) + ); + } +} diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToQuantilePostAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToQuantilePostAggregatorTest.java new file mode 100644 index 000000000000..1ca8b89f3b3f --- /dev/null +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToQuantilePostAggregatorTest.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.core.JsonProcessingException; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.junit.Assert; +import org.junit.Test; + +public class KllFloatsSketchToQuantilePostAggregatorTest +{ + @Test + public void testSerde() throws JsonProcessingException + { + final PostAggregator there = new KllFloatsSketchToQuantilePostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch"), + 0.5 + ); + DefaultObjectMapper mapper = new DefaultObjectMapper(); + KllFloatsSketchToQuantilePostAggregator andBackAgain = mapper.readValue( + mapper.writeValueAsString(there), + KllFloatsSketchToQuantilePostAggregator.class + ); + + Assert.assertEquals(there, andBackAgain); + Assert.assertArrayEquals(there.getCacheKey(), andBackAgain.getCacheKey()); + } + + @Test + public void testToString() + { + final PostAggregator postAgg = new KllFloatsSketchToQuantilePostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch"), + 0.5 + ); + + Assert.assertEquals( + "KllFloatsSketchToQuantilePostAggregator{name='post', field=FieldAccessPostAggregator{name='field1', fieldName='sketch'}, fraction=0.5}", + postAgg.toString() + ); + } + + @Test + public void testEqualsAndHashCode() + { + EqualsVerifier.forClass(KllFloatsSketchToQuantilePostAggregator.class) + .withNonnullFields("name", "field", "fraction") + .usingGetClass() + .verify(); + } +} diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToQuantilesPostAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToQuantilesPostAggregatorTest.java new file mode 100644 index 
000000000000..6f4724bc9709 --- /dev/null +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToQuantilesPostAggregatorTest.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.core.JsonProcessingException; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.query.Druids; +import org.apache.druid.query.aggregation.Aggregator; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.TestFloatColumnSelector; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.timeseries.TimeseriesQuery; +import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +import java.util.HashMap; +import java.util.Map; + +public class KllFloatsSketchToQuantilesPostAggregatorTest +{ + @Rule + public ExpectedException expectedException = ExpectedException.none(); + + @Test + public void testSerde() throws JsonProcessingException + { + final PostAggregator there = new KllFloatsSketchToQuantilesPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch"), + new double[] {0, 0.5, 1} + ); + DefaultObjectMapper mapper = new DefaultObjectMapper(); + KllFloatsSketchToQuantilesPostAggregator andBackAgain = mapper.readValue( + mapper.writeValueAsString(there), + KllFloatsSketchToQuantilesPostAggregator.class + ); + + Assert.assertEquals(there, andBackAgain); + Assert.assertArrayEquals(there.getCacheKey(), andBackAgain.getCacheKey()); + } + + @Test + public void testToString() + { + final PostAggregator postAgg = new KllFloatsSketchToQuantilesPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch"), + new double[] {0, 0.5, 
1} + ); + + Assert.assertEquals( + "KllFloatsSketchToQuantilesPostAggregator{name='post', field=FieldAccessPostAggregator{name='field1', fieldName='sketch'}, fractions=[0.0, 0.5, 1.0]}", + postAgg.toString() + ); + } + + @Test + public void testComparator() + { + expectedException.expect(IAE.class); + expectedException.expectMessage("Comparing arrays of quantiles is not supported"); + final PostAggregator postAgg = new KllFloatsSketchToQuantilesPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch"), + new double[] {0, 0.5, 1} + ); + postAgg.getComparator(); + } + + @Test + public void testEqualsAndHashCode() + { + EqualsVerifier.forClass(KllFloatsSketchToQuantilesPostAggregator.class) + .withNonnullFields("name", "field", "fractions") + .usingGetClass() + .verify(); + } + + @Test + public void emptySketch() + { + final TestFloatColumnSelector selector = new TestFloatColumnSelector(null); + final Aggregator agg = new KllFloatsSketchBuildAggregator(selector, 8); + + final Map fields = new HashMap<>(); + fields.put("sketch", agg.get()); + + final PostAggregator postAgg = new KllFloatsSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("field", "sketch"), + new double[] {0, 0.5, 1} + ); + + final float[] quantiles = (float[]) postAgg.compute(fields); + Assert.assertNotNull(quantiles); + Assert.assertEquals(3, quantiles.length); + Assert.assertTrue(Float.isNaN(quantiles[0])); + Assert.assertTrue(Float.isNaN(quantiles[1])); + Assert.assertTrue(Float.isNaN(quantiles[2])); + } + + @Test + public void normalCase() + { + final float[] values = new float[] {1, 2, 3, 4, 5}; + final TestFloatColumnSelector selector = new TestFloatColumnSelector(values); + + final Aggregator agg = new KllFloatsSketchBuildAggregator(selector, 8); + //noinspection ForLoopReplaceableByForEach + for (int i = 0; i < values.length; i++) { + agg.aggregate(); + selector.increment(); + } + + final Map fields = new HashMap<>(); + fields.put("sketch", 
agg.get()); + + final PostAggregator postAgg = new KllFloatsSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("field", "sketch"), + new double[] {0, 0.5, 1} + ); + + final float[] quantiles = (float[]) postAgg.compute(fields); + Assert.assertNotNull(quantiles); + Assert.assertEquals(3, quantiles.length); + Assert.assertEquals(1f, quantiles[0], 0); + Assert.assertEquals(3f, quantiles[1], 0); + Assert.assertEquals(5f, quantiles[2], 0); + } + + @Test + public void testResultArraySignature() + { + final TimeseriesQuery query = + Druids.newTimeseriesQueryBuilder() + .dataSource("dummy") + .intervals("2000/3000") + .granularity(Granularities.HOUR) + .aggregators( + new KllFloatsSketchAggregatorFactory("sketch", "col", 8, 1000000L) + ) + .postAggregators( + new KllFloatsSketchToQuantilesPostAggregator( + "a", + new FieldAccessPostAggregator("field", "sketch"), + new double[] {0, 0.5, 1} + ) + ) + .build(); + + Assert.assertEquals( + RowSignature.builder() + .addTimeColumn() + .add("sketch", null) + .add("a", ColumnType.FLOAT_ARRAY) + .build(), + new TimeseriesQueryQueryToolChest().resultArraySignature(query) + ); + } +} diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToRankPostAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToRankPostAggregatorTest.java new file mode 100644 index 000000000000..83e1368058db --- /dev/null +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToRankPostAggregatorTest.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.core.JsonProcessingException; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.query.Druids; +import org.apache.druid.query.aggregation.Aggregator; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.TestFloatColumnSelector; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.timeseries.TimeseriesQuery; +import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.junit.Assert; +import org.junit.Test; + +import java.util.HashMap; +import java.util.Map; + +public class KllFloatsSketchToRankPostAggregatorTest +{ + @Test + public void testSerde() throws JsonProcessingException + { + final PostAggregator there = new KllFloatsSketchToRankPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch"), + 0 + ); + DefaultObjectMapper mapper = new DefaultObjectMapper(); + KllFloatsSketchToRankPostAggregator andBackAgain = mapper.readValue( + mapper.writeValueAsString(there), + 
KllFloatsSketchToRankPostAggregator.class + ); + + Assert.assertEquals(there, andBackAgain); + Assert.assertArrayEquals(there.getCacheKey(), andBackAgain.getCacheKey()); + } + + @Test + public void testToString() + { + final PostAggregator postAgg = new KllFloatsSketchToRankPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch"), + 0 + ); + + Assert.assertEquals( + "KllFloatsSketchToRankPostAggregator{name='post', field=FieldAccessPostAggregator{name='field1', fieldName='sketch'}, value=0.0}", + postAgg.toString() + ); + } + + @Test + public void testEqualsAndHashCode() + { + EqualsVerifier.forClass(KllFloatsSketchToRankPostAggregator.class) + .withNonnullFields("name", "field", "value") + .usingGetClass() + .verify(); + } + + @Test + public void emptySketch() + { + final TestFloatColumnSelector selector = new TestFloatColumnSelector(null); + final Aggregator agg = new KllFloatsSketchBuildAggregator(selector, 8); + + final Map fields = new HashMap<>(); + fields.put("sketch", agg.get()); + + final PostAggregator postAgg = new KllFloatsSketchToRankPostAggregator( + "rank", + new FieldAccessPostAggregator("field", "sketch"), + 0 + ); + + final double rank = (double) postAgg.compute(fields); + Assert.assertTrue(Double.isNaN(rank)); + } + + @Test + public void normalCase() + { + final float[] values = new float[] {1, 2, 3, 4, 5, 6}; + final TestFloatColumnSelector selector = new TestFloatColumnSelector(values); + + final Aggregator agg = new KllFloatsSketchBuildAggregator(selector, 8); + //noinspection ForLoopReplaceableByForEach + for (int i = 0; i < values.length; i++) { + agg.aggregate(); + selector.increment(); + } + + final Map fields = new HashMap<>(); + fields.put("sketch", agg.get()); + + final PostAggregator postAgg = new KllFloatsSketchToRankPostAggregator( + "rank", + new FieldAccessPostAggregator("field", "sketch"), + 4 + ); + + final double rank = (double) postAgg.compute(fields); + Assert.assertEquals(0.5, rank, 0); + } + + @Test + 
public void testResultArraySignature() + { + final TimeseriesQuery query = + Druids.newTimeseriesQueryBuilder() + .dataSource("dummy") + .intervals("2000/3000") + .granularity(Granularities.HOUR) + .aggregators( + new KllFloatsSketchAggregatorFactory("sketch", "col", 8, 1000000L) + ) + .postAggregators( + new KllFloatsSketchToRankPostAggregator( + "a", + new FieldAccessPostAggregator("field", "sketch"), + 4 + ) + ) + .build(); + + Assert.assertEquals( + RowSignature.builder() + .addTimeColumn() + .add("sketch", null) + .add("a", ColumnType.DOUBLE) + .build(), + new TimeseriesQueryQueryToolChest().resultArraySignature(query) + ); + } +} diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToStringPostAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToStringPostAggregatorTest.java new file mode 100644 index 000000000000..0d8a8f54e2c0 --- /dev/null +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchToStringPostAggregatorTest.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.kll; + +import com.fasterxml.jackson.core.JsonProcessingException; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +public class KllFloatsSketchToStringPostAggregatorTest +{ + @Rule + public ExpectedException expectedException = ExpectedException.none(); + + @Test + public void testSerde() throws JsonProcessingException + { + final PostAggregator there = new KllFloatsSketchToStringPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch") + ); + DefaultObjectMapper mapper = new DefaultObjectMapper(); + KllFloatsSketchToStringPostAggregator andBackAgain = mapper.readValue( + mapper.writeValueAsString(there), + KllFloatsSketchToStringPostAggregator.class + ); + + Assert.assertEquals(there, andBackAgain); + Assert.assertArrayEquals(there.getCacheKey(), andBackAgain.getCacheKey()); + } + + @Test + public void testToString() + { + final PostAggregator postAgg = new KllFloatsSketchToStringPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch") + ); + + Assert.assertEquals( + "KllFloatsSketchToStringPostAggregator{name='post', field=FieldAccessPostAggregator{name='field1', fieldName='sketch'}}", + postAgg.toString() + ); + } + + @Test + public void testComparator() + { + expectedException.expect(IAE.class); + expectedException.expectMessage("Comparing sketch summaries is not supported"); + final PostAggregator postAgg = new KllFloatsSketchToStringPostAggregator( + "post", + new FieldAccessPostAggregator("field1", "sketch") + ); + postAgg.getComparator(); + } + + @Test + public void testEqualsAndHashCode() + { + 
EqualsVerifier.forClass(KllFloatsSketchToStringPostAggregator.class) + .withNonnullFields("name", "field") + .usingGetClass() + .verify(); + } +} diff --git a/extensions-core/datasketches/src/test/resources/kll/kll_doubles_sketch_build_data.tsv b/extensions-core/datasketches/src/test/resources/kll/kll_doubles_sketch_build_data.tsv new file mode 100644 index 000000000000..596aee3d6393 --- /dev/null +++ b/extensions-core/datasketches/src/test/resources/kll/kll_doubles_sketch_build_data.tsv @@ -0,0 +1,400 @@ +2016010101 0 3 0.3823420115618271 6.9117100578091355 +2016010101 1 3 0.6594032531116681 8.29701626555834 +2016010101 2 3 0.15365051435565968 5.768252571778298 +2016010101 3 3 0.82015379205599 9.10076896027995 +2016010101 4 3 0.701456721118654 +2016010101 5 3 0.9161628872993025 9.580814436496514 +2016010101 6 3 0.3283606025742475 6.641803012871238 +2016010101 7 3 0.7636022396035272 8.818011198017636 +2016010101 8 3 0.15417910849547067 5.770895542477353 +2016010101 9 3 0.48945974430297756 7.447298721514888 +2016010101 10 3 0.5205343078043222 +2016010101 11 3 0.7869614735870856 +2016010101 12 3 0.7303195064717449 +2016010101 13 3 0.4907649952308507 7.453824976154253 +2016010101 14 3 0.7623116099717726 8.811558049858863 +2016010101 15 3 0.7733297999476294 8.866648999738146 +2016010101 16 3 0.9312559986645847 9.656279993322924 +2016010101 17 3 0.1080066831119848 5.540033415559924 +2016010101 18 3 0.040295623588079255 5.201478117940396 +2016010101 19 3 0.9779547095635436 9.889773547817718 +2016010101 20 0 0.29773127979336755 6.488656398966838 +2016010101 21 0 0.7500197474246267 8.750098737123134 +2016010101 22 0 0.41395719197813907 +2016010101 23 0 0.7935186557949864 8.967593278974931 +2016010101 24 0 0.3494079314550629 6.747039657275314 +2016010101 25 0 0.49724319869827305 7.486215993491365 +2016010101 26 0 0.4817975356853148 7.408987678426574 +2016010101 27 0 0.04918537146318602 5.24592685731593 +2016010101 28 0 0.45553026287756504 7.277651314387825 +2016010101 29 0 
0.9187245246443049 9.593622623221524 +2016010101 30 0 0.07689070944756793 5.38445354723784 +2016010101 31 0 0.38071933371375755 6.903596668568788 +2016010101 32 0 0.31230710034643827 6.561535501732191 +2016010101 33 0 0.22713292113038341 +2016010101 34 0 0.9112390985235956 9.556195492617977 +2016010101 35 0 0.5097933998389343 7.548966999194672 +2016010101 36 0 0.9390066752489092 9.695033376244545 +2016010101 37 0 0.09350082668860316 5.467504133443016 +2016010101 38 0 0.6570811357192442 8.28540567859622 +2016010101 39 0 0.1397539345403308 5.698769672701654 +2016010101 40 3 0.24283580100750457 6.214179005037523 +2016010101 41 3 7.892017562161069E-5 5.000394600878108 +2016010101 42 3 0.09588397485933653 5.479419874296683 +2016010101 43 3 0.3578428407483465 6.789214203741732 +2016010101 44 3 0.2394867810348832 6.197433905174416 +2016010101 45 3 0.3863758407053095 6.931879203526547 +2016010101 46 3 0.9425046172454175 9.712523086227087 +2016010101 47 3 0.37696954766826785 6.884847738341339 +2016010101 48 3 0.7204254950875869 8.602127475437936 +2016010101 49 3 0.15889494378322044 +2016010101 50 3 0.5012862409279809 7.506431204639904 +2016010101 51 3 0.8720390625090659 9.36019531254533 +2016010101 52 3 0.27727898235410353 6.386394911770518 +2016010101 53 3 0.5629182191199141 7.814591095599571 +2016010101 54 3 0.9091198323846115 9.545599161923057 +2016010101 55 3 0.7885883855851757 8.94294192792588 +2016010101 56 3 0.10438179587002272 5.521908979350114 +2016010101 57 3 0.9481950387951917 9.74097519397596 +2016010101 58 3 0.7240800722974705 8.620400361487352 +2016010101 59 3 0.4401982343612464 7.200991171806232 +2016010101 60 9 0.7043087418684459 8.521543709342229 +2016010101 61 9 0.34009333250453555 6.700466662522677 +2016010101 62 9 0.47725816793850306 7.386290839692515 +2016010101 63 9 0.7023789165436026 8.511894582718014 +2016010101 64 9 0.5523685157790932 7.761842578895466 +2016010101 65 9 0.40110151595141996 7.0055075797571 +2016010101 66 9 0.6915717461070983 
8.457858730535492 +2016010101 67 9 0.24956873377466193 6.24784366887331 +2016010101 68 9 0.6748210762027543 +2016010101 69 9 0.40388756059887765 7.019437802994388 +2016010101 70 9 0.18024953151999767 5.901247657599988 +2016010101 71 9 0.14258876127742237 5.712943806387112 +2016010101 72 9 0.02232919288537949 5.111645964426898 +2016010101 73 9 0.10005355961470819 5.500267798073541 +2016010101 74 9 0.7081148158318151 8.540574079159075 +2016010101 75 9 0.7284439105147055 8.642219552573527 +2016010101 76 9 0.5871482899449274 7.935741449724637 +2016010101 77 9 0.6608684415254594 8.304342207627297 +2016010101 78 9 0.6577685827545199 8.2888429137726 +2016010101 79 9 0.5364962612665086 +2016010101 80 1 0.6531289856442395 8.265644928221198 +2016010101 81 1 0.3376608709152451 6.688304354576226 +2016010101 82 1 0.7518847347190302 8.759423673595151 +2016010101 83 1 0.6591227359739772 8.295613679869886 +2016010101 84 1 0.09914377601858937 +2016010101 85 1 0.9409542617315636 9.704771308657818 +2016010101 86 1 0.7481457244157709 8.740728622078855 +2016010101 87 1 0.5311784411860021 7.6558922059300105 +2016010101 88 1 0.3828017926662086 6.914008963331043 +2016010101 89 1 0.21666886406638552 6.0833443203319275 +2016010101 90 1 0.3540262174926062 6.770131087463032 +2016010101 91 1 0.25396835033133414 6.269841751656671 +2016010101 92 1 0.4373575777156927 7.186787888578463 +2016010101 93 1 0.3884932791715622 6.942466395857811 +2016010101 94 1 0.06742115045472896 5.3371057522736445 +2016010101 95 1 0.141187392099684 5.70593696049842 +2016010101 96 1 0.2824511377655894 6.412255688827947 +2016010101 97 1 0.14260618928313795 5.71303094641569 +2016010101 98 1 0.8625765874997501 9.31288293749875 +2016010101 99 1 0.721980392232156 8.60990196116078 +2016010101 100 1 0.4798758212028599 7.399379106014299 +2016010101 101 1 0.39008632619217154 6.950431630960858 +2016010101 102 1 0.6991003649770101 8.49550182488505 +2016010101 103 1 0.6742130766008694 8.371065383004346 +2016010101 104 1 
0.10878639788922428 5.543931989446121 +2016010101 105 1 0.004459698583526794 5.022298492917634 +2016010101 106 1 0.025013781442667082 5.125068907213335 +2016010101 107 1 0.22402085407750505 6.120104270387525 +2016010101 108 1 0.9006074259000753 9.503037129500377 +2016010101 109 1 0.9628942582398963 9.814471291199482 +2016010101 110 1 0.5637745246698173 7.818872623349087 +2016010101 111 1 0.03298379786159089 5.164918989307955 +2016010101 112 1 0.9032890678209848 9.516445339104925 +2016010101 113 1 0.9872953370527403 +2016010101 114 1 0.6329442974732324 8.164721487366162 +2016010101 115 1 0.7096288062489997 8.548144031244998 +2016010101 116 1 0.15073998432143232 5.753699921607161 +2016010101 117 1 0.42715238323268545 7.135761916163427 +2016010101 118 1 0.28484987458460453 6.424249372923023 +2016010101 119 1 0.7063924078253888 +2016010101 120 2 0.3770451678732809 6.885225839366404 +2016010101 121 2 0.5717610747690599 7.8588053738453 +2016010101 122 2 0.33468452259997106 +2016010101 123 2 0.30879918837880815 6.543995941894041 +2016010101 124 2 0.3849836657623067 6.9249183288115335 +2016010101 125 2 0.8253177318518972 9.126588659259486 +2016010101 126 2 0.07528317826134201 +2016010101 127 2 0.17979835640940134 5.898991782047006 +2016010101 128 2 0.5938865798343679 7.9694328991718395 +2016010101 129 2 0.5241364770212069 7.620682385106035 +2016010101 130 2 0.650377806309885 8.251889031549425 +2016010101 131 2 0.908464411265929 +2016010101 132 2 0.903869933311308 9.51934966655654 +2016010101 133 2 0.3923787370470804 6.961893685235402 +2016010101 134 2 0.678339455569151 8.391697277845754 +2016010101 135 2 0.8370945820399055 9.185472910199527 +2016010101 136 2 0.9823659928592525 +2016010101 137 2 0.5973573001260816 7.986786500630408 +2016010101 138 2 0.11159339505437194 5.55796697527186 +2016010101 139 2 0.03151833321609543 5.157591666080477 +2016010101 140 5 0.8693677322910309 9.346838661455156 +2016010101 141 5 0.003030221610122341 5.0151511080506115 +2016010101 142 5 
0.9613107477492807 9.806553738746404 +2016010101 143 5 0.4361577302611862 +2016010101 144 5 0.4705056424502989 7.352528212251494 +2016010101 145 5 0.8183817017574394 9.091908508787197 +2016010101 146 5 0.13809981677271743 5.690499083863587 +2016010101 147 5 0.9897411652062432 9.948705826031215 +2016010101 148 5 0.6286883763105445 8.143441881552722 +2016010101 149 5 0.8989724226914113 9.494862113457057 +2016010101 150 5 0.05172300710836408 5.258615035541821 +2016010101 151 5 0.09025954533874037 5.451297726693702 +2016010101 152 5 0.9233327114562445 9.616663557281223 +2016010101 153 5 0.18505116412267852 5.925255820613392 +2016010101 154 5 0.7417779396972812 8.708889698486406 +2016010101 155 5 0.8050732025127388 9.025366012563694 +2016010101 156 5 0.5601398337868421 7.8006991689342104 +2016010101 157 5 0.8464473059486204 9.232236529743101 +2016010101 158 5 0.682411260413121 +2016010101 159 5 0.5276480229138161 7.63824011456908 +2016010101 160 0 0.6347552063123598 8.173776031561799 +2016010101 161 0 0.7185106529278713 8.592553264639356 +2016010101 162 0 0.9417808325014447 9.708904162507224 +2016010101 163 0 0.8686954883591914 9.343477441795958 +2016010101 164 0 0.2261535346114757 6.130767673057378 +2016010101 165 0 0.04716008314690556 5.235800415734528 +2016010101 166 0 0.7931208398413077 8.965604199206538 +2016010101 167 0 0.4142931748721116 7.071465874360558 +2016010101 168 0 0.529837490089028 7.64918745044514 +2016010101 169 0 0.5303732367805127 7.6518661839025635 +2016010101 170 0 0.8035559767152117 9.017779883576058 +2016010101 171 0 0.6111942763699542 8.055971381849771 +2016010101 172 0 0.7353232095159933 8.676616047579966 +2016010101 173 0 0.928104137181409 9.640520685907045 +2016010101 174 0 0.33575836584013774 6.678791829200689 +2016010101 175 0 0.28226612089760716 6.411330604488036 +2016010101 176 0 0.9575643056471305 +2016010101 177 0 0.12181338255147656 5.609066912757383 +2016010101 178 0 0.6429226559423997 8.214613279711998 +2016010101 179 0 
0.44988064790947546 7.2494032395473775 +2016010101 180 7 0.78684273261408 8.9342136630704 +2016010101 181 7 0.08015144649105144 5.400757232455257 +2016010101 182 7 0.9349467583101043 9.674733791550523 +2016010101 183 7 0.03628262932943005 5.1814131466471505 +2016010101 184 7 0.49275494409647824 7.463774720482391 +2016010101 185 7 0.010528302265064626 5.052641511325323 +2016010101 186 7 0.6857707964854701 8.42885398242735 +2016010101 187 7 0.6152161544983311 +2016010101 188 7 0.9499835900073536 9.749917950036767 +2016010101 189 7 0.7712312579878878 8.856156289939438 +2016010101 190 7 0.8416170695432823 9.208085347716413 +2016010101 191 7 0.4235638782620279 7.11781939131014 +2016010101 192 7 0.3213911944433917 6.606955972216959 +2016010101 193 7 0.967166250138647 9.835831250693236 +2016010101 194 7 0.3100279688446136 6.550139844223068 +2016010101 195 7 0.47384795174189065 7.369239758709453 +2016010101 196 7 0.693915849438657 +2016010101 197 7 0.6112624276966235 8.056312138483118 +2016010101 198 7 0.8387015481238568 +2016010101 199 7 0.6138450065020921 8.06922503251046 +2016010101 200 0 0.39452530098078953 6.972626504903948 +2016010101 201 0 0.606654202262552 8.03327101131276 +2016010101 202 0 0.6930369531147353 8.465184765573676 +2016010101 203 0 0.8268376400943458 9.13418820047173 +2016010101 204 0 0.16709865541519908 5.8354932770759955 +2016010101 205 0 0.1119658182291069 5.559829091145534 +2016010101 206 0 0.5791164469951416 7.895582234975708 +2016010101 207 0 0.238697629956793 6.193488149783965 +2016010101 208 0 0.09597450084980697 +2016010101 209 0 0.39137081642070304 6.956854082103515 +2016010101 210 0 0.11857389954581588 5.59286949772908 +2016010101 211 0 0.8316581331841882 9.15829066592094 +2016010101 212 0 0.6764443140391575 8.382221570195787 +2016010101 213 0 0.34597889286346384 6.729894464317319 +2016010101 214 0 0.49594290913107053 7.479714545655352 +2016010101 215 0 0.7809596211848724 8.904798105924362 +2016010101 216 0 0.2441525108032696 
6.220762554016348 +2016010101 217 0 0.6011747145812031 8.005873572906015 +2016010101 218 0 0.10195823574986251 5.509791178749312 +2016010101 219 0 0.6772037039459939 +2016010101 220 2 0.43358582145350044 7.167929107267502 +2016010101 221 2 0.44052858790241434 7.202642939512072 +2016010101 222 2 0.6833739273101953 8.416869636550977 +2016010101 223 2 0.30456438314260315 6.522821915713015 +2016010101 224 2 0.42471506922190116 7.123575346109506 +2016010101 225 2 0.667920433221898 8.33960216610949 +2016010101 226 2 0.34742897723473487 6.737144886173674 +2016010101 227 2 0.7484424682257388 8.742212341128694 +2016010101 228 2 0.7985243044511721 8.99262152225586 +2016010101 229 2 0.6356965175482499 8.17848258774125 +2016010101 230 2 0.5797084633274915 7.8985423166374575 +2016010101 231 2 0.6294304727434736 8.147152363717368 +2016010101 232 2 0.5223336344000465 +2016010101 233 2 0.8766164310278411 9.383082155139206 +2016010101 234 2 0.5223371528847858 7.611685764423929 +2016010101 235 2 0.5108166932292111 7.554083466146055 +2016010101 236 2 0.11887610374813573 5.594380518740679 +2016010101 237 2 0.12234284489677438 5.611714224483872 +2016010101 238 2 0.6248498340908764 8.124249170454382 +2016010101 239 2 0.7834656034655412 8.917328017327705 +2016010101 240 4 0.35418754810034114 6.770937740501706 +2016010101 241 4 0.4459277879363973 7.229638939681987 +2016010101 242 4 0.3172687090508812 6.586343545254406 +2016010101 243 4 0.44184247348425953 7.209212367421298 +2016010101 244 4 0.29194557908914476 6.459727895445724 +2016010101 245 4 0.40660072008804193 7.0330036004402094 +2016010101 246 4 0.8867765989904954 +2016010101 247 4 0.2600547991265878 +2016010101 248 4 0.8217305602477201 9.1086528012386 +2016010101 249 4 0.2661976666002399 +2016010101 250 4 0.7942287602591938 8.97114380129597 +2016010101 251 4 0.12660439158325987 5.6330219579162994 +2016010101 252 4 0.03534081179766135 5.176704058988307 +2016010101 253 4 0.6461794155397513 8.230897077698756 +2016010101 254 4 
0.6101431551014658 8.050715775507328 +2016010101 255 4 0.4451958410552058 +2016010101 256 4 0.8676311229161288 9.338155614580643 +2016010101 257 4 0.6404113811732872 8.202056905866435 +2016010101 258 4 0.9933325407201193 9.966662703600598 +2016010101 259 4 0.022339904456672777 5.111699522283364 +2016010101 260 9 0.16883758898222934 5.8441879449111465 +2016010101 261 9 0.2728677716549247 6.364338858274623 +2016010101 262 9 0.5089792699561795 7.544896349780897 +2016010101 263 9 0.9839745145287888 9.919872572643945 +2016010101 264 9 0.5099722427547367 7.549861213773683 +2016010101 265 9 0.767756154664979 8.838780773324896 +2016010101 266 9 0.2509055112434543 6.254527556217271 +2016010101 267 9 0.7888662659669325 +2016010101 268 9 0.050792119331656504 +2016010101 269 9 0.3217075812336384 6.608537906168192 +2016010101 270 9 0.9547918855966856 9.773959427983428 +2016010101 271 9 0.936590995018191 9.682954975090954 +2016010101 272 9 0.9975948944344657 9.98797447217233 +2016010101 273 9 0.7843793586276683 8.921896793138341 +2016010101 274 9 0.4158667880216135 7.079333940108068 +2016010101 275 9 0.07551964942136202 5.3775982471068104 +2016010101 276 9 0.6826280934209041 8.41314046710452 +2016010101 277 9 0.22272664995345326 6.1136332497672665 +2016010101 278 9 0.05564648553770768 5.278232427688538 +2016010101 279 9 0.27048392134017174 6.352419606700859 +2016010101 280 6 0.9871634478417184 9.935817239208593 +2016010101 281 6 0.9658904984418067 9.829452492209032 +2016010101 282 6 0.6204418492301668 8.102209246150833 +2016010101 283 6 0.7672384171102409 8.836192085551204 +2016010101 284 6 0.9388568130371268 +2016010101 285 6 0.78480388849734 8.9240194424867 +2016010101 286 6 0.6158526849608046 8.079263424804022 +2016010101 287 6 0.9876597965511636 +2016010101 288 6 0.7200082551920491 8.600041275960246 +2016010101 289 6 0.5477746385848508 7.738873192924254 +2016010101 290 6 0.34620536841938343 6.731026842096917 +2016010101 291 6 0.4978039853831645 7.489019926915823 +2016010101 
292 6 0.9525341590965039 9.762670795482519 +2016010101 293 6 0.8127717437327371 9.063858718663685 +2016010101 294 6 0.19221042530389698 5.961052126519485 +2016010101 295 6 0.8348861622551969 9.174430811275984 +2016010101 296 6 0.8021800523962104 +2016010101 297 6 0.6882413912018509 8.441206956009255 +2016010101 298 6 0.9523172441100431 9.761586220550216 +2016010101 299 6 0.461756774653476 7.30878387326738 +2016010101 300 0 0.09246504881380357 5.462325244069018 +2016010101 301 0 0.772562139764703 8.862810698823516 +2016010101 302 0 0.482064298023964 7.41032149011982 +2016010101 303 0 0.8902250104013026 +2016010101 304 0 0.02562896852381691 5.1281448426190845 +2016010101 305 0 0.6333371730813728 8.166685865406864 +2016010101 306 0 0.44177220677423046 7.208861033871152 +2016010101 307 0 0.3726613084590502 6.863306542295251 +2016010101 308 0 0.4894094608015479 7.447047304007739 +2016010101 309 0 0.39958868704802974 6.997943435240149 +2016010101 310 0 0.46999830766929585 7.34999153834648 +2016010101 311 0 0.48802054917771753 7.440102745888588 +2016010101 312 0 0.5938584842659599 7.969292421329799 +2016010101 313 0 0.9456691108738748 9.728345554369374 +2016010101 314 0 0.2511472506658644 6.255736253329323 +2016010101 315 0 0.665786417516016 +2016010101 316 0 0.0013453475894266154 5.006726737947133 +2016010101 317 0 0.2868290122807833 6.434145061403917 +2016010101 318 0 0.5289610221005768 7.644805110502884 +2016010101 319 0 0.7174272473038222 8.587136236519111 +2016010101 320 6 0.19714701067261287 5.985735053363064 +2016010101 321 6 0.9945281647580165 9.972640823790083 +2016010101 322 6 0.8026497710442094 9.013248855221047 +2016010101 323 6 0.8671959353789589 9.335979676894794 +2016010101 324 6 0.7445308997843215 8.722654498921607 +2016010101 325 6 0.2870995799194088 6.435497899597044 +2016010101 326 6 0.9303101769193158 9.65155088459658 +2016010101 327 6 0.3601633978743961 6.800816989371981 +2016010101 328 6 0.1992689091164963 5.996344545582481 +2016010101 329 6 
0.305614677935626 +2016010101 330 6 0.6785651281657815 8.392825640828907 +2016010101 331 6 0.41906769824032675 7.095338491201634 +2016010101 332 6 0.14038910492016632 5.7019455246008315 +2016010101 333 6 0.9564495771842889 9.782247885921445 +2016010101 334 6 0.17008456911161995 5.8504228455581 +2016010101 335 6 0.6768542620524682 8.384271310262342 +2016010101 336 6 0.15447897986634718 5.7723948993317356 +2016010101 337 6 0.7186322093041689 8.593161046520844 +2016010101 338 6 0.35661518326755004 6.783075916337751 +2016010101 339 6 0.9745100065345544 9.872550032672773 +2016010101 340 0 0.941427497924863 9.707137489624316 +2016010101 341 0 0.43710447717457046 7.1855223858728525 +2016010101 342 0 0.8777481481877781 9.388740740938891 +2016010101 343 0 0.470960648820971 7.354803244104855 +2016010101 344 0 0.30816900380820056 6.540845019041003 +2016010101 345 0 0.30917214954754224 6.545860747737711 +2016010101 346 0 0.4425564851469772 7.212782425734886 +2016010101 347 0 0.299926862806396 6.49963431403198 +2016010101 348 0 0.2362865506203755 6.181432753101878 +2016010101 349 0 0.14827145857902235 5.7413572928951115 +2016010101 350 0 0.11205959187249415 5.560297959362471 +2016010101 351 0 0.7735186839840306 8.867593419920153 +2016010101 352 0 0.5545219094617968 7.772609547308984 +2016010101 353 0 0.017881138089139492 5.089405690445697 +2016010101 354 0 0.5456836922655549 7.728418461327775 +2016010101 355 0 0.3232788356193842 6.616394178096921 +2016010101 356 0 0.6969130300239373 +2016010101 357 0 0.41060966897517603 7.05304834487588 +2016010101 358 0 0.5480237679720907 7.740118839860454 +2016010101 359 0 0.8244658047003115 9.122329023501557 +2016010101 360 3 0.3066039757513578 6.5330198787567895 +2016010101 361 3 0.6520147913147185 8.260073956573592 +2016010101 362 3 0.813909833755571 9.069549168777854 +2016010101 363 3 0.31021761081344434 +2016010101 364 3 0.9734060463069539 9.86703023153477 +2016010101 365 3 0.723787775424077 +2016010101 366 3 0.44883375087947996 
+2016010101 367 3 0.6863775435229226 8.431887717614613 +2016010101 368 3 0.9514102693152615 9.757051346576308 +2016010101 369 3 0.13600198246182782 5.680009912309139 +2016010101 370 3 0.31393936153828406 6.56969680769142 +2016010101 371 3 0.455490325816286 7.27745162908143 +2016010101 372 3 0.029381871708711316 5.146909358543557 +2016010101 373 3 0.7915100290942785 8.957550145471393 +2016010101 374 3 0.5893961479206987 7.946980739603493 +2016010101 375 3 0.2459562387865848 6.229781193932924 +2016010101 376 3 0.5803679597864608 7.901839798932304 +2016010101 377 3 0.18249653194129645 5.912482659706482 +2016010101 378 3 0.23463750010209627 6.1731875005104815 +2016010101 379 3 0.03506465251415691 5.175323262570784 +2016010101 380 9 0.2200060809428862 6.100030404714431 +2016010101 381 9 0.5112164084058098 7.556082042029049 +2016010101 382 9 0.7587073735272964 8.793536867636483 +2016010101 383 9 0.38770326993957616 6.938516349697881 +2016010101 384 9 0.5064835380779696 7.532417690389848 +2016010101 385 9 0.3259879780193361 +2016010101 386 9 0.4136307484816738 7.068153742408369 +2016010101 387 9 0.6241184363662127 8.120592181831064 +2016010101 388 9 0.011183366683254814 5.0559168334162745 +2016010101 389 9 0.3288959324514742 +2016010101 390 9 0.022892814909948878 5.114464074549744 +2016010101 391 9 0.855327867324107 9.276639336620535 +2016010101 392 9 0.8506016115040703 9.25300805752035 +2016010101 393 9 0.6151078184653772 8.075539092326887 +2016010101 394 9 0.4404683078636561 7.20234153931828 +2016010101 395 9 0.5635667125621039 +2016010101 396 9 0.06039515501092074 5.301975775054604 +2016010101 397 9 0.8914984660494317 9.45749233024716 +2016010101 398 9 0.6899223955618212 8.449611977809106 +2016010101 399 9 0.2774474240264122 diff --git a/extensions-core/datasketches/src/test/resources/kll/kll_doubles_sketch_data.tsv b/extensions-core/datasketches/src/test/resources/kll/kll_doubles_sketch_data.tsv new file mode 100644 index 000000000000..6f7569abaf3a --- /dev/null +++ 
b/extensions-core/datasketches/src/test/resources/kll/kll_doubles_sketch_data.tsv @@ -0,0 +1,20 @@ +2016010101 3 BQEPCMgACAAUAAAAAAAAAMgAAQC0AAAA8Cf3wqChpD9ZbdGsZ0vvP1lt0axnS+8/8Cf3wqChpD8gJbdzU6a7P5b7TmHZzO0/aIP5Ih6/6D9sE0ZR22ToPzpwIpWxaN8/Bqp9A8de5z+OlwjUyS7pP+HljpA3qOA/EFmf9k5T3z8odFsaJLzDP/P7x/Ztb+g/0npWMNwD1T9RTdjUNFHtP8hymF1VcuY/gERSKrM+6j/Q5xXv0arDPzOz39nUGeU/qt/ioEp42D8= +2016010101 0 BQEPCMgACAAUAAAAAAAAAMgAAQC0AAAAgILAM9MuqT+obh26VwzuPxRFK/l048E/NXKXBM8G5T+oycaQq++3P6huHbpXDO4/xsuAPzpQ4D8I2t/l3ijtP/jKDAqxEs0/jvqS69b80z8YWtCftF3YPwCMcQocr7M/hyxs+TBm7T9ob1lnaCfdP4CCwDPTLqk/3PTDVMXV3j8gUiQj1dLfPxQApBWzXNY/m/hsPIFk6T/QXV9ORn7aP91f0WkpAOg//FttfwcO0z8= +2016010101 3 BQEPCMgACAAUAAAAAAAAAMgAAQC0AAAAAGAMSz6wFD+FWjsfnVfuP+6pFTc1LNw/lH7G+Kkr5z+FWjsfnVfuPxg7j+/DuLo/JQDDtR086T/AlId5ghftP7kprhFtA+I/7A5FWPC+0T8088l2vufrP/cNV3GJCuA/fDuGZatWxD+rYpPEuQ3nPw6utOFEINg/9Lxscf8o7j/IzfK7YbrYP9w/L7qAp84/9MyHqOXm1j/gODso2ou4PwBgDEs+sBQ/bAbQVz4Vzz8= +2016010101 9 BQEPCMgACAAUAAAAAAAAAMgAAQC0AAAAwE/BxHbdlj89MZSaaU/nPxEcEjX6KuE/torosnAM5T9H7umS1SXlP77n5jXryeI/PTGUmmlP5z9U8flm4KjmPzjAZC4cnbk/wE/BxHbdlj/gWTs5WUDCPwC1f6lqEsc/MCMCNkvZ2T/DJ55eIpjlP3BJ1Ube8c8/ZsMLElsh5j+wylixpavZP3yP07wAreE/y4x+WeN55j8g3cLXZYveP+J0LNMWxNU/iJSTfLKJ5j8= +2016010101 1 BQEPCMgACAAUAAAAAAAAAMgAAQC0AAAAeAJDNINCsT+MZ6UcTBzuPw2xn592Guc/KWozNzqa6z9U0ZZr60DCP+T02u+tE9I/aHzWr20Swj94AkM0g0KxP3Z9MOoS3dg/YpY8o6r92z8AGbl3BEHQPz6rg5RdqNY/FAGdKs67yz+cfxYX03/YPxB+J+5p/+A/P0FgTc/w5z+MZ6UcTBzuP/AOmot8Ybk/Y91hkIgX5T8ZWz+TcA/oPxwSblc8nNU/Viwtwm7m5D8= +2016010101 1 BQEPCMgACAAUAAAAAAAAAMgAAQC0AAAAAGwPOVVEcj9rUARk7JfvP3EWOEDEmuY/thPn9/o60j9WJBnzdlbbP+gXbqNyS8M/enJkeEe15j+UyjpmFEHkP2tQBGTsl+8/vAOkeb7n7D+QEDqaQOOgPz/LON9wCuI/ZrCUngfQ7j9x0RiqxtHsP4BD8SC3rMw/oHD7dDadmT8AbA85VURyP5ARL+Vs2bs/LilRTSeT5T9hVYa6B1/mP7YtZ6Ms99g/ekaNE0m23j8= +2016010101 2 
BQEPCMgACAAUAAAAAAAAAMgAAQC0AAAAUG7GxCsjoD/SDoHOim/vP1BuxsQrI6A/YEg1fmKRvD8lMIIOjR3jP9IOgc6Kb+8/V6Wwk3rJ6j+7MCjy9LTlP+TU0LS7HNk/1Bxto4Ds7D8m8v70IxLtP02lBB7lz+Q/sEtu3LnF4D87gL1tHgHjPzyTU+6hA8c/aEeSJMJFsz8GnGO7AGnqPxBffIeSo9g/oJLHq13D0z/41MKheGvVPxlJqOHdS+I/SFtIDoIh2D8= +2016010101 5 BQEPCMgACAAUAAAAAAAAAMgAAQC0AAAAACri1tXSaD+bGgKq9avvP0LYRht+4uA/FLC8I1DW5T+uxRqpGBbrP8OkaV+q7OE/zbN14CjD6T9UnPwWpbznP1wv/6zBr8c/qQLhCvGL7T9YuGHjPxu3P8Bf9h9we6o/YOVu0GHE7D/VIfQVNx7kP5saAqr1q+8/QKtPOkGtwT9JLZbSLjDqP4gUurLDHN4//qPXHALq2z8DDNzBDsPuPwAq4tbV0mg/uGdMR9zR6z8= +2016010101 0 BQEPCMgACAAUAAAAAAAAAMgAAQC0AAAAsM+VzV0lqD+xSBLmXaTuP5z3dzPYytw/gimkiNKS5D8YE0ZuKS+/P7FIEuZdpO4/TImB66UQ0j8sEymoEH3VPxM/j3IHs+0/4YwbisSH5z/jXJBM547jP9XoDwa7tuk/wqtUS9H44D8zEYTAbfTgP9YHQoXHg9o/gJ+c9D5h6T+wz5XNXSWoP9T6g1mZ8sw/a+8Ve1rM6z/M/HKOESPuP9rjhA0K/uY/ZHmCJupP5D8= +2016010101 7 BQEPCMgACAAUAAAAAAAAAMgAAQC0AAAAgG5Jz9yPhT+FfsSiBvPuPxupd0iepOM/N30JoaTW6j/FYAc5do/jP6J78AKPNOY/LoMAXIZT3j9mG8KMf9fTP4V+xKIG8+4/5tRWX6yR1D/cvzmrqxvbPz4zruuG7uo/1b/WLO2t6D8nL1r8Q2buP6NR8cnZr+M/d5/umNXx5T+AbknP3I+FPzSMdQhMid8/YP3DBKOToj/+Lc52FevtPxj6ZyHOhLQ/I4d1z9At6T8= +2016010101 0 BQEPCMgACAAUAAAAAAAAAMgAAQC0AAAAQFxm7siRuD/hT2+E8ZzqPxSzJRqnq+U/IIAaWO8Zuj9t3knB0jzjP0BzkbRjQM8/1okPCJ/96D/EqtlTh73fP4IcfaeEJNY/caDLi26l5T/hT2+E8ZzqPzBetezbWr4/9K9ILjgM2T9AXGbuyJG4P4wIJtmkjc4/a28NNx+I4j9oW5C3yqm8P4xfGx59Y8U/ONPpNXR16j9EgxHVWy3mPy1b1hK2aeM/Eg1KDOc/2T8= +2016010101 2 BQEPCMgACAAUAAAAAAAAAMgAAQC0AAAACMjfEapuvj9/08zmPQ3sP+icDXUmEuk/VppKFMX+4z/4MrtV3FG/PwjI3xGqbr4/UHP1P5xY4D9uB6Rn/LbgP3/TzOY9Dew/LSurBvW24D/OP/FfSyTkP3uXZsP4jOI/WKAhOaBX5D/OuZXXgo3pPzfyfp498+c/gvq5v0Y81j8glSCsml/lP9JKGx2ILts/4u1HnPt90z9YkJf/Mt7lP5Ccf9GeMdw/rLvJvt6/2z8= +2016010101 4 BQEPCMgACAAUAAAAAAAAAMgAAQC0AAAAABEmnEXglj+LQQ5TYcnvPwARJpxF4JY/i0EOU2HJ7z/CA0QCQH7kPzVUPViiw+s/vm1pshZ+3D/oPyHwSobjP5ruInSAreQ/0CTF3TAYoj9sprCckjTAPzhj225Sauk/YHMU8GEJ0T8qaEzjnUvqP6ioWuK8pNA/jrZwUXlg7D9kudMGvwXaPwCUmYI8r9I/JlBmpyVH3D8su1pqIU7UP/IfZLQUitw/+HLvPwKr1j8= +2016010101 9 
BQEPCMgACAAUAAAAAAAAAMgAAQC0AAAAwBXZtmwBqj9SFcggTOzvP/D9D8ubT9E/QCgLN7J9rD9Ytb+OToLMPxBbEt8W2OU/yFp4eEFVsz++54K7j53aP0fTnr2iGek/UhXIIEzs7z9umaqtjfjtP033Y7anje4/9oQRZduW1D/AFdm2bAGqPxDcp3dkPuk/olBL/dUO0D951/JadZHoP9H6D0+xUeA/vPT/Hrh87z9KuNnkjkngP6L62GKqdtE/KNCBWXicxT8= +2016010101 6 BQEPCMgACAAUAAAAAAAAAMgAAQC0AAAA2LNQ6VmayD9BXLi36JrvP7L1dUlsjd0/0NJbA2J57j9P5V7PEgbmPwRzUYB1q+k/0JtYL2O36j/Ys1DpWZrIP249TuM5Auo/F4606ih77j9+gUI/BdzfP4Llw486KNY/3JrJrV6H4T/Dy5zATgrnP0FcuLfomu8/Lu+hsBC14z+h1FsLHR3pPz/hc3EdC+4/NCW3lDeN6D8xbXDdqNrjP+9jyjCT6O4/Ui2JzNeW7z8= +2016010101 0 BQEPCMgACAAUAAAAAAAAAMgAAQC0AAAAAGZ++csKVj9WTQHe60LuPy28jfwp9eY/lftYqj/t4D98jtISaFvSPwBmfvnLClY/9kUrUR9O5T/exQXryxLQP1ZNAd7rQu4/UfsLguMA4z/moZ+KujvfP7JbJ8hzFN4/ckOubdyS2T+auB8PfFLfP3A8FNGu2dc/6CEY7/5F3D8EOLdRTETkPwBej/Z6Ppo/xSc4Kbl87D+AQaU2JNreP4tZjTzUuOg/CKetGMqrtz8= +2016010101 6 BQEPCMgACAAUAAAAAAAAAMgAAQC0AAAA1GQsK0X4wT9FxdK6LNPvP94Z9psvL+8//ghZfcjS1j9HCZr5CP/mP/wCOZv3xcM/2I31RMqo5T+Yv/HGVMXFPxDyyCQ8m+4/1GQsK0X4wT8MJLBSAdLaP9WyNTfOtuU/Pk+63TCP0z/Q6eHDpIHJP5iNxcfqDNc/ViQg2RnF7T+uv5zq1l/SP3XsLXcy0+c/AKi1sBHA6z9H25iSTq/pP0XF0ros0+8/lOir/Rw8yT8= +2016010101 0 BQEPCMgACAAUAAAAAAAAAMgAAQC0AAAAQII73W5Pkj9/jmSPLCDuPw52exwGYuo/xt0bJGmJ4T923erGbUfaP499A44cTeY/ilmetpmw1D/7toelPXbhP0CCO91uT5I/eLlBu6S+4T/JzVFBqsDoP+ijS/rvr7w/qHjDJI/6wj+4F7M/oz7OP5yAvHAAMtM/zLGVb9hS3D96Acn7ecnTP9TQP3wKudM/kOUYIjgk3j8J6tJIgxbsP+yYmQ6F+ds/f45kjywg7j8= +2016010101 3 BQEPCMgACAAUAAAAAAAAAMgAAQC0AAAAADRWCEgWnj+zvtNvJCbvPyB7ln/+86E/DEatApoIzj/0HyneC1zHPyC73dNfkuI/QPPqeH57zz8hpnZPVdziP1ZPLdcMVOk/ADRWCEgWnj8On0HlwCbdP676rh6VF9Q/BK9uUYNowT8GNPny83HuP/14xAnO9uU/gJNXMrG53D+zHBb7RCnnP7O+028kJu8/ZpFF95ra0z9o77uijAvqPzSPph9O3eQ//j0rSGaf0z8= +2016010101 9 BQEPCMgACAAUAAAAAAAAAMgAAQC0AAAAAF9PEU7nhj8ruoPKJ4fsP3hiI9eywdE/dOm2IdgT5j8ruoPKJ4fsP8CuNx8d7K4/5TDyDr0I4j8qukz8oTDcP49Tepf2ruM/Vbfq3iA46z+DdjCM2F7rP+AMaM02cZc/DKhqhqEM1T8AX08RTueGP9jAIDrH+OM/llJWGu142j+e3B6u/NzUP2U7APccNeA/2FU8YCHQ2D/YFJGvVEfoP1ECaYPiW+A/zBBJxSgpzD8= diff --git 
a/extensions-core/datasketches/src/test/resources/kll/kll_floats_sketch_build_data.tsv b/extensions-core/datasketches/src/test/resources/kll/kll_floats_sketch_build_data.tsv new file mode 100644 index 000000000000..286bd2abb4b1 --- /dev/null +++ b/extensions-core/datasketches/src/test/resources/kll/kll_floats_sketch_build_data.tsv @@ -0,0 +1,400 @@ +2016010101 0 3 0.382342 6.9117103 +2016010101 1 3 0.65940326 8.297016 +2016010101 2 3 0.1536505 5.7682524 +2016010101 3 3 0.8201538 9.100769 +2016010101 4 3 0.7014567 +2016010101 5 3 0.9161629 9.580814 +2016010101 6 3 0.32836062 6.641803 +2016010101 7 3 0.76360226 8.818011 +2016010101 8 3 0.15417911 5.7708955 +2016010101 9 3 0.48945975 7.4472985 +2016010101 10 3 0.52053434 +2016010101 11 3 0.7869615 +2016010101 12 3 0.7303195 +2016010101 13 3 0.490765 7.453825 +2016010101 14 3 0.76231164 8.811558 +2016010101 15 3 0.7733298 8.866649 +2016010101 16 3 0.931256 9.65628 +2016010101 17 3 0.108006686 5.5400333 +2016010101 18 3 0.040295623 5.201478 +2016010101 19 3 0.9779547 9.889773 +2016010101 20 0 0.29773128 6.4886565 +2016010101 21 0 0.7500197 8.750099 +2016010101 22 0 0.41395718 +2016010101 23 0 0.79351866 8.967593 +2016010101 24 0 0.34940794 6.74704 +2016010101 25 0 0.4972432 7.486216 +2016010101 26 0 0.48179755 7.4089875 +2016010101 27 0 0.049185373 5.245927 +2016010101 28 0 0.45553026 7.2776513 +2016010101 29 0 0.91872454 9.593622 +2016010101 30 0 0.07689071 5.384454 +2016010101 31 0 0.38071933 6.903597 +2016010101 32 0 0.3123071 6.5615354 +2016010101 33 0 0.22713292 +2016010101 34 0 0.9112391 9.556195 +2016010101 35 0 0.5097934 7.548967 +2016010101 36 0 0.9390067 9.695033 +2016010101 37 0 0.09350083 5.467504 +2016010101 38 0 0.6570811 8.285406 +2016010101 39 0 0.13975394 5.6987696 +2016010101 40 3 0.2428358 6.214179 +2016010101 41 3 7.892017E-5 5.000395 +2016010101 42 3 0.09588397 5.4794197 +2016010101 43 3 0.35784283 6.789214 +2016010101 44 3 0.23948678 6.197434 +2016010101 45 3 0.38637584 6.931879 +2016010101 46 3 
0.94250464 9.712523 +2016010101 47 3 0.37696955 6.8848476 +2016010101 48 3 0.7204255 8.602127 +2016010101 49 3 0.15889494 +2016010101 50 3 0.50128627 7.506431 +2016010101 51 3 0.8720391 9.360195 +2016010101 52 3 0.277279 6.386395 +2016010101 53 3 0.56291825 7.814591 +2016010101 54 3 0.90911984 9.545599 +2016010101 55 3 0.7885884 8.942942 +2016010101 56 3 0.10438179 5.5219088 +2016010101 57 3 0.94819504 9.740975 +2016010101 58 3 0.7240801 8.6204 +2016010101 59 3 0.44019824 7.200991 +2016010101 60 9 0.70430875 8.5215435 +2016010101 61 9 0.34009334 6.7004666 +2016010101 62 9 0.47725818 7.386291 +2016010101 63 9 0.7023789 8.511894 +2016010101 64 9 0.5523685 7.7618427 +2016010101 65 9 0.40110153 7.0055075 +2016010101 66 9 0.6915718 8.457859 +2016010101 67 9 0.24956873 6.2478437 +2016010101 68 9 0.6748211 +2016010101 69 9 0.40388757 7.019438 +2016010101 70 9 0.18024953 5.9012475 +2016010101 71 9 0.14258876 5.712944 +2016010101 72 9 0.022329193 5.111646 +2016010101 73 9 0.100053556 5.500268 +2016010101 74 9 0.7081148 8.540574 +2016010101 75 9 0.7284439 8.64222 +2016010101 76 9 0.5871483 7.9357414 +2016010101 77 9 0.66086847 8.304342 +2016010101 78 9 0.6577686 8.288843 +2016010101 79 9 0.5364963 +2016010101 80 1 0.653129 8.265645 +2016010101 81 1 0.33766088 6.6883044 +2016010101 82 1 0.75188476 8.759423 +2016010101 83 1 0.65912277 8.295613 +2016010101 84 1 0.09914377 +2016010101 85 1 0.94095427 9.704771 +2016010101 86 1 0.7481457 8.740728 +2016010101 87 1 0.5311784 7.6558924 +2016010101 88 1 0.3828018 6.914009 +2016010101 89 1 0.21666886 6.0833445 +2016010101 90 1 0.35402623 6.770131 +2016010101 91 1 0.25396836 6.2698417 +2016010101 92 1 0.43735757 7.186788 +2016010101 93 1 0.38849327 6.9424663 +2016010101 94 1 0.06742115 5.3371058 +2016010101 95 1 0.14118738 5.705937 +2016010101 96 1 0.28245112 6.412256 +2016010101 97 1 0.14260618 5.713031 +2016010101 98 1 0.8625766 9.312883 +2016010101 99 1 0.7219804 8.609902 +2016010101 100 1 0.47987583 7.3993793 +2016010101 101 1 
0.39008632 6.950432 +2016010101 102 1 0.6991004 8.4955015 +2016010101 103 1 0.67421305 8.371065 +2016010101 104 1 0.1087864 5.543932 +2016010101 105 1 0.0044596987 5.0222983 +2016010101 106 1 0.025013782 5.125069 +2016010101 107 1 0.22402085 6.1201043 +2016010101 108 1 0.9006074 9.503037 +2016010101 109 1 0.96289426 9.814471 +2016010101 110 1 0.5637745 7.8188725 +2016010101 111 1 0.0329838 5.164919 +2016010101 112 1 0.9032891 9.516445 +2016010101 113 1 0.9872953 +2016010101 114 1 0.6329443 8.1647215 +2016010101 115 1 0.7096288 8.548144 +2016010101 116 1 0.15073998 5.7537 +2016010101 117 1 0.4271524 7.1357617 +2016010101 118 1 0.28484988 6.424249 +2016010101 119 1 0.7063924 +2016010101 120 2 0.37704515 6.885226 +2016010101 121 2 0.5717611 7.858805 +2016010101 122 2 0.33468452 +2016010101 123 2 0.30879918 6.543996 +2016010101 124 2 0.38498366 6.924918 +2016010101 125 2 0.82531774 9.126589 +2016010101 126 2 0.07528318 +2016010101 127 2 0.17979835 5.8989916 +2016010101 128 2 0.59388655 7.969433 +2016010101 129 2 0.5241365 7.6206822 +2016010101 130 2 0.6503778 8.251889 +2016010101 131 2 0.90846443 +2016010101 132 2 0.9038699 9.51935 +2016010101 133 2 0.39237875 6.9618936 +2016010101 134 2 0.6783395 8.391697 +2016010101 135 2 0.8370946 9.1854725 +2016010101 136 2 0.98236597 +2016010101 137 2 0.5973573 7.9867864 +2016010101 138 2 0.111593395 5.557967 +2016010101 139 2 0.031518333 5.157592 +2016010101 140 5 0.8693677 9.346839 +2016010101 141 5 0.0030302217 5.015151 +2016010101 142 5 0.96131074 9.806554 +2016010101 143 5 0.43615773 +2016010101 144 5 0.47050565 7.352528 +2016010101 145 5 0.8183817 9.091908 +2016010101 146 5 0.13809982 5.6904993 +2016010101 147 5 0.98974115 9.948706 +2016010101 148 5 0.6286884 8.143442 +2016010101 149 5 0.89897245 9.494863 +2016010101 150 5 0.051723007 5.258615 +2016010101 151 5 0.090259545 5.4512978 +2016010101 152 5 0.9233327 9.616664 +2016010101 153 5 0.18505116 5.925256 +2016010101 154 5 0.74177796 8.70889 +2016010101 155 5 0.8050732 
9.025366 +2016010101 156 5 0.56013983 7.800699 +2016010101 157 5 0.8464473 9.232237 +2016010101 158 5 0.68241125 +2016010101 159 5 0.52764803 7.6382403 +2016010101 160 0 0.6347552 8.173776 +2016010101 161 0 0.7185106 8.592553 +2016010101 162 0 0.9417808 9.708904 +2016010101 163 0 0.8686955 9.343477 +2016010101 164 0 0.22615354 6.130768 +2016010101 165 0 0.04716008 5.2358003 +2016010101 166 0 0.79312086 8.965604 +2016010101 167 0 0.41429317 7.071466 +2016010101 168 0 0.5298375 7.6491876 +2016010101 169 0 0.5303732 7.651866 +2016010101 170 0 0.80355597 9.01778 +2016010101 171 0 0.61119425 8.055971 +2016010101 172 0 0.7353232 8.676616 +2016010101 173 0 0.92810416 9.640521 +2016010101 174 0 0.33575836 6.678792 +2016010101 175 0 0.2822661 6.4113307 +2016010101 176 0 0.9575643 +2016010101 177 0 0.12181338 5.609067 +2016010101 178 0 0.64292264 8.214613 +2016010101 179 0 0.44988066 7.2494035 +2016010101 180 7 0.7868427 8.934214 +2016010101 181 7 0.080151446 5.4007573 +2016010101 182 7 0.9349468 9.674734 +2016010101 183 7 0.03628263 5.181413 +2016010101 184 7 0.49275494 7.4637747 +2016010101 185 7 0.010528302 5.0526414 +2016010101 186 7 0.6857708 8.428854 +2016010101 187 7 0.61521614 +2016010101 188 7 0.9499836 9.749918 +2016010101 189 7 0.77123123 8.856156 +2016010101 190 7 0.84161705 9.208085 +2016010101 191 7 0.42356387 7.1178193 +2016010101 192 7 0.3213912 6.606956 +2016010101 193 7 0.96716624 9.835832 +2016010101 194 7 0.31002796 6.55014 +2016010101 195 7 0.47384796 7.36924 +2016010101 196 7 0.69391584 +2016010101 197 7 0.61126244 8.056313 +2016010101 198 7 0.83870155 +2016010101 199 7 0.613845 8.069225 +2016010101 200 0 0.3945253 6.9726267 +2016010101 201 0 0.6066542 8.033271 +2016010101 202 0 0.693037 8.465185 +2016010101 203 0 0.82683766 9.134189 +2016010101 204 0 0.16709866 5.835493 +2016010101 205 0 0.11196582 5.559829 +2016010101 206 0 0.57911646 7.895582 +2016010101 207 0 0.23869763 6.193488 +2016010101 208 0 0.0959745 +2016010101 209 0 0.3913708 6.956854 
+2016010101 210 0 0.1185739 5.5928693 +2016010101 211 0 0.8316581 9.158291 +2016010101 212 0 0.6764443 8.382221 +2016010101 213 0 0.3459789 6.7298946 +2016010101 214 0 0.49594292 7.4797144 +2016010101 215 0 0.7809596 8.9047985 +2016010101 216 0 0.24415252 6.2207627 +2016010101 217 0 0.6011747 8.005874 +2016010101 218 0 0.10195824 5.5097914 +2016010101 219 0 0.6772037 +2016010101 220 2 0.43358582 7.167929 +2016010101 221 2 0.4405286 7.202643 +2016010101 222 2 0.6833739 8.416869 +2016010101 223 2 0.3045644 6.522822 +2016010101 224 2 0.42471507 7.123575 +2016010101 225 2 0.6679204 8.339602 +2016010101 226 2 0.34742898 6.737145 +2016010101 227 2 0.7484425 8.742212 +2016010101 228 2 0.7985243 8.992621 +2016010101 229 2 0.63569653 8.178483 +2016010101 230 2 0.57970846 7.8985424 +2016010101 231 2 0.6294305 8.147152 +2016010101 232 2 0.5223336 +2016010101 233 2 0.8766164 9.383082 +2016010101 234 2 0.52233714 7.6116858 +2016010101 235 2 0.5108167 7.5540833 +2016010101 236 2 0.11887611 5.5943804 +2016010101 237 2 0.12234285 5.6117144 +2016010101 238 2 0.62484986 8.124249 +2016010101 239 2 0.7834656 8.917328 +2016010101 240 4 0.35418755 6.770938 +2016010101 241 4 0.4459278 7.229639 +2016010101 242 4 0.3172687 6.586344 +2016010101 243 4 0.44184247 7.2092123 +2016010101 244 4 0.29194558 6.459728 +2016010101 245 4 0.4066007 7.033004 +2016010101 246 4 0.8867766 +2016010101 247 4 0.2600548 +2016010101 248 4 0.82173055 9.108653 +2016010101 249 4 0.26619768 +2016010101 250 4 0.79422873 8.971144 +2016010101 251 4 0.1266044 5.633022 +2016010101 252 4 0.035340812 5.176704 +2016010101 253 4 0.64617944 8.230897 +2016010101 254 4 0.6101432 8.050715 +2016010101 255 4 0.44519585 +2016010101 256 4 0.86763114 9.338156 +2016010101 257 4 0.6404114 8.202057 +2016010101 258 4 0.99333256 9.966662 +2016010101 259 4 0.022339905 5.1116996 +2016010101 260 9 0.16883759 5.8441877 +2016010101 261 9 0.27286777 6.364339 +2016010101 262 9 0.50897926 7.544896 +2016010101 263 9 0.9839745 9.919872 +2016010101 
264 9 0.5099722 7.5498614 +2016010101 265 9 0.76775616 8.83878 +2016010101 266 9 0.2509055 6.2545276 +2016010101 267 9 0.7888663 +2016010101 268 9 0.05079212 +2016010101 269 9 0.32170758 6.6085377 +2016010101 270 9 0.9547919 9.773959 +2016010101 271 9 0.93659097 9.682955 +2016010101 272 9 0.9975949 9.987974 +2016010101 273 9 0.78437936 8.921897 +2016010101 274 9 0.4158668 7.079334 +2016010101 275 9 0.07551965 5.3775983 +2016010101 276 9 0.6826281 8.41314 +2016010101 277 9 0.22272664 6.113633 +2016010101 278 9 0.055646487 5.2782326 +2016010101 279 9 0.2704839 6.3524194 +2016010101 280 6 0.9871634 9.935817 +2016010101 281 6 0.9658905 9.8294525 +2016010101 282 6 0.62044185 8.102209 +2016010101 283 6 0.76723844 8.836192 +2016010101 284 6 0.93885684 +2016010101 285 6 0.78480387 8.92402 +2016010101 286 6 0.6158527 8.079264 +2016010101 287 6 0.9876598 +2016010101 288 6 0.72000825 8.600041 +2016010101 289 6 0.5477746 7.738873 +2016010101 290 6 0.34620535 6.7310266 +2016010101 291 6 0.497804 7.48902 +2016010101 292 6 0.95253414 9.7626705 +2016010101 293 6 0.81277174 9.063859 +2016010101 294 6 0.19221042 5.961052 +2016010101 295 6 0.83488613 9.174431 +2016010101 296 6 0.80218005 +2016010101 297 6 0.68824136 8.441207 +2016010101 298 6 0.95231724 9.761586 +2016010101 299 6 0.46175677 7.308784 +2016010101 300 0 0.09246505 5.462325 +2016010101 301 0 0.77256215 8.862811 +2016010101 302 0 0.4820643 7.4103217 +2016010101 303 0 0.890225 +2016010101 304 0 0.02562897 5.1281447 +2016010101 305 0 0.6333372 8.166686 +2016010101 306 0 0.4417722 7.208861 +2016010101 307 0 0.37266132 6.8633065 +2016010101 308 0 0.48940945 7.447047 +2016010101 309 0 0.39958867 6.9979434 +2016010101 310 0 0.4699983 7.3499913 +2016010101 311 0 0.48802054 7.4401026 +2016010101 312 0 0.5938585 7.9692926 +2016010101 313 0 0.9456691 9.728346 +2016010101 314 0 0.25114724 6.2557364 +2016010101 315 0 0.66578645 +2016010101 316 0 0.0013453476 5.0067267 +2016010101 317 0 0.28682902 6.434145 +2016010101 318 0 0.528961 
7.644805 +2016010101 319 0 0.71742725 8.587136 +2016010101 320 6 0.19714701 5.985735 +2016010101 321 6 0.9945282 9.972641 +2016010101 322 6 0.8026498 9.013248 +2016010101 323 6 0.86719596 9.335979 +2016010101 324 6 0.7445309 8.722654 +2016010101 325 6 0.28709957 6.4354978 +2016010101 326 6 0.9303102 9.651551 +2016010101 327 6 0.3601634 6.800817 +2016010101 328 6 0.1992689 5.9963446 +2016010101 329 6 0.30561468 +2016010101 330 6 0.67856514 8.392826 +2016010101 331 6 0.4190677 7.0953383 +2016010101 332 6 0.1403891 5.7019453 +2016010101 333 6 0.95644957 9.782248 +2016010101 334 6 0.17008457 5.850423 +2016010101 335 6 0.67685425 8.384272 +2016010101 336 6 0.15447898 5.772395 +2016010101 337 6 0.7186322 8.593161 +2016010101 338 6 0.3566152 6.783076 +2016010101 339 6 0.97451 9.87255 +2016010101 340 0 0.94142747 9.707137 +2016010101 341 0 0.43710446 7.1855226 +2016010101 342 0 0.87774813 9.388741 +2016010101 343 0 0.47096065 7.354803 +2016010101 344 0 0.308169 6.540845 +2016010101 345 0 0.30917215 6.545861 +2016010101 346 0 0.44255647 7.2127824 +2016010101 347 0 0.29992688 6.4996343 +2016010101 348 0 0.23628655 6.1814327 +2016010101 349 0 0.14827146 5.7413573 +2016010101 350 0 0.11205959 5.560298 +2016010101 351 0 0.7735187 8.867594 +2016010101 352 0 0.5545219 7.7726097 +2016010101 353 0 0.017881138 5.0894055 +2016010101 354 0 0.5456837 7.7284184 +2016010101 355 0 0.32327884 6.616394 +2016010101 356 0 0.696913 +2016010101 357 0 0.41060966 7.053048 +2016010101 358 0 0.54802376 7.740119 +2016010101 359 0 0.8244658 9.122329 +2016010101 360 3 0.30660397 6.53302 +2016010101 361 3 0.6520148 8.260074 +2016010101 362 3 0.8139098 9.06955 +2016010101 363 3 0.31021762 +2016010101 364 3 0.973406 9.86703 +2016010101 365 3 0.7237878 +2016010101 366 3 0.44883376 +2016010101 367 3 0.6863775 8.431888 +2016010101 368 3 0.9514103 9.757051 +2016010101 369 3 0.13600199 5.68001 +2016010101 370 3 0.31393936 6.569697 +2016010101 371 3 0.45549032 7.2774515 +2016010101 372 3 0.029381871 5.146909 
+2016010101 373 3 0.79151005 8.95755 +2016010101 374 3 0.5893961 7.946981 +2016010101 375 3 0.24595624 6.229781 +2016010101 376 3 0.580368 7.9018397 +2016010101 377 3 0.18249653 5.9124827 +2016010101 378 3 0.2346375 6.1731877 +2016010101 379 3 0.035064653 5.1753235 +2016010101 380 9 0.22000608 6.1000304 +2016010101 381 9 0.5112164 7.5560822 +2016010101 382 9 0.75870734 8.793537 +2016010101 383 9 0.38770327 6.938516 +2016010101 384 9 0.50648355 7.532418 +2016010101 385 9 0.32598796 +2016010101 386 9 0.41363075 7.068154 +2016010101 387 9 0.62411845 8.120592 +2016010101 388 9 0.011183367 5.055917 +2016010101 389 9 0.32889593 +2016010101 390 9 0.022892814 5.1144643 +2016010101 391 9 0.85532784 9.276639 +2016010101 392 9 0.8506016 9.253008 +2016010101 393 9 0.61510783 8.075539 +2016010101 394 9 0.4404683 7.2023416 +2016010101 395 9 0.5635667 +2016010101 396 9 0.060395155 5.3019757 +2016010101 397 9 0.89149845 9.457492 +2016010101 398 9 0.6899224 8.449612 +2016010101 399 9 0.27744743 diff --git a/extensions-core/datasketches/src/test/resources/kll/kll_floats_sketch_data.tsv b/extensions-core/datasketches/src/test/resources/kll/kll_floats_sketch_data.tsv new file mode 100644 index 000000000000..c47701fa123c --- /dev/null +++ b/extensions-core/datasketches/src/test/resources/kll/kll_floats_sketch_data.tsv @@ -0,0 +1,20 @@ +2016010101 3 BQEPAMgACAAUAAAAAAAAAMgAAQC0AAAABg0lPT1bej89W3o/Bg0lPZwy3T3LZm4/8fhFP9smQz+NRfs+OPY6P092ST+9QQU/eJr6PiHhHT5we0M/4h6oPqeJaj+rkjM/mfVRP49WHT6nzig/VcLDPg== +2016010101 0 BQEPAMgACAAUAAAAAAAAAMgAAQC0AAAAmnZJPb5icD+oGw8+eDYoP119vz2+YnA/0oECP/dGaT+IlWg+t+afPqXtwj7geJ09iDFrP0M76T6adkk9K672PqmW/j6Z5bI+CiRLPzLy0z5LAUA/PHCYPg== +2016010101 3 BQEPAMgACAAUAAAAAAAAAMgAAQC0AAAA8oGlOOm8cj+qYeE+UF05P+m8cj8fxtU97uBJPxS8aD9pGxA/g/eNPvQ9Xz9MVAA/W7UiPs5tOD8nAsE+/EdxPw7TxT4GPHU+LTe3PtFexD3ygaU486l4Pg== +2016010101 9 
BQEPAMgACAAUAAAAAAAAAMgAAQC0AAAAtuu2PE17Oj/SVwk/hmMoP60uKT9aTxY/TXs6PwNHNT/h6Mw9tuu2PMoCEj5Vkzg+WsrOPhPBLD/yjn8+2QoxPy5dzT4GaA0/G88zPy9b9D63IK4+lE00Pw== +2016010101 1 BQEPAMgACAAUAAAAAAAAAMgAAQC0AAAAGhSKPWHicD+10zg/0tFcP1sHEj5vnZA+bZMQPhoUij2X6MY+Ve3fPiQIgj7tQrU+cd5dPpn+wz5P+wc/eoY/P2HicD/kC8s9RbwoP4V7QD/j4aw+djMnPw== +2016010101 1 BQEPAMgACAAUAAAAAAAAAMgAAQC0AAAAqiKSO2O/fD8i1jQ/2NeRPriz2j6VWxo+PKo1P6MIIj9jv3w/9D1nPwUaBz2HUxA/PYB2PzWOZj+5ZWU+tOnMPKoikjtny949OpksPz74Mj9lucc+SbL1Pg== +2016010101 2 BQEPAMgACAAUAAAAAAAAAMgAAQC0AAAAXhkBPVZ8ez9eGQE9FIvkPWjsGD9WfHs/1UtWP6inLT/e5cg+BWRnPyCRaD8pfyY/zy0GP/MIGD8PHTg+ES6aPQZIUz+UHMU+7RqePsVbqz7vXhI/EAzBPg== +2016010101 5 BQEPAMgACAAUAAAAAAAAAMgAAQC0AAAAr5ZGO61ffT/xEwc/gbIuP8WwWD9TZQ8/RxlOPynlPT8Nfj0+iF9sP//ZuD2B21M9DyNmP7nxID+tX30/CmoNPneBUT8e5vA+EVDfPnYYdj+vlkY74o5ePw== +2016010101 0 BQEPAMgACAAUAAAAAAAAAMgAAQC0AAAA7ipBPe8idT/CVuY+lJYkP0t5+T3vInU/L4WQPoXoqz48mG0/JD48Pzp3HD/YtU0/isYHP26jBz88HtQ++AlLP+4qQT3LlGc+1GJeP4wYcT9Q8Dc/UX8iPw== +2016010101 7 BQEPAMgACAAUAAAAAAAAAMgAAQC0AAAA5n4sPDWYdz/yJB0/JbVWP7J7HD94pDE/M5zyPvy7nj41mHc/Y42kPl3d2D43dFc/aW9FPyAycz/Ofh0/rY4vP+Z+LDxgSvw+GJ0UPaxYbz9xJqQ9hm5JPw== +2016010101 0 BQEPAMgACAAUAAAAAAAAAMgAAQC0AAAAR47EPYznVD85XS0/e8/QPZbmGT8eA3o++OxHPzvs/T4lJLE+dCstP4znVD/f1vI9wWHIPkeOxD0nbXQ++kAUP1ZO5T3pGys+oqtTP99qMT+xTRs/OP/JPg== +2016010101 2 BQEPAMgACAAUAAAAAAAAAMgAAQC0AAAAUXXzPe9pYD80kUg/KfYfP+OO+j1RdfM94sQCP+O3BT/vaWA/qLcFP1siIT/GZxQ/Ar0iPxdsTD/tmT8/NuKxPtX8Kj9BdNk+3e+bPpjxLj/3jOE+9v7dPg== +2016010101 4 BQEPAMgACAAUAAAAAAAAAMgAAQC0AAAALQK3PAtLfj8tArc8C0t+PwDyIz8THV4/tvDjPlgyHD8EbCU/h8EQPZWkAT6TUks/EEuIPu9cUj/nJYU+ywNjP/gt0D7keZU+LTniPgtxoj6mUOQ+Eli1Pg== +2016010101 9 BQEPAMgACAAUAAAAAAAAAMgAAQC0AAAAZgtQPWFifz/efIo+ku1jPXQSZD63wC4/DKqaPX7s1D4WzUg/YWJ/P23Ebz8+bXQ/27akPmYLUD0k80k/sHaAPquLRD+KjQI/weV7P3dMAj9TtYs+w+MsPg== +2016010101 6 BQEPAMgACAAUAAAAAAAAAMgAAQC0AAAAz9JEPkbXfD9ia+w+EMtzP5YwMD+sW00/GbtVP8/SRD7PEVA/R9lzPyrg/j7UQbE+9ToMP3ZSOD9G13w/hqgdP+joSD/sWHA/vWlEP0fVHj+aRHc/vrZ8Pw== 
+2016010101 0 BQEPAMgACAAUAAAAAAAAAMgAAQC0AAAAYFawOl8Xcj9QqTc//WkHP0Hbkj5gVrA6+3AqP1+WgD5fF3I/HAcYP9Td+T6eo/A+45bMPuCT+j53zb4+9y/iPmMiIj/Y89E8yeVjPyLR9j6ixkU/UV69PQ== +2016010101 6 BQEPAMgACAAUAAAAAAAAAMgAAQC0AAAAKcIPPmaZfj99eXk/RJa2Pkj4Nz+9Lx4+UkYtP6YqLj7h2XQ/KcIPPguQ1j5yti0/h3mcPiYNTD5WZ7g+zyhuP7f+kj6UmT4/jgBeP3V6TT9mmX4/6OBJPg== +2016010101 0 BQEPAMgACAAUAAAAAAAAAMgAAQC0AAAAd3uSPGQBcT8xEFM/SUsMP2470j7kaDI/zoSlPu2xCz93e5I8JvUNP1IFRj+Af+U9edQXPhr1cT4EkJk+w5biPtBLnj5UyJ0+wSHxPhq0YD8ozN8+ZAFxPw== +2016010101 3 BQEPAMgACAAUAAAAAAAAAMgAAQC0AAAAQLLwPCMxeT/0nw890ERwPl/gOj7/khQ/9Nt7PqriFj9noEo/QLLwPAc26T6pvKA+G0QLPqCPcz9wti8/is3lPihKOT8jMXk/2NSePmVcUD9x6iY/MvucPg== +2016010101 9 BQEPAMgACAAUAAAAAAAAAMgAAQC0AAAAcTo3PD45ZD+XDY4+wZ4wPz45ZD/pYHc96EUQPxCF4T61dx0/B8FZP8T2Wj+2ibs8DGWoPnE6Nzw6xh8/acfTPuXnpj7oqAE/C4HGPqU6Qj8U3wI/RklhPg== diff --git a/pom.xml b/pom.xml index 464222b198fa..218439c23e1d 100644 --- a/pom.xml +++ b/pom.xml @@ -84,7 +84,7 @@ 1.21.0 - 3.1.0 + 3.2.0 2.0.0 10.14.2.0 4.0.0 diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java index f5d08abcadea..933b4b454f44 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java @@ -143,6 +143,11 @@ public class AggregatorUtil // expression lambda aggregator public static final byte EXPRESSION_LAMBDA_CACHE_TYPE_ID = 0x47; + // KLL sketch aggregator + public static final byte KLL_DOUBLES_SKETCH_BUILD_CACHE_TYPE_ID = 0x48; + public static final byte KLL_DOUBLES_SKETCH_MERGE_CACHE_TYPE_ID = 0x49; + public static final byte KLL_FLOATS_SKETCH_BUILD_CACHE_TYPE_ID = 0x4A; + public static final byte KLL_FLOATS_SKETCH_MERGE_CACHE_TYPE_ID = 0x4B; /** * returns the list of dependent postAggregators that should be calculated in order to calculate given postAgg diff --git 
a/processing/src/main/java/org/apache/druid/query/aggregation/post/PostAggregatorIds.java b/processing/src/main/java/org/apache/druid/query/aggregation/post/PostAggregatorIds.java index 716448b1ec87..f65208bd9069 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/post/PostAggregatorIds.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/post/PostAggregatorIds.java @@ -54,4 +54,16 @@ public class PostAggregatorIds public static final byte TDIGEST_SKETCH_TO_QUANTILES_CACHE_TYPE_ID = 30; public static final byte TDIGEST_SKETCH_TO_QUANTILE_CACHE_TYPE_ID = 31; public static final byte HLL_SKETCH_TO_ESTIMATE_CACHE_TYPE_ID = 32; + public static final byte KLL_DOUBLES_SKETCH_TO_RANK_CACHE_TYPE_ID = 33; + public static final byte KLL_DOUBLES_SKETCH_TO_CDF_CACHE_TYPE_ID = 34; + public static final byte KLL_DOUBLES_SKETCH_TO_HISTOGRAM_CACHE_TYPE_ID = 35; + public static final byte KLL_DOUBLES_SKETCH_TO_QUANTILE_CACHE_TYPE_ID = 36; + public static final byte KLL_DOUBLES_SKETCH_TO_QUANTILES_CACHE_TYPE_ID = 37; + public static final byte KLL_DOUBLES_SKETCH_TO_STRING_CACHE_TYPE_ID = 38; + public static final byte KLL_FLOATS_SKETCH_TO_RANK_CACHE_TYPE_ID = 39; + public static final byte KLL_FLOATS_SKETCH_TO_CDF_CACHE_TYPE_ID = 40; + public static final byte KLL_FLOATS_SKETCH_TO_HISTOGRAM_CACHE_TYPE_ID = 41; + public static final byte KLL_FLOATS_SKETCH_TO_QUANTILE_CACHE_TYPE_ID = 42; + public static final byte KLL_FLOATS_SKETCH_TO_QUANTILES_CACHE_TYPE_ID = 43; + public static final byte KLL_FLOATS_SKETCH_TO_STRING_CACHE_TYPE_ID = 44; } From 5616648010195e5828ee3f25c3e9e8d3a665ba30 Mon Sep 17 00:00:00 2001 From: AlexanderSaydakov Date: Wed, 4 May 2022 16:31:29 -0700 Subject: [PATCH 02/10] added documentation --- .../extensions-core/datasketches-extension.md | 1 + .../extensions-core/datasketches-kll.md | 138 ++++++++++++++++++ .../extensions-core/datasketches-quantiles.md | 2 +- 3 files changed, 140 insertions(+), 1 deletion(-) create mode 
100644 docs/development/extensions-core/datasketches-kll.md diff --git a/docs/development/extensions-core/datasketches-extension.md b/docs/development/extensions-core/datasketches-extension.md index df581616704e..c05806d901b7 100644 --- a/docs/development/extensions-core/datasketches-extension.md +++ b/docs/development/extensions-core/datasketches-extension.md @@ -36,4 +36,5 @@ The following modules are available: * [Theta sketch](datasketches-theta.md) - approximate distinct counting with set operations (union, intersection and set difference). * [Tuple sketch](datasketches-tuple.md) - extension of Theta sketch to support values associated with distinct keys (arrays of numeric values in this specialized implementation). * [Quantiles sketch](datasketches-quantiles.md) - approximate distribution of comparable values to obtain ranks, quantiles and histograms. This is a specialized implementation for numeric values. +* [KLL Quantiles sketch](datasketches-kll.md) - approximate distribution of comparable values to obtain ranks, quantiles and histograms. This is a specialized implementation for numeric values. This is a more advanced algorithm than the classic quantiles sketch above: its sketches are more compact for the same accuracy, or more accurate for the same size. * [HLL sketch](datasketches-hll.md) - approximate distinct counting using very compact HLL sketch. diff --git a/docs/development/extensions-core/datasketches-kll.md b/docs/development/extensions-core/datasketches-kll.md new file mode 100644 index 000000000000..ebc29e42c932 --- /dev/null +++ b/docs/development/extensions-core/datasketches-kll.md @@ -0,0 +1,138 @@ +--- +id: datasketches-kll +title: "DataSketches KLL Sketch module" +--- + + + + +This module provides Apache Druid aggregators based on the numeric quantiles sketches KllFloatsSketch and KllDoublesSketch from the [Apache DataSketches](https://datasketches.apache.org/) library.
KLL quantiles sketch is a mergeable streaming algorithm to estimate the distribution of values, and approximately answer queries about the rank of a value, probability mass function of the distribution (PMF) or histogram, cumulative distribution function (CDF), and quantiles (median, min, max, 95th percentile and such). See [Quantiles Sketch Overview](https://datasketches.apache.org/docs/Quantiles/QuantilesOverview). This document applies to both KllFloatsSketch and KllDoublesSketch. Only one of them will be used in the examples. + +There are three major modes of operation: + +1. Ingesting sketches built outside of Druid (say, with Pig or Hive) +2. Building sketches from raw data during ingestion +3. Building sketches from raw data at query time + +To use this aggregator, make sure you [include](../../development/extensions.md#loading-extensions) the extension in your config file: + +``` +druid.extensions.loadList=["druid-datasketches"] +``` + +### Aggregator + +The result of the aggregation is a KllFloatsSketch or KllDoublesSketch that is the union of all sketches either built from raw data or read from the segments. + +```json +{ + "type" : "KllDoublesSketch", + "name" : <output name>, + "fieldName" : <input field name>, + "k": <parameter that determines the accuracy and size of the sketch> + } +``` + +|property|description|required?| +|--------|-----------|---------| +|type|This String should be "KllFloatsSketch" or "KllDoublesSketch"|yes| +|name|A String for the output (result) name of the calculation.|yes| +|fieldName|A String for the name of the input field (can contain sketches or raw numeric values).|yes| +|k|Parameter that determines the accuracy and size of the sketch. Higher k means higher accuracy but more space to store sketches. Must be from 8 to 65535.|no, defaults to 200| +|maxStreamLength|This parameter defines the number of items presented to each sketch before it might, in the context of a BufferAggregator, grow larger than a preallocated memory region and need to move on heap.
Ideally just a few sketches should grow that large.|no, defaults to 1000000000| + +### Post Aggregators + +#### Quantile + +This returns an approximation to the value that would be preceded by a given fraction of a hypothetical sorted version of the input stream. + +```json +{ + "type" : "KllDoublesSketchToQuantile", + "name": <output name>, + "field" : <post aggregator that refers to a KllDoublesSketch>, + "fraction" : <fractional position in the hypothetical sorted stream> +} +``` + +#### Quantiles + +This returns an array of quantiles corresponding to a given array of fractions. + +```json +{ + "type" : "KllDoublesSketchToQuantiles", + "name": <output name>, + "field" : <post aggregator that refers to a KllDoublesSketch>, + "fractions" : <array of fractional positions in the hypothetical sorted stream> +} +``` + +#### Histogram + +This returns an approximation to the histogram given an array of split points that define the histogram bins or a number of bins (not both). An array of m unique, monotonically increasing split points divides the real number line into m+1 consecutive disjoint intervals. The definition of an interval is inclusive of the left split point and exclusive of the right split point. If the number of bins is specified instead of split points, the interval between the minimum and maximum values is divided into the given number of equally-spaced bins. + +```json +{ + "type" : "KllDoublesSketchToHistogram", + "name": <output name>, + "field" : <post aggregator that refers to a KllDoublesSketch>, + "splitPoints" : <array of split points (optional)>, + "numBins" : <number of bins (optional)> +} +``` + +#### Rank + +This returns an approximation to the rank of a given value, that is, the fraction of the distribution less than that value. + +```json +{ + "type" : "KllDoublesSketchToRank", + "name": <output name>, + "field" : <post aggregator that refers to a KllDoublesSketch>, + "value" : <value> +} +``` +#### CDF + +This returns an approximation to the Cumulative Distribution Function given an array of split points that define the edges of the bins. An array of m unique, monotonically increasing split points divides the real number line into m+1 consecutive disjoint intervals. The definition of an interval is inclusive of the left split point and exclusive of the right split point.
The resulting array of fractions can be viewed as the ranks of the split points, with one additional rank that is always 1. + +```json +{ + "type" : "KllDoublesSketchToCDF", + "name": <output name>, + "field" : <post aggregator that refers to a KllDoublesSketch>, + "splitPoints" : <array of split points> +} +``` + +#### Sketch Summary + +This returns a summary of the sketch that can be used for debugging. This is the result of calling the sketch's toString() method. + +```json +{ + "type" : "KllDoublesSketchToString", + "name": <output name>, + "field" : <post aggregator that refers to a KllDoublesSketch> +} +``` diff --git a/docs/development/extensions-core/datasketches-quantiles.md b/docs/development/extensions-core/datasketches-quantiles.md index 850d84846c43..a4428a618fcc 100644 --- a/docs/development/extensions-core/datasketches-quantiles.md +++ b/docs/development/extensions-core/datasketches-quantiles.md @@ -56,7 +56,7 @@ The result of the aggregation is a DoublesSketch that is the union of all sketch |name|A String for the output (result) name of the calculation.|yes| |fieldName|A String for the name of the input field (can contain sketches or raw numeric values).|yes| |k|Parameter that determines the accuracy and size of the sketch. Higher k means higher accuracy but more space to store sketches. Must be a power of 2 from 2 to 32768. See [accuracy information](https://datasketches.apache.org/docs/Quantiles/OrigQuantilesSketch) in the DataSketches documentation for details.|no, defaults to 128| -|maxStreamLength|This parameter is a temporary solution to avoid a [known issue](https://github.com/apache/druid/issues/11544). It may be removed in a future release after the bug is fixed. This parameter defines the maximum number of items to store in each sketch. If a sketch reaches the limit, the query can throw `IllegalStateException`. To workaround this issue, increase the maximum stream length.
See [accuracy information](https://datasketches.apache.org/docs/Quantiles/OrigQuantilesSketch) in the DataSketches documentation for how many bytes are required per stream length.|no, defaults to 1000000000| +|maxStreamLength|This parameter defines the number of items presented to each sketch before it might, in the context of a BufferAggregator, grow larger than a preallocated memory region and need to move on heap. Ideally just a few sketches should grow that large.|no, defaults to 1000000000| ### Post Aggregators From b36b096960612b2481a04c0342a042c9f99cf4b4 Mon Sep 17 00:00:00 2001 From: AlexanderSaydakov Date: Wed, 4 May 2022 16:49:16 -0700 Subject: [PATCH 03/10] direct static refs --- .../kll/KllDoublesSketchAggregatorFactoryTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactoryTest.java index 2cfe561ab59c..c56598e4a720 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactoryTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactoryTest.java @@ -79,8 +79,8 @@ public void testDefaultParams() null ); - Assert.assertEquals(KllDoublesSketchAggregatorFactory.DEFAULT_K, factory.getK()); - Assert.assertEquals(KllDoublesSketchAggregatorFactory.DEFAULT_MAX_STREAM_LENGTH, factory.getMaxStreamLength()); + Assert.assertEquals(KllSketchAggregatorFactory.DEFAULT_K, factory.getK()); + Assert.assertEquals(KllSketchAggregatorFactory.DEFAULT_MAX_STREAM_LENGTH, factory.getMaxStreamLength()); } @Test From 798fc9d50d9b72ff7a7c4520102551072dd48ca5 Mon Sep 17 00:00:00 2001 From: AlexanderSaydakov Date: Wed, 4 May 2022 
16:50:20 -0700 Subject: [PATCH 04/10] direct static refs --- .../kll/KllFloatsSketchAggregatorFactoryTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactoryTest.java index 25eae831db4c..be0171fd109e 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactoryTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactoryTest.java @@ -79,8 +79,8 @@ public void testDefaultParams() null ); - Assert.assertEquals(KllFloatsSketchAggregatorFactory.DEFAULT_K, factory.getK()); - Assert.assertEquals(KllFloatsSketchAggregatorFactory.DEFAULT_MAX_STREAM_LENGTH, factory.getMaxStreamLength()); + Assert.assertEquals(KllSketchAggregatorFactory.DEFAULT_K, factory.getK()); + Assert.assertEquals(KllSketchAggregatorFactory.DEFAULT_MAX_STREAM_LENGTH, factory.getMaxStreamLength()); } @Test From f81d11da130bb6185a88eece218cdd3be68807f9 Mon Sep 17 00:00:00 2001 From: AlexanderSaydakov Date: Mon, 9 May 2022 12:26:36 -0700 Subject: [PATCH 05/10] fixed test --- .../datasketches/kll/KllDoublesSketchAggregatorTest.java | 2 +- .../datasketches/kll/KllFloatsSketchAggregatorTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorTest.java index cd85d287a160..0aec67c2efe5 100644 --- 
a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorTest.java @@ -364,7 +364,7 @@ public void buildingSketchesAtQueryTime() throws Exception Object sketchObjectWithNulls = row.get(1); Assert.assertTrue(sketchObjectWithNulls instanceof Long); long sketchValueWithNulls = (long) sketchObjectWithNulls; - Assert.assertEquals(NullHandling.replaceWithDefault() ? 400 : 377, sketchValueWithNulls); + Assert.assertEquals(NullHandling.replaceWithDefault() ? 400 : 355, sketchValueWithNulls); // post agg Object quantileObject = row.get(2); diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorTest.java index 1e71d4edfa0d..11e9aa9615d7 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorTest.java @@ -364,7 +364,7 @@ public void buildingSketchesAtQueryTime() throws Exception Object sketchObjectWithNulls = row.get(1); Assert.assertTrue(sketchObjectWithNulls instanceof Long); long sketchValueWithNulls = (long) sketchObjectWithNulls; - Assert.assertEquals(NullHandling.replaceWithDefault() ? 400 : 377, sketchValueWithNulls); + Assert.assertEquals(NullHandling.replaceWithDefault() ? 
400 : 355, sketchValueWithNulls); // post agg Object quantileObject = row.get(2); From f5743531225dd50e963eeeda374c5eeb3761125c Mon Sep 17 00:00:00 2001 From: AlexanderSaydakov Date: Thu, 18 Aug 2022 17:15:44 -0700 Subject: [PATCH 06/10] addressed review points --- docs/development/extensions-core/datasketches-kll.md | 4 ++-- docs/development/extensions-core/datasketches-quantiles.md | 2 +- .../datasketches/kll/KllDoublesSketchAggregatorFactory.java | 2 +- .../datasketches/kll/KllFloatsSketchAggregatorFactory.java | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/development/extensions-core/datasketches-kll.md b/docs/development/extensions-core/datasketches-kll.md index ebc29e42c932..a1512e0bd70d 100644 --- a/docs/development/extensions-core/datasketches-kll.md +++ b/docs/development/extensions-core/datasketches-kll.md @@ -55,8 +55,8 @@ The result of the aggregation is a KllFloatsSketch or KllDoublesSketch that is t |type|This String should be "KllFloatsSketch" or "KllDoublesSketch"|yes| |name|A String for the output (result) name of the calculation.|yes| |fieldName|A String for the name of the input field (can contain sketches or raw numeric values).|yes| -|k|Parameter that determines the accuracy and size of the sketch. Higher k means higher accuracy but more space to store sketches. Must be from 8 to 65535.|no, defaults to 200| -|maxStreamLength|This parameter defines the number of items presented to each sketch before it might, in the context of a BufferAggregator, grow larger than a preallocated memory region and need to move on heap. Ideally just a few sketches should grow that large.|no, defaults to 1000000000| +|k|Parameter that determines the accuracy and size of the sketch. Higher k means higher accuracy but more space to store sketches. Must be from 8 to 65535. 
See [KLL Sketch Accuracy and Size](https://datasketches.apache.org/docs/KLL/KLLAccuracyAndSize.html).|no, defaults to 200| +|maxStreamLength|This parameter defines the number of items that can be presented to each sketch before it may need to move from off-heap to on-heap memory. This is relevant to query types that use off-heap memory, including [TopN](../../querying/topnquery.md) and [GroupBy](../../querying/groupbyquery.md). Ideally, this should be set high enough that most sketches can stay off-heap.|no, defaults to 1000000000| ### Post Aggregators diff --git a/docs/development/extensions-core/datasketches-quantiles.md b/docs/development/extensions-core/datasketches-quantiles.md index a4428a618fcc..0470852a4d28 100644 --- a/docs/development/extensions-core/datasketches-quantiles.md +++ b/docs/development/extensions-core/datasketches-quantiles.md @@ -56,7 +56,7 @@ The result of the aggregation is a DoublesSketch that is the union of all sketch |name|A String for the output (result) name of the calculation.|yes| |fieldName|A String for the name of the input field (can contain sketches or raw numeric values).|yes| |k|Parameter that determines the accuracy and size of the sketch. Higher k means higher accuracy but more space to store sketches. Must be a power of 2 from 2 to 32768. See [accuracy information](https://datasketches.apache.org/docs/Quantiles/OrigQuantilesSketch) in the DataSketches documentation for details.|no, defaults to 128| -|maxStreamLength|This parameter defines the number of items presented to each sketch before it might, in the context of a BufferAggregator, grow larger than a preallocated memory region and need to move on heap. Ideally just a few sketches should grow that large.|no, defaults to 1000000000| +|maxStreamLength|This parameter defines the number of items that can be presented to each sketch before it may need to move from off-heap to on-heap memory.
This is relevant to query types that use off-heap memory, including [TopN](../../querying/topnquery.md) and [GroupBy](../../querying/groupbyquery.md). Ideally, this should be set high enough that most sketches can stay off-heap.|no, defaults to 1000000000| ### Post Aggregators diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactory.java index 6bafec93910c..23207a596779 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorFactory.java @@ -105,7 +105,7 @@ public AggregatorFactory getMergingFactory(final AggregatorFactory other) return new KllDoublesSketchMergeAggregatorFactory( getName(), Math.max(getK(), ((KllDoublesSketchAggregatorFactory) other).getK()), - getMaxStreamLength() + Math.max(getMaxStreamLength(), ((KllDoublesSketchAggregatorFactory) other).getMaxStreamLength()) ); } else { throw new AggregatorFactoryNotMergeableException(this, other); diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactory.java index 72ae74ea83d6..6b7f563c6750 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorFactory.java @@ -105,7 +105,7 @@ public AggregatorFactory getMergingFactory(final AggregatorFactory other)
return new KllFloatsSketchMergeAggregatorFactory( getName(), Math.max(getK(), ((KllFloatsSketchAggregatorFactory) other).getK()), - getMaxStreamLength() + Math.max(getMaxStreamLength(), ((KllFloatsSketchAggregatorFactory) other).getMaxStreamLength()) ); } else { throw new AggregatorFactoryNotMergeableException(this, other); From bb2ad0010e8f6d016c9fd48162d5f4d15c714338 Mon Sep 17 00:00:00 2001 From: AlexanderSaydakov Date: Tue, 23 Aug 2022 09:44:55 -0700 Subject: [PATCH 07/10] added KLL sketch related terms --- website/.spelling | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/website/.spelling b/website/.spelling index 89ad85dd3264..8652e8d070d5 100644 --- a/website/.spelling +++ b/website/.spelling @@ -2006,3 +2006,10 @@ protobuf Golang multiValueHandling _n_ +KLL +KllFloatsSketch +KllDoublesSketch +PMF +CDF +maxStreamLength +toString From a26783239ebf52616c5174a56c603cb7db12dd14 Mon Sep 17 00:00:00 2001 From: AlexanderSaydakov Date: Wed, 24 Aug 2022 17:01:51 -0700 Subject: [PATCH 08/10] return a copy from get --- .../kll/KllDoublesSketchBuildBufferAggregatorHelper.java | 9 +++++++++ .../kll/KllFloatsSketchBuildBufferAggregatorHelper.java | 9 +++++++++ .../kll/KllSketchBuildBufferAggregatorHelper.java | 7 ++----- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchBuildBufferAggregatorHelper.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchBuildBufferAggregatorHelper.java index 9d977a6b6f0e..ebe461b70fc7 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchBuildBufferAggregatorHelper.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchBuildBufferAggregatorHelper.java @@ -20,9 +20,12 @@ package 
org.apache.druid.query.aggregation.datasketches.kll; import org.apache.datasketches.kll.KllDoublesSketch; +import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; +import java.nio.ByteBuffer; + public class KllDoublesSketchBuildBufferAggregatorHelper extends KllSketchBuildBufferAggregatorHelper { @@ -43,4 +46,10 @@ KllDoublesSketch writableWrap(final WritableMemory mem, final MemoryRequestServe return KllDoublesSketch.writableWrap(mem, reqServer); } + @Override + public KllDoublesSketch get(final ByteBuffer buffer, final int position) + { + return KllDoublesSketch.wrap(Memory.wrap(getSketchAtPosition(buffer, position).toByteArray())); + } + } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchBuildBufferAggregatorHelper.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchBuildBufferAggregatorHelper.java index 360d864f962d..4fb427e2b172 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchBuildBufferAggregatorHelper.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchBuildBufferAggregatorHelper.java @@ -20,9 +20,12 @@ package org.apache.druid.query.aggregation.datasketches.kll; import org.apache.datasketches.kll.KllFloatsSketch; +import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; +import java.nio.ByteBuffer; + public class KllFloatsSketchBuildBufferAggregatorHelper extends KllSketchBuildBufferAggregatorHelper { @@ -43,4 +46,10 @@ KllFloatsSketch writableWrap(final WritableMemory mem, final MemoryRequestServer return KllFloatsSketch.writableWrap(mem, reqServer); } + @Override + public 
KllFloatsSketch get(final ByteBuffer buffer, final int position) + { + return KllFloatsSketch.wrap(Memory.wrap(getSketchAtPosition(buffer, position).toByteArray())); + } + } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchBuildBufferAggregatorHelper.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchBuildBufferAggregatorHelper.java index f0d0ea889b83..10a4165f583a 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchBuildBufferAggregatorHelper.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchBuildBufferAggregatorHelper.java @@ -52,11 +52,6 @@ public void init(final ByteBuffer buffer, final int position) putSketch(buffer, position, sketch); } - public SketchType get(final ByteBuffer buffer, final int position) - { - return sketches.get(buffer).get(position); - } - // A small number of sketches may run out of the given memory, request more memory on heap and move there. // In that case we need to reuse the object from the cache as opposed to wrapping the new buffer. public void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, ByteBuffer newBuffer) @@ -106,4 +101,6 @@ private void putSketch(final ByteBuffer buffer, final int position, final Sketch abstract SketchType newDirectInstance(int k, WritableMemory mem, MemoryRequestServer reqServer); abstract SketchType writableWrap(WritableMemory mem, MemoryRequestServer reqServer); + + abstract SketchType get(ByteBuffer buf, int position); } From 7e117c72c86373027b39b701dd965d164716af52 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 25 Aug 2022 16:25:06 -0700 Subject: [PATCH 09/10] Copy unions when returning them from "get". 
--- ...KllDoublesSketchMergeBufferAggregatorHelper.java | 9 +++++++++ .../KllFloatsSketchMergeBufferAggregatorHelper.java | 9 +++++++++ .../kll/KllSketchBuildBufferAggregatorHelper.java | 5 ++++- .../kll/KllSketchMergeBufferAggregator.java | 2 +- .../kll/KllSketchMergeBufferAggregatorHelper.java | 13 +++++++------ 5 files changed, 30 insertions(+), 8 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeBufferAggregatorHelper.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeBufferAggregatorHelper.java index fbc64e2709e1..ff008ea1ef75 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeBufferAggregatorHelper.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchMergeBufferAggregatorHelper.java @@ -20,9 +20,12 @@ package org.apache.druid.query.aggregation.datasketches.kll; import org.apache.datasketches.kll.KllDoublesSketch; +import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; +import java.nio.ByteBuffer; + public class KllDoublesSketchMergeBufferAggregatorHelper extends KllSketchMergeBufferAggregatorHelper { public KllDoublesSketchMergeBufferAggregatorHelper( @@ -45,4 +48,10 @@ KllDoublesSketch writableWrap(WritableMemory mem, MemoryRequestServer reqServer) return KllDoublesSketch.writableWrap(mem, reqServer); } + @Override + KllDoublesSketch get(ByteBuffer buffer, int position) + { + final KllDoublesSketch union = getSketchAtPosition(buffer, position); + return KllDoublesSketch.wrap(Memory.wrap(union.toByteArray())); + } } diff --git 
a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeBufferAggregatorHelper.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeBufferAggregatorHelper.java index e5ef68c6268d..82ef65d88694 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeBufferAggregatorHelper.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchMergeBufferAggregatorHelper.java @@ -20,9 +20,12 @@ package org.apache.druid.query.aggregation.datasketches.kll; import org.apache.datasketches.kll.KllFloatsSketch; +import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; +import java.nio.ByteBuffer; + public class KllFloatsSketchMergeBufferAggregatorHelper extends KllSketchMergeBufferAggregatorHelper { public KllFloatsSketchMergeBufferAggregatorHelper( @@ -45,4 +48,10 @@ KllFloatsSketch writableWrap(final WritableMemory mem, final MemoryRequestServer return KllFloatsSketch.writableWrap(mem, reqServer); } + @Override + KllFloatsSketch get(ByteBuffer buffer, int position) + { + final KllFloatsSketch union = getSketchAtPosition(buffer, position); + return KllFloatsSketch.wrap(Memory.wrap(union.toByteArray())); + } } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchBuildBufferAggregatorHelper.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchBuildBufferAggregatorHelper.java index 10a4165f583a..313a01dd7488 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchBuildBufferAggregatorHelper.java +++ 
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchBuildBufferAggregatorHelper.java @@ -79,7 +79,7 @@ public void clear() } /** - * Retrieves the sketch at a particular position. + * Retrieves the sketch at a particular position. The returned sketch references the provided buffer. */ public SketchType getSketchAtPosition(final ByteBuffer buf, final int position) { @@ -102,5 +102,8 @@ private void putSketch(final ByteBuffer buffer, final int position, final Sketch abstract SketchType writableWrap(WritableMemory mem, MemoryRequestServer reqServer); + /** + * Returns a copy of the sketch at the provided buffer. The returned sketch does not reference the provided buffer. + */ abstract SketchType get(ByteBuffer buf, int position); } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeBufferAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeBufferAggregator.java index cc04b7fa10ca..ec20000ce81c 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeBufferAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeBufferAggregator.java @@ -49,7 +49,7 @@ public void init(final ByteBuffer buffer, final int position) @Override public Object get(final ByteBuffer buffer, final int position) { - return helper.getSketchAtPosition(buffer, position); + return helper.get(buffer, position); } @Override diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeBufferAggregatorHelper.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeBufferAggregatorHelper.java index fd54caad48d9..3f4b4bdf0ec5 100644 --- 
a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeBufferAggregatorHelper.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeBufferAggregatorHelper.java @@ -55,11 +55,6 @@ public void init(final ByteBuffer buffer, final int position) putUnion(buffer, position, union); } - public Object get(final ByteBuffer buffer, final int position) - { - return unions.get(buffer).get(position); - } - public void clear() { unions.clear(); @@ -87,7 +82,7 @@ public void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, Byt } /** - * Retrieves the sketch at a particular position. + * Retrieves the sketch at a particular position. The returned sketch references the provided buffer. */ public SketchType getSketchAtPosition(final ByteBuffer buf, final int position) { @@ -109,4 +104,10 @@ private void putUnion(final ByteBuffer buffer, final int position, final SketchT abstract SketchType newDirectInstance(int k, WritableMemory mem, MemoryRequestServer reqServer); abstract SketchType writableWrap(WritableMemory mem, MemoryRequestServer reqServer); + + /** + * Returns a copy of the union at the provided buffer position. The returned union does not reference the + * provided buffer. + */ + abstract SketchType get(final ByteBuffer buffer, final int position); } From cfbbad5f6ec466f495275748d25e20ac3e24108c Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Fri, 26 Aug 2022 08:43:33 -0700 Subject: [PATCH 10/10] Remove redundant "final". 
--- .../datasketches/kll/KllSketchMergeBufferAggregatorHelper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeBufferAggregatorHelper.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeBufferAggregatorHelper.java index 3f4b4bdf0ec5..369286cf1ab2 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeBufferAggregatorHelper.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/kll/KllSketchMergeBufferAggregatorHelper.java @@ -109,5 +109,5 @@ private void putUnion(final ByteBuffer buffer, final int position, final SketchT * Returns a copy of the union at the provided buffer position. The returned union does not reference the * provided buffer. */ - abstract SketchType get(final ByteBuffer buffer, final int position); + abstract SketchType get(ByteBuffer buffer, int position); }