From 66664217eb300323aa40f85c9771565d508ced0b Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Tue, 4 May 2021 18:53:21 -0700 Subject: [PATCH 01/13] Add "stringEncoding" parameter to DataSketches HLL. Builds on the concept from #11172 and adds a way to feed HLL sketches with UTF-8 bytes. This must be an option rather than always-on, because prior to this patch, HLL sketches used UTF-16LE encoding when hashing strings. To remain compatible with sketch images created prior to this patch -- which matters during rolling updates and when reading sketches that have been written to segments -- we must keep UTF-16LE as the default. Not currently documented, because I'm not yet sure how best to expose this functionality to users. I think the first place would be in the SQL layer: we could have it automatically select UTF-8 or UTF-16LE when building sketches at query time. We need to be careful about this, though, because UTF-8 isn't always faster. Sometimes, like for the results of expressions, UTF-16LE is faster. I expect we will sort this out in future patches. 
--- codestyle/spotbugs-exclude.xml | 1 + .../java/util/common/StringEncoding.java | 45 ++++ ...codingDefaultUTF16LEJsonIncludeFilter.java | 20 ++ .../java/util/common/StringEncodingTest.java | 57 +++++ .../hll/HllSketchAggregatorFactory.java | 76 +++--- .../hll/HllSketchBuildAggregator.java | 54 +---- .../hll/HllSketchBuildAggregatorFactory.java | 34 ++- .../hll/HllSketchBuildBufferAggregator.java | 24 +- .../HllSketchBuildColumnProcessorFactory.java | 110 +++++++++ .../datasketches/hll/HllSketchBuildUtil.java | 112 +++++++++ .../hll/HllSketchBuildVectorAggregator.java | 68 +++--- .../hll/HllSketchMergeAggregatorFactory.java | 15 +- .../hll/HllSketchMergeVectorAggregator.java | 7 +- .../hll/sql/HllSketchBaseSqlAggregator.java | 2 + .../DoubleHllSketchBuildVectorProcessor.java | 74 ++++++ .../vector/HllSketchBuildVectorProcessor.java | 35 +++ .../HllSketchBuildVectorProcessorFactory.java | 94 ++++++++ .../LongHllSketchBuildVectorProcessor.java | 74 ++++++ ...ueStringHllSketchBuildVectorProcessor.java | 92 ++++++++ .../ObjectHllSketchBuildVectorProcessor.java | 86 +++++++ ...ueStringHllSketchBuildVectorProcessor.java | 81 +++++++ .../hll/HllSketchAggregatorFactoryTest.java | 28 ++- .../hll/HllSketchAggregatorTest.java | 222 ++++++++++++++++-- .../HllSketchBuildAggregatorFactoryTest.java | 102 ++++++++ .../HllSketchMergeAggregatorFactoryTest.java | 99 +++++++- ...HllSketchToEstimatePostAggregatorTest.java | 1 + .../hll/sql/HllSketchSqlAggregatorTest.java | 16 +- 27 files changed, 1470 insertions(+), 159 deletions(-) create mode 100644 core/src/main/java/org/apache/druid/java/util/common/StringEncoding.java create mode 100644 core/src/main/java/org/apache/druid/java/util/common/StringEncodingDefaultUTF16LEJsonIncludeFilter.java create mode 100644 core/src/test/java/org/apache/druid/java/util/common/StringEncodingTest.java create mode 100644 
extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildColumnProcessorFactory.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/DoubleHllSketchBuildVectorProcessor.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessor.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/LongHllSketchBuildVectorProcessor.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/MultiValueStringHllSketchBuildVectorProcessor.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/ObjectHllSketchBuildVectorProcessor.java create mode 100644 extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/SingleValueStringHllSketchBuildVectorProcessor.java create mode 100644 extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactoryTest.java diff --git a/codestyle/spotbugs-exclude.xml b/codestyle/spotbugs-exclude.xml index cb6cb9c34fee..9dee2cfdb560 100644 --- a/codestyle/spotbugs-exclude.xml +++ b/codestyle/spotbugs-exclude.xml @@ -35,6 +35,7 @@ + diff --git a/core/src/main/java/org/apache/druid/java/util/common/StringEncoding.java b/core/src/main/java/org/apache/druid/java/util/common/StringEncoding.java new file mode 100644 index 
000000000000..3d1694d9c6cf --- /dev/null +++ b/core/src/main/java/org/apache/druid/java/util/common/StringEncoding.java @@ -0,0 +1,45 @@ +package org.apache.druid.java.util.common; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonValue; +import com.google.common.primitives.SignedBytes; + +/** + * An enum that provides a way for users to specify what encoding should be used when hashing strings. + * + * The main reason for this setting's existence is getting the best performance possible. When operating on memory + * mapped segments -- which store strings as UTF-8 -- it is fastest to use "UTF8". When operating on the result of + * expressions, or on an in-heap IncrementalIndex -- which use Java strings -- it is fastest to use "UTF16LE". + * + * This decision cannot be made locally, because different encodings do not generate equivalent hashes, and therefore + * they are not mergeable. The decision must be made globally by the end user or by the SQL planner, and should be + * based on where most input strings are expected to come from. + * + * Currently, UTF8 and UTF16LE are the only two options, because there are no situations where other options would be + * higher-performing. + */ +public enum StringEncoding implements Cacheable +{ + // Do not change order; the ordinal is used by cache keys. Add new ones at the end. 
+ UTF8, + UTF16LE /* Equivalent to treating the result of str.toCharArray() as a bag of bytes in little-endian order */; + + @JsonCreator + public static StringEncoding fromString(final String name) + { + return valueOf(StringUtils.toUpperCase(name)); + } + + @Override + public byte[] getCacheKey() + { + return new byte[]{SignedBytes.checkedCast(ordinal())}; + } + + @JsonValue + @Override + public String toString() + { + return StringUtils.toLowerCase(this.name()); + } +} diff --git a/core/src/main/java/org/apache/druid/java/util/common/StringEncodingDefaultUTF16LEJsonIncludeFilter.java b/core/src/main/java/org/apache/druid/java/util/common/StringEncodingDefaultUTF16LEJsonIncludeFilter.java new file mode 100644 index 000000000000..48676c547530 --- /dev/null +++ b/core/src/main/java/org/apache/druid/java/util/common/StringEncodingDefaultUTF16LEJsonIncludeFilter.java @@ -0,0 +1,20 @@ +package org.apache.druid.java.util.common; + +import com.fasterxml.jackson.annotation.JsonInclude; + +/** + * {@link JsonInclude} filter for {@link StringEncoding} that ignores UTF16LE, which is the typical default + * for aggregators. + * + * This API works by "creative" use of equals. It requires warnings to be suppressed and also requires spotbugs + * exclusions (see spotbugs-exclude.xml). 
+ */ +@SuppressWarnings({"EqualsAndHashcode", "EqualsWhichDoesntCheckParameterClass"}) +public class StringEncodingDefaultUTF16LEJsonIncludeFilter // lgtm [java/inconsistent-equals-and-hashcode] +{ + @Override + public boolean equals(Object obj) + { + return obj == StringEncoding.UTF16LE; + } +} diff --git a/core/src/test/java/org/apache/druid/java/util/common/StringEncodingTest.java b/core/src/test/java/org/apache/druid/java/util/common/StringEncodingTest.java new file mode 100644 index 000000000000..040b4599f915 --- /dev/null +++ b/core/src/test/java/org/apache/druid/java/util/common/StringEncodingTest.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.java.util.common; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; +import java.util.Arrays; + +public class StringEncodingTest +{ + @Test + public void testSerde() throws IOException + { + final ObjectMapper mapper = new ObjectMapper(); + + Assert.assertEquals( + StringEncoding.UTF16LE, + mapper.readValue(mapper.writeValueAsString(StringEncoding.UTF16LE), StringEncoding.class) + ); + + Assert.assertEquals( + StringEncoding.UTF8, + mapper.readValue(mapper.writeValueAsString(StringEncoding.UTF8), StringEncoding.class) + ); + } + + @Test + public void testGetCacheKey() throws IOException + { + Assert.assertFalse( + Arrays.equals( + StringEncoding.UTF8.getCacheKey(), + StringEncoding.UTF16LE.getCacheKey() + ) + ); + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java index 18d63a90f133..62227db53a8c 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java @@ -19,10 +19,13 @@ package org.apache.druid.query.aggregation.datasketches.hll; +import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.datasketches.hll.HllSketch; import org.apache.datasketches.hll.TgtHllType; import org.apache.datasketches.hll.Union; +import org.apache.druid.java.util.common.StringEncoding; +import org.apache.druid.java.util.common.StringEncodingDefaultUTF16LEJsonIncludeFilter; import org.apache.druid.query.aggregation.AggregateCombiner; import 
org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.ObjectAggregateCombiner; @@ -44,6 +47,7 @@ public abstract class HllSketchAggregatorFactory extends AggregatorFactory public static final boolean DEFAULT_ROUND = false; public static final int DEFAULT_LG_K = 12; public static final TgtHllType DEFAULT_TGT_HLL_TYPE = TgtHllType.HLL_4; + public static final StringEncoding DEFAULT_STRING_ENCODING = StringEncoding.UTF16LE; static final Comparator COMPARATOR = Comparator.nullsFirst(Comparator.comparingDouble(HllSketch::getEstimate)); @@ -52,6 +56,7 @@ public abstract class HllSketchAggregatorFactory extends AggregatorFactory private final String fieldName; private final int lgK; private final TgtHllType tgtHllType; + private final StringEncoding stringEncoding; private final boolean round; HllSketchAggregatorFactory( @@ -59,6 +64,7 @@ public abstract class HllSketchAggregatorFactory extends AggregatorFactory final String fieldName, @Nullable final Integer lgK, @Nullable final String tgtHllType, + @Nullable final StringEncoding stringEncoding, final boolean round ) { @@ -66,6 +72,7 @@ public abstract class HllSketchAggregatorFactory extends AggregatorFactory this.fieldName = Objects.requireNonNull(fieldName); this.lgK = lgK == null ? DEFAULT_LG_K : lgK; this.tgtHllType = tgtHllType == null ? DEFAULT_TGT_HLL_TYPE : TgtHllType.valueOf(tgtHllType); + this.stringEncoding = stringEncoding == null ? 
DEFAULT_STRING_ENCODING : stringEncoding; this.round = round; } @@ -95,6 +102,14 @@ public String getTgtHllType() } @JsonProperty + @JsonInclude(value = JsonInclude.Include.CUSTOM, valueFilter = StringEncodingDefaultUTF16LEJsonIncludeFilter.class) + public StringEncoding getStringEncoding() + { + return stringEncoding; + } + + @JsonProperty + @JsonInclude(JsonInclude.Include.NON_DEFAULT) public boolean isRound() { return round; @@ -107,14 +122,15 @@ public List requiredFields() } /** - * This is a convoluted way to return a list of input field names this aggregator needs. - * Currently the returned factories are only used to obtain a field name by calling getName() method. + * Used by groupBy v1 to create a "transfer aggregator". + * + * {@inheritDoc} */ @Override public List getRequiredColumns() { return Collections.singletonList( - new HllSketchBuildAggregatorFactory(fieldName, fieldName, lgK, tgtHllType.toString(), round) + new HllSketchBuildAggregatorFactory(fieldName, fieldName, lgK, tgtHllType.toString(), stringEncoding, round) ); } @@ -210,62 +226,64 @@ public Comparator getComparator() @Override public AggregatorFactory getCombiningFactory() { - return new HllSketchMergeAggregatorFactory(getName(), getName(), getLgK(), getTgtHllType(), isRound()); + return new HllSketchMergeAggregatorFactory( + getName(), + getName(), + getLgK(), + getTgtHllType(), + getStringEncoding(), + isRound() + ); } @Override public byte[] getCacheKey() { - return new CacheKeyBuilder(getCacheTypeId()).appendString(name).appendString(fieldName) - .appendInt(lgK).appendInt(tgtHllType.ordinal()).build(); + return new CacheKeyBuilder(getCacheTypeId()) + .appendString(name) + .appendString(fieldName) + .appendInt(lgK) + .appendInt(tgtHllType.ordinal()) + .appendInt(stringEncoding.ordinal()) + .build(); } @Override - public boolean equals(final Object object) + public boolean equals(Object o) { - if (this == object) { + if (this == o) { return true; } - if (object == null || 
!getClass().equals(object.getClass())) { - return false; - } - final HllSketchAggregatorFactory that = (HllSketchAggregatorFactory) object; - if (!name.equals(that.getName())) { - return false; - } - if (!fieldName.equals(that.getFieldName())) { - return false; - } - if (lgK != that.getLgK()) { - return false; - } - if (!tgtHllType.equals(that.tgtHllType)) { - return false; - } - if (round != that.round) { + if (o == null || getClass() != o.getClass()) { return false; } - return true; + HllSketchAggregatorFactory that = (HllSketchAggregatorFactory) o; + return lgK == that.lgK + && round == that.round + && Objects.equals(name, that.name) + && Objects.equals(fieldName, that.fieldName) + && tgtHllType == that.tgtHllType + && stringEncoding == that.stringEncoding; } @Override public int hashCode() { - return Objects.hash(name, fieldName, lgK, tgtHllType); + return Objects.hash(name, fieldName, lgK, tgtHllType, stringEncoding, round); } @Override public String toString() { - return getClass().getSimpleName() + " {" + return getClass().getSimpleName() + "{" + " name=" + name + ", fieldName=" + fieldName + ", lgK=" + lgK + ", tgtHllType=" + tgtHllType + + ", stringEncoding=" + stringEncoding + ", round=" + round + " }"; } protected abstract byte getCacheTypeId(); - } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregator.java index be60842a3095..6d5e4314c3ff 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregator.java @@ -21,11 +21,11 @@ import org.apache.datasketches.hll.HllSketch; import org.apache.datasketches.hll.TgtHllType; -import 
org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.StringEncoding; import org.apache.druid.query.aggregation.Aggregator; -import org.apache.druid.segment.ColumnValueSelector; -import java.util.List; +import java.util.function.Consumer; +import java.util.function.Supplier; /** * This aggregator builds sketches from raw data. @@ -33,17 +33,19 @@ */ public class HllSketchBuildAggregator implements Aggregator { - - private final ColumnValueSelector selector; + private final Consumer> processor; + private final StringEncoding stringEncoding; private HllSketch sketch; public HllSketchBuildAggregator( - final ColumnValueSelector selector, + final Consumer> processor, final int lgK, - final TgtHllType tgtHllType + final TgtHllType tgtHllType, + final StringEncoding stringEncoding ) { - this.selector = selector; + this.processor = processor; + this.stringEncoding = stringEncoding; this.sketch = new HllSketch(lgK, tgtHllType); } @@ -55,13 +57,7 @@ public HllSketchBuildAggregator( @Override public void aggregate() { - final Object value = selector.getObject(); - if (value == null) { - return; - } - synchronized (this) { - updateSketch(sketch, value); - } + processor.accept(() -> sketch); } /* @@ -92,32 +88,4 @@ public long getLong() { throw new UnsupportedOperationException("Not implemented"); } - - static void updateSketch(final HllSketch sketch, final Object value) - { - if (value instanceof Integer || value instanceof Long) { - sketch.update(((Number) value).longValue()); - } else if (value instanceof Float || value instanceof Double) { - sketch.update(((Number) value).doubleValue()); - } else if (value instanceof String) { - sketch.update(((String) value).toCharArray()); - } else if (value instanceof List) { - // noinspection unchecked - List list = (List) value; - for (String v : list) { - sketch.update(v.toCharArray()); - } - } else if (value instanceof char[]) { - sketch.update((char[]) value); - } else if (value instanceof byte[]) { - 
sketch.update((byte[]) value); - } else if (value instanceof int[]) { - sketch.update((int[]) value); - } else if (value instanceof long[]) { - sketch.update((long[]) value); - } else { - throw new IAE("Unsupported type " + value.getClass()); - } - } - } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java index df68180b7bf1..0cad2406a7dc 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java @@ -23,16 +23,20 @@ import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.datasketches.hll.HllSketch; import org.apache.datasketches.hll.TgtHllType; +import org.apache.druid.java.util.common.StringEncoding; import org.apache.druid.query.aggregation.Aggregator; import org.apache.druid.query.aggregation.AggregatorUtil; import org.apache.druid.query.aggregation.BufferAggregator; import org.apache.druid.query.aggregation.VectorAggregator; import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.ColumnProcessors; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; +import java.util.function.Consumer; +import java.util.function.Supplier; /** * This aggregator factory is for building sketches from raw data. 
@@ -47,10 +51,11 @@ public HllSketchBuildAggregatorFactory( @JsonProperty("fieldName") final String fieldName, @JsonProperty("lgK") @Nullable final Integer lgK, @JsonProperty("tgtHllType") @Nullable final String tgtHllType, + @JsonProperty("stringEncoding") @Nullable final StringEncoding stringEncoding, @JsonProperty("round") final boolean round ) { - super(name, fieldName, lgK, tgtHllType, round); + super(name, fieldName, lgK, tgtHllType, stringEncoding, round); } @Override @@ -68,18 +73,34 @@ protected byte getCacheTypeId() @Override public Aggregator factorize(final ColumnSelectorFactory columnSelectorFactory) { - final ColumnValueSelector selector = columnSelectorFactory.makeColumnValueSelector(getFieldName()); - return new HllSketchBuildAggregator(selector, getLgK(), TgtHllType.valueOf(getTgtHllType())); + final Consumer> processor = ColumnProcessors.makeProcessor( + getFieldName(), + new HllSketchBuildColumnProcessorFactory(getStringEncoding()), + columnSelectorFactory + ); + + return new HllSketchBuildAggregator( + processor, + getLgK(), + TgtHllType.valueOf(getTgtHllType()), + getStringEncoding() + ); } @Override public BufferAggregator factorizeBuffered(final ColumnSelectorFactory columnSelectorFactory) { - final ColumnValueSelector selector = columnSelectorFactory.makeColumnValueSelector(getFieldName()); + final Consumer> processor = ColumnProcessors.makeProcessor( + getFieldName(), + new HllSketchBuildColumnProcessorFactory(getStringEncoding()), + columnSelectorFactory + ); + return new HllSketchBuildBufferAggregator( - selector, + processor, getLgK(), TgtHllType.valueOf(getTgtHllType()), + getStringEncoding(), getMaxIntermediateSize() ); } @@ -93,11 +114,12 @@ public boolean canVectorize(ColumnInspector columnInspector) @Override public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFactory) { - return new HllSketchBuildVectorAggregator( + return HllSketchBuildVectorAggregator.create( selectorFactory, getFieldName(), getLgK(), 
TgtHllType.valueOf(getTgtHllType()), + getStringEncoding(), getMaxIntermediateSize() ); } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildBufferAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildBufferAggregator.java index ab54215e52f2..d11393d822c0 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildBufferAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildBufferAggregator.java @@ -19,12 +19,15 @@ package org.apache.druid.query.aggregation.datasketches.hll; +import org.apache.datasketches.hll.HllSketch; import org.apache.datasketches.hll.TgtHllType; +import org.apache.druid.java.util.common.StringEncoding; import org.apache.druid.query.aggregation.BufferAggregator; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; -import org.apache.druid.segment.ColumnValueSelector; import java.nio.ByteBuffer; +import java.util.function.Consumer; +import java.util.function.Supplier; /** * This aggregator builds sketches from raw data. 
@@ -32,18 +35,21 @@ */ public class HllSketchBuildBufferAggregator implements BufferAggregator { - private final ColumnValueSelector selector; + private final Consumer> processor; private final HllSketchBuildBufferAggregatorHelper helper; + private final StringEncoding stringEncoding; public HllSketchBuildBufferAggregator( - final ColumnValueSelector selector, + final Consumer> processor, final int lgK, final TgtHllType tgtHllType, + final StringEncoding stringEncoding, final int size ) { - this.selector = selector; + this.processor = processor; this.helper = new HllSketchBuildBufferAggregatorHelper(lgK, tgtHllType, size); + this.stringEncoding = stringEncoding; } @Override @@ -55,12 +61,7 @@ public void init(final ByteBuffer buf, final int position) @Override public void aggregate(final ByteBuffer buf, final int position) { - final Object value = selector.getObject(); - if (value == null) { - return; - } - - HllSketchBuildAggregator.updateSketch(helper.getSketchAtPosition(buf, position), value); + processor.accept(() -> helper.getSketchAtPosition(buf, position)); } @Override @@ -100,10 +101,11 @@ public void relocate(final int oldPosition, final int newPosition, final ByteBuf @Override public void inspectRuntimeShape(RuntimeShapeInspector inspector) { - inspector.visit("selector", selector); + inspector.visit("processor", processor); // lgK should be inspected because different execution paths exist in HllSketch.update() that is called from // @CalledFromHotLoop-annotated aggregate() depending on the lgK. 
// See https://github.com/apache/druid/pull/6893#discussion_r250726028 inspector.visit("lgK", helper.getLgK()); + inspector.visit("stringEncoding", stringEncoding); } } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildColumnProcessorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildColumnProcessorFactory.java new file mode 100644 index 000000000000..9c2a466d1c61 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildColumnProcessorFactory.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.hll; + +import org.apache.datasketches.hll.HllSketch; +import org.apache.druid.java.util.common.StringEncoding; +import org.apache.druid.segment.BaseDoubleColumnValueSelector; +import org.apache.druid.segment.BaseFloatColumnValueSelector; +import org.apache.druid.segment.BaseLongColumnValueSelector; +import org.apache.druid.segment.BaseObjectColumnValueSelector; +import org.apache.druid.segment.ColumnProcessorFactory; +import org.apache.druid.segment.DimensionSelector; +import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.data.IndexedInts; + +import java.util.function.Consumer; +import java.util.function.Supplier; + +/** + * Scalar (non-vectorized) column processor factory. + */ +public class HllSketchBuildColumnProcessorFactory implements ColumnProcessorFactory>> +{ + private final StringEncoding stringEncoding; + + HllSketchBuildColumnProcessorFactory(StringEncoding stringEncoding) + { + this.stringEncoding = stringEncoding; + } + + @Override + public ValueType defaultType() + { + return ValueType.STRING; + } + + @Override + public Consumer> makeDimensionProcessor(DimensionSelector selector, boolean multiValue) + { + return sketch -> { + final IndexedInts row = selector.getRow(); + final int sz = row.size(); + + for (int i = 0; i < sz; i++) { + HllSketchBuildUtil.updateSketchWithDictionarySelector(sketch.get(), stringEncoding, selector, row.get(i)); + } + }; + } + + @Override + public Consumer> makeFloatProcessor(BaseFloatColumnValueSelector selector) + { + return sketch -> { + if (!selector.isNull()) { + // Important that this is *double* typed, since HllSketchBuildAggregator treats doubles and floats the same. 
+ final double value = selector.getFloat(); + sketch.get().update(value); + } + }; + } + + @Override + public Consumer> makeDoubleProcessor(BaseDoubleColumnValueSelector selector) + { + return sketch -> { + if (!selector.isNull()) { + sketch.get().update(selector.getDouble()); + } + }; + } + + @Override + public Consumer> makeLongProcessor(BaseLongColumnValueSelector selector) + { + return sketch -> { + if (!selector.isNull()) { + sketch.get().update(selector.getLong()); + } + }; + } + + @Override + public Consumer> makeComplexProcessor(BaseObjectColumnValueSelector selector) + { + return sketch -> { + final Object o = selector.getObject(); + + if (o != null) { + HllSketchBuildUtil.updateSketch(sketch.get(), stringEncoding, o); + } + }; + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java new file mode 100644 index 000000000000..7237065a9711 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.hll; + +import org.apache.datasketches.hll.HllSketch; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.StringEncoding; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.UOE; +import org.apache.druid.segment.DimensionDictionarySelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; +import java.util.List; + +public class HllSketchBuildUtil +{ + public static void updateSketch(final HllSketch sketch, final StringEncoding stringEncoding, final Object value) + { + if (value instanceof Integer || value instanceof Long) { + sketch.update(((Number) value).longValue()); + } else if (value instanceof Float || value instanceof Double) { + sketch.update(((Number) value).doubleValue()); + } else if (value instanceof String) { + updateSketchWithString(sketch, stringEncoding, (String) value); + } else if (value instanceof List) { + // noinspection unchecked + List list = (List) value; + for (String v : list) { + updateSketchWithString(sketch, stringEncoding, v); + } + } else if (value instanceof char[]) { + sketch.update((char[]) value); + } else if (value instanceof byte[]) { + sketch.update((byte[]) value); + } else if (value instanceof int[]) { + sketch.update((int[]) value); + } else if (value instanceof long[]) { + sketch.update((long[]) value); + } else { + throw new IAE("Unsupported type " + value.getClass()); + } + } + + public static void updateSketchWithString( + final HllSketch sketch, + final StringEncoding stringEncoding, + @Nullable final String value + ) + { + if (value == null) { + return; + } + + switch (stringEncoding) { + case UTF8: + sketch.update(StringUtils.toUtf8(value)); + break; + case UTF16LE: + 
sketch.update(value.toCharArray()); + break; + default: + throw new UOE("Unsupported string encoding [%s]", stringEncoding); + } + } + + public static void updateSketchWithDictionarySelector( + final HllSketch sketch, + final StringEncoding stringEncoding, + final DimensionDictionarySelector selector, + final int id + ) + { + if (stringEncoding == StringEncoding.UTF8 && selector.supportsLookupNameUtf8()) { + final ByteBuffer buf = selector.lookupNameUtf8(id); + + if (buf != null) { + // We must copy the bytes here, because HllSketch doesn't have a method that accepts ByteBuffer. + // Should be possible to optimize this by adding such a method to DataSketches. + final byte[] bytes = new byte[buf.remaining()]; + buf.get(bytes); + sketch.update(bytes); + } else if (NullHandling.replaceWithDefault()) { + // Treat as empty string. + sketch.update(StringUtils.EMPTY_BYTES); + } else { + // Do nothing. + } + } else { + final String s = NullHandling.nullToEmptyIfNeeded(selector.lookupName(id)); + updateSketchWithString(sketch, stringEncoding, s); + } + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildVectorAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildVectorAggregator.java index 506c9c3a2a73..3786fe0dc787 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildVectorAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildVectorAggregator.java @@ -20,35 +20,52 @@ package org.apache.druid.query.aggregation.datasketches.hll; import org.apache.datasketches.hll.TgtHllType; +import org.apache.druid.java.util.common.StringEncoding; import org.apache.druid.query.aggregation.VectorAggregator; -import 
org.apache.druid.query.aggregation.datasketches.util.ToObjectVectorColumnProcessorFactory; +import org.apache.druid.query.aggregation.datasketches.hll.vector.HllSketchBuildVectorProcessor; +import org.apache.druid.query.aggregation.datasketches.hll.vector.HllSketchBuildVectorProcessorFactory; import org.apache.druid.segment.ColumnProcessors; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; import java.nio.ByteBuffer; -import java.util.function.Supplier; public class HllSketchBuildVectorAggregator implements VectorAggregator { + private final HllSketchBuildVectorProcessor processor; private final HllSketchBuildBufferAggregatorHelper helper; - private final Supplier objectSupplier; - HllSketchBuildVectorAggregator( + private HllSketchBuildVectorAggregator( + final HllSketchBuildVectorProcessor processor, + final HllSketchBuildBufferAggregatorHelper helper + ) + { + this.processor = processor; + this.helper = helper; + } + + public static HllSketchBuildVectorAggregator create( final VectorColumnSelectorFactory columnSelectorFactory, final String column, final int lgK, final TgtHllType tgtHllType, + final StringEncoding stringEncoding, final int size ) { - this.helper = new HllSketchBuildBufferAggregatorHelper(lgK, tgtHllType, size); - this.objectSupplier = - ColumnProcessors.makeVectorProcessor( - column, - ToObjectVectorColumnProcessorFactory.INSTANCE, - columnSelectorFactory - ); + final HllSketchBuildBufferAggregatorHelper helper = new HllSketchBuildBufferAggregatorHelper( + lgK, + tgtHllType, + size + ); + + final HllSketchBuildVectorProcessor processor = ColumnProcessors.makeVectorProcessor( + column, + new HllSketchBuildVectorProcessorFactory(helper, stringEncoding), + columnSelectorFactory + ); + + return new HllSketchBuildVectorAggregator(processor, helper); } @Override @@ -58,36 +75,15 @@ public void init(final ByteBuffer buf, final int position) } @Override - public void aggregate(final ByteBuffer buf, 
final int position, final int startRow, final int endRow) + public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) { - final Object[] vector = objectSupplier.get(); - for (int i = startRow; i < endRow; i++) { - final Object value = vector[i]; - if (value != null) { - HllSketchBuildAggregator.updateSketch(helper.getSketchAtPosition(buf, position), value); - } - } + processor.aggregate(buf, position, startRow, endRow); } @Override - public void aggregate( - final ByteBuffer buf, - final int numRows, - final int[] positions, - @Nullable final int[] rows, - final int positionOffset - ) + public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) { - final Object[] vector = objectSupplier.get(); - - for (int i = 0; i < numRows; i++) { - final Object o = vector[rows != null ? rows[i] : i]; - - if (o != null) { - final int position = positions[i] + positionOffset; - HllSketchBuildAggregator.updateSketch(helper.getSketchAtPosition(buf, position), o); - } - } + processor.aggregate(buf, numRows, positions, rows, positionOffset); } @Override diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java index 050cf59e1fcf..a643f5f35bab 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java @@ -24,6 +24,7 @@ import org.apache.datasketches.hll.HllSketch; import org.apache.datasketches.hll.TgtHllType; import org.apache.datasketches.hll.Union; +import org.apache.druid.java.util.common.StringEncoding; import org.apache.druid.query.aggregation.Aggregator; import 
org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.AggregatorFactoryNotMergeableException; @@ -40,6 +41,12 @@ /** * This aggregator factory is for merging existing sketches. * The input column must contain {@link HllSketch} + * + * Note: aggregators generated by this class do not directly use "stringEncoding", but it is part of this class + * anyway so we can preserve enough information to ensure that we are merging sketches in a valid way. (Sketches with + * incompatible string encodings cannot be merged meaningfully.) Currently, the only way this is exposed is through + * {@link #getMergingFactory}, which will throw {@link AggregatorFactoryNotMergeableException} if presented with + * two aggregators with two different encodings. */ public class HllSketchMergeAggregatorFactory extends HllSketchAggregatorFactory { @@ -50,16 +57,19 @@ public HllSketchMergeAggregatorFactory( @JsonProperty("fieldName") final String fieldName, @JsonProperty("lgK") @Nullable final Integer lgK, @JsonProperty("tgtHllType") @Nullable final String tgtHllType, + @JsonProperty("stringEncoding") @Nullable final StringEncoding stringEncoding, @JsonProperty("round") final boolean round ) { - super(name, fieldName, lgK, tgtHllType, round); + super(name, fieldName, lgK, tgtHllType, stringEncoding, round); } @Override public AggregatorFactory getMergingFactory(AggregatorFactory other) throws AggregatorFactoryNotMergeableException { - if (other.getName().equals(this.getName()) && other instanceof HllSketchMergeAggregatorFactory) { + if (other.getName().equals(this.getName()) + && other instanceof HllSketchMergeAggregatorFactory + && getStringEncoding() == ((HllSketchMergeAggregatorFactory) other).getStringEncoding()) { HllSketchMergeAggregatorFactory castedOther = (HllSketchMergeAggregatorFactory) other; return new HllSketchMergeAggregatorFactory( @@ -67,6 +77,7 @@ public AggregatorFactory getMergingFactory(AggregatorFactory other) throws Aggre getName(), 
Math.max(getLgK(), castedOther.getLgK()), getTgtHllType().compareTo(castedOther.getTgtHllType()) < 0 ? castedOther.getTgtHllType() : getTgtHllType(), + getStringEncoding(), isRound() || castedOther.isRound() ); } else { diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeVectorAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeVectorAggregator.java index d97c0b5ce673..ef608dc26297 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeVectorAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeVectorAggregator.java @@ -84,6 +84,7 @@ public void aggregate( final int positionOffset ) { + final WritableMemory mem = WritableMemory.wrap(buf, ByteOrder.LITTLE_ENDIAN); final Object[] vector = objectSupplier.get(); for (int i = 0; i < numRows; i++) { @@ -91,11 +92,7 @@ public void aggregate( if (o != null) { final int position = positions[i] + positionOffset; - - final WritableMemory mem = WritableMemory.wrap(buf, ByteOrder.LITTLE_ENDIAN) - .writableRegion(position, helper.getSize()); - - final Union union = Union.writableWrap(mem); + final Union union = Union.writableWrap(mem.writableRegion(position, helper.getSize())); union.update(o); } } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java index 2f08cf0cdecc..efc66a545d29 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java +++ 
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java @@ -126,6 +126,7 @@ public Aggregation toDruidAggregation( columnArg.getDirectColumn(), logK, tgtHllType, + HllSketchAggregatorFactory.DEFAULT_STRING_ENCODING /* Not used when merging; doesn't matter what we set */, ROUND ); } else { @@ -158,6 +159,7 @@ public Aggregation toDruidAggregation( dimensionSpec.getDimension(), logK, tgtHllType, + HllSketchAggregatorFactory.DEFAULT_STRING_ENCODING, ROUND ); } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/DoubleHllSketchBuildVectorProcessor.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/DoubleHllSketchBuildVectorProcessor.java new file mode 100644 index 000000000000..7655a19992c0 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/DoubleHllSketchBuildVectorProcessor.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.hll.vector; + +import org.apache.datasketches.hll.HllSketch; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildBufferAggregatorHelper; +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class DoubleHllSketchBuildVectorProcessor implements HllSketchBuildVectorProcessor +{ + private final HllSketchBuildBufferAggregatorHelper helper; + private final VectorValueSelector selector; + + public DoubleHllSketchBuildVectorProcessor( + final HllSketchBuildBufferAggregatorHelper helper, + final VectorValueSelector selector + ) + { + this.helper = helper; + this.selector = selector; + } + + @Override + public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) + { + final double[] vector = selector.getDoubleVector(); + final boolean[] nullVector = selector.getNullVector(); + + final HllSketch sketch = helper.getSketchAtPosition(buf, position); + + for (int i = startRow; i < endRow; i++) { + if (NullHandling.replaceWithDefault() || nullVector == null || !nullVector[i]) { + sketch.update(vector[i]); + } + } + } + + @Override + public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) + { + final double[] vector = selector.getDoubleVector(); + final boolean[] nullVector = selector.getNullVector(); + + for (int i = 0; i < numRows; i++) { + final int idx = rows != null ? 
rows[i] : i; + if (NullHandling.replaceWithDefault() || nullVector == null || !nullVector[idx]) { + final int position = positions[i] + positionOffset; + final HllSketch sketch = helper.getSketchAtPosition(buf, position); + sketch.update(vector[idx]); + } + } + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessor.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessor.java new file mode 100644 index 000000000000..d5127b0dea5d --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessor.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.hll.vector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +/** + * Processor for {@link org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildVectorAggregator}. 
+ * + * @see HllSketchBuildVectorProcessorFactory + */ +public interface HllSketchBuildVectorProcessor +{ + void aggregate(ByteBuffer buf, int position, int startRow, int endRow); + + void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset); +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java new file mode 100644 index 000000000000..aac55a2e0b72 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/HllSketchBuildVectorProcessorFactory.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.hll.vector; + +import org.apache.druid.java.util.common.StringEncoding; +import org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildBufferAggregatorHelper; +import org.apache.druid.segment.VectorColumnProcessorFactory; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; +import org.apache.druid.segment.vector.VectorObjectSelector; +import org.apache.druid.segment.vector.VectorValueSelector; + +public class HllSketchBuildVectorProcessorFactory implements VectorColumnProcessorFactory +{ + private final HllSketchBuildBufferAggregatorHelper helper; + private final StringEncoding stringEncoding; + + public HllSketchBuildVectorProcessorFactory( + final HllSketchBuildBufferAggregatorHelper helper, + final StringEncoding stringEncoding + ) + { + this.helper = helper; + this.stringEncoding = stringEncoding; + } + + @Override + public HllSketchBuildVectorProcessor makeSingleValueDimensionProcessor( + ColumnCapabilities capabilities, + SingleValueDimensionVectorSelector selector + ) + { + return new SingleValueStringHllSketchBuildVectorProcessor(helper, stringEncoding, selector); + } + + @Override + public HllSketchBuildVectorProcessor makeMultiValueDimensionProcessor( + ColumnCapabilities capabilities, + MultiValueDimensionVectorSelector selector + ) + { + return new MultiValueStringHllSketchBuildVectorProcessor(helper, stringEncoding, selector); + } + + @Override + public HllSketchBuildVectorProcessor makeFloatProcessor(ColumnCapabilities capabilities, VectorValueSelector selector) + { + // No specialized "float" version, for consistency with HllSketchBuildAggregator#updateSketch (it treats floats + // and doubles identically). 
+ return new DoubleHllSketchBuildVectorProcessor(helper, selector); + } + + @Override + public HllSketchBuildVectorProcessor makeDoubleProcessor( + ColumnCapabilities capabilities, + VectorValueSelector selector + ) + { + return new DoubleHllSketchBuildVectorProcessor(helper, selector); + } + + @Override + public HllSketchBuildVectorProcessor makeLongProcessor(ColumnCapabilities capabilities, VectorValueSelector selector) + { + return new LongHllSketchBuildVectorProcessor(helper, selector); + } + + @Override + public HllSketchBuildVectorProcessor makeObjectProcessor( + ColumnCapabilities capabilities, + VectorObjectSelector selector + ) + { + return new ObjectHllSketchBuildVectorProcessor(helper, stringEncoding, selector); + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/LongHllSketchBuildVectorProcessor.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/LongHllSketchBuildVectorProcessor.java new file mode 100644 index 000000000000..398aef660b7c --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/LongHllSketchBuildVectorProcessor.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.datasketches.hll.vector; + +import org.apache.datasketches.hll.HllSketch; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildBufferAggregatorHelper; +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class LongHllSketchBuildVectorProcessor implements HllSketchBuildVectorProcessor +{ + private final HllSketchBuildBufferAggregatorHelper helper; + private final VectorValueSelector selector; + + public LongHllSketchBuildVectorProcessor( + final HllSketchBuildBufferAggregatorHelper helper, + final VectorValueSelector selector + ) + { + this.helper = helper; + this.selector = selector; + } + + @Override + public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) + { + final long[] vector = selector.getLongVector(); + final boolean[] nullVector = selector.getNullVector(); + + final HllSketch sketch = helper.getSketchAtPosition(buf, position); + + for (int i = startRow; i < endRow; i++) { + if (NullHandling.replaceWithDefault() || nullVector == null || !nullVector[i]) { + sketch.update(vector[i]); + } + } + } + + @Override + public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) + { + final long[] vector = selector.getLongVector(); + final boolean[] nullVector = selector.getNullVector(); + + for (int i = 0; i < numRows; i++) { + final int idx = rows != null ? 
rows[i] : i; + if (NullHandling.replaceWithDefault() || nullVector == null || !nullVector[idx]) { + final int position = positions[i] + positionOffset; + final HllSketch sketch = helper.getSketchAtPosition(buf, position); + sketch.update(vector[idx]); + } + } + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/MultiValueStringHllSketchBuildVectorProcessor.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/MultiValueStringHllSketchBuildVectorProcessor.java new file mode 100644 index 000000000000..58991413b76b --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/MultiValueStringHllSketchBuildVectorProcessor.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.hll.vector; + +import org.apache.datasketches.hll.HllSketch; +import org.apache.druid.java.util.common.StringEncoding; +import org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildBufferAggregatorHelper; +import org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildUtil; +import org.apache.druid.segment.data.IndexedInts; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class MultiValueStringHllSketchBuildVectorProcessor implements HllSketchBuildVectorProcessor +{ + private final HllSketchBuildBufferAggregatorHelper helper; + private final StringEncoding stringEncoding; + private final MultiValueDimensionVectorSelector selector; + + public MultiValueStringHllSketchBuildVectorProcessor( + final HllSketchBuildBufferAggregatorHelper helper, + final StringEncoding stringEncoding, + final MultiValueDimensionVectorSelector selector + ) + { + this.helper = helper; + this.stringEncoding = stringEncoding; + this.selector = selector; + } + + @Override + public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) + { + final IndexedInts[] vector = selector.getRowVector(); + final HllSketch sketch = helper.getSketchAtPosition(buf, position); + + for (int i = startRow; i < endRow; i++) { + final IndexedInts ids = vector[i]; + final int sz = ids.size(); + + for (int j = 0; j < sz; j++) { + HllSketchBuildUtil.updateSketchWithDictionarySelector( + sketch, + stringEncoding, + selector, + ids.get(j) + ); + } + } + } + + @Override + public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) + { + final IndexedInts[] vector = selector.getRowVector(); + + for (int i = 0; i < numRows; i++) { + final int position = positions[i] + positionOffset; + final HllSketch sketch = helper.getSketchAtPosition(buf, position); + + final 
IndexedInts ids = vector[rows != null ? rows[i] : i]; + final int sz = ids.size(); + + for (int j = 0; j < sz; j++) { + HllSketchBuildUtil.updateSketchWithDictionarySelector( + sketch, + stringEncoding, + selector, + ids.get(j) + ); + } + } + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/ObjectHllSketchBuildVectorProcessor.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/ObjectHllSketchBuildVectorProcessor.java new file mode 100644 index 000000000000..99666a32306f --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/ObjectHllSketchBuildVectorProcessor.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.hll.vector; + +import org.apache.datasketches.hll.HllSketch; +import org.apache.druid.java.util.common.StringEncoding; +import org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildBufferAggregatorHelper; +import org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildUtil; +import org.apache.druid.segment.vector.VectorObjectSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +/** + * Processor that handles cases where string columns are presented as object selectors instead of dimension selectors. + */ +public class ObjectHllSketchBuildVectorProcessor implements HllSketchBuildVectorProcessor +{ + private final HllSketchBuildBufferAggregatorHelper helper; + private final StringEncoding stringEncoding; + private final VectorObjectSelector selector; + + public ObjectHllSketchBuildVectorProcessor( + final HllSketchBuildBufferAggregatorHelper helper, + final StringEncoding stringEncoding, + final VectorObjectSelector selector + ) + { + this.helper = helper; + this.stringEncoding = stringEncoding; + this.selector = selector; + } + + @Override + public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) + { + final Object[] vector = selector.getObjectVector(); + final HllSketch sketch = helper.getSketchAtPosition(buf, position); + + for (int i = startRow; i < endRow; i++) { + if (vector[i] != null) { + HllSketchBuildUtil.updateSketch( + sketch, + stringEncoding, + vector[i] + ); + } + } + } + + @Override + public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) + { + final Object[] vector = selector.getObjectVector(); + + for (int i = 0; i < numRows; i++) { + final int position = positions[i] + positionOffset; + final HllSketch sketch = helper.getSketchAtPosition(buf, position); + + if (vector[i] != null) { + HllSketchBuildUtil.updateSketch( + sketch, + stringEncoding, + vector[i] + ); + } 
+ } + } +} diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/SingleValueStringHllSketchBuildVectorProcessor.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/SingleValueStringHllSketchBuildVectorProcessor.java new file mode 100644 index 000000000000..b2b0070f8385 --- /dev/null +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/SingleValueStringHllSketchBuildVectorProcessor.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.datasketches.hll.vector; + +import org.apache.datasketches.hll.HllSketch; +import org.apache.druid.java.util.common.StringEncoding; +import org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildBufferAggregatorHelper; +import org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildUtil; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class SingleValueStringHllSketchBuildVectorProcessor implements HllSketchBuildVectorProcessor +{ + private final HllSketchBuildBufferAggregatorHelper helper; + private final StringEncoding stringEncoding; + private final SingleValueDimensionVectorSelector selector; + + public SingleValueStringHllSketchBuildVectorProcessor( + final HllSketchBuildBufferAggregatorHelper helper, + final StringEncoding stringEncoding, + final SingleValueDimensionVectorSelector selector + ) + { + this.helper = helper; + this.stringEncoding = stringEncoding; + this.selector = selector; + } + + @Override + public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) + { + final int[] vector = selector.getRowVector(); + final HllSketch sketch = helper.getSketchAtPosition(buf, position); + + for (int i = startRow; i < endRow; i++) { + HllSketchBuildUtil.updateSketchWithDictionarySelector( + sketch, + stringEncoding, + selector, + vector[i] + ); + } + } + + @Override + public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) + { + final int[] vector = selector.getRowVector(); + + for (int i = 0; i < numRows; i++) { + final int position = positions[i] + positionOffset; + final HllSketch sketch = helper.getSketchAtPosition(buf, position); + + HllSketchBuildUtil.updateSketchWithDictionarySelector( + sketch, + stringEncoding, + selector, + vector[i] + ); + } + } +} diff --git 
a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactoryTest.java index 6af99b3298f3..db50cfaea676 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactoryTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactoryTest.java @@ -19,8 +19,11 @@ package org.apache.druid.query.aggregation.datasketches.hll; +import com.fasterxml.jackson.databind.ObjectMapper; +import nl.jqno.equalsverifier.EqualsVerifier; import org.apache.datasketches.hll.HllSketch; import org.apache.datasketches.hll.TgtHllType; +import org.apache.druid.java.util.common.StringEncoding; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.query.Druids; import org.apache.druid.query.aggregation.Aggregator; @@ -32,6 +35,7 @@ import org.apache.druid.query.timeseries.TimeseriesQuery; import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.column.ValueType; import org.easymock.EasyMock; @@ -40,6 +44,7 @@ import org.junit.Test; import javax.annotation.Nullable; +import java.io.IOException; import java.lang.reflect.Field; import java.lang.reflect.Modifier; import java.util.Arrays; @@ -52,6 +57,7 @@ public class HllSketchAggregatorFactoryTest private static final String FIELD_NAME = "fieldName"; private static final int LG_K = HllSketchAggregatorFactory.DEFAULT_LG_K; private static final String TGT_HLL_TYPE = TgtHllType.HLL_4.name(); + private static final StringEncoding STRING_ENCODING = StringEncoding.UTF16LE; private static 
final boolean ROUND = true; private static final double ESTIMATE = Math.PI; @@ -60,7 +66,7 @@ public class HllSketchAggregatorFactoryTest @Before public void setUp() { - target = new TestHllSketchAggregatorFactory(NAME, FIELD_NAME, LG_K, TGT_HLL_TYPE, ROUND); + target = new TestHllSketchAggregatorFactory(NAME, FIELD_NAME, LG_K, TGT_HLL_TYPE, STRING_ENCODING, ROUND); } @Test @@ -69,6 +75,12 @@ public void testIsRound() Assert.assertEquals(ROUND, target.isRound()); } + @Test + public void testStringEncoding() + { + Assert.assertEquals(STRING_ENCODING, target.getStringEncoding()); + } + @Test public void testGetRequiredColumns() { @@ -112,6 +124,7 @@ public void testFinalizeComputatioNoRound() FIELD_NAME, LG_K, TGT_HLL_TYPE, + STRING_ENCODING, !ROUND ); Object actual = t.finalizeComputation(getMockSketch()); @@ -145,6 +158,7 @@ public void testEqualsOtherDiffName() FIELD_NAME, LG_K, TGT_HLL_TYPE, + STRING_ENCODING, ROUND ); Assert.assertNotEquals(target, other); @@ -158,6 +172,7 @@ public void testEqualsOtherDiffFieldName() FIELD_NAME + "-diff", LG_K, TGT_HLL_TYPE, + STRING_ENCODING, ROUND ); Assert.assertNotEquals(target, other); @@ -171,6 +186,7 @@ public void testEqualsOtherDiffLgK() FIELD_NAME, LG_K + 1, TGT_HLL_TYPE, + STRING_ENCODING, ROUND ); Assert.assertNotEquals(target, other); @@ -184,6 +200,7 @@ public void testEqualsOtherDiffTgtHllType() FIELD_NAME, LG_K, TgtHllType.HLL_8.name(), + STRING_ENCODING, ROUND ); Assert.assertNotEquals(target, other); @@ -197,6 +214,7 @@ public void testEqualsOtherDiffRound() FIELD_NAME, LG_K, TGT_HLL_TYPE, + STRING_ENCODING, !ROUND ); Assert.assertNotEquals(target, other); @@ -210,6 +228,7 @@ public void testEqualsOtherMatches() FIELD_NAME, LG_K, TGT_HLL_TYPE, + STRING_ENCODING, ROUND ); Assert.assertEquals(target, other); @@ -244,6 +263,7 @@ public void testResultArraySignature() "col", null, null, + null, false ), new HllSketchBuildAggregatorFactory( @@ -251,6 +271,7 @@ public void testResultArraySignature() "col", null, 
null, + null, true ), new HllSketchMergeAggregatorFactory( @@ -258,6 +279,7 @@ public void testResultArraySignature() "col", null, null, + null, false ), new HllSketchMergeAggregatorFactory( @@ -265,6 +287,7 @@ public void testResultArraySignature() "col", null, null, + null, true ) ) @@ -326,10 +349,11 @@ private static class TestHllSketchAggregatorFactory extends HllSketchAggregatorF String fieldName, @Nullable Integer lgK, @Nullable String tgtHllType, + @Nullable StringEncoding stringEncoding, boolean round ) { - super(name, fieldName, lgK, tgtHllType, round); + super(name, fieldName, lgK, tgtHllType, stringEncoding, round); } @Override diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java index afac5739db9e..36f16da9f641 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java @@ -24,6 +24,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.StringEncoding; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.query.QueryContexts; @@ -56,25 +57,29 @@ public class HllSketchAggregatorTest extends InitializedNullHandlingTest private final AggregationTestHelper helper; private final QueryContexts.Vectorize vectorize; + private final StringEncoding stringEncoding; @Rule public final TemporaryFolder tempFolder = new TemporaryFolder(); - public HllSketchAggregatorTest(GroupByQueryConfig config, String vectorize) + public 
HllSketchAggregatorTest(GroupByQueryConfig config, String vectorize, StringEncoding stringEncoding) { HllSketchModule.registerSerde(); helper = AggregationTestHelper.createGroupByQueryAggregationTestHelper( new HllSketchModule().getJacksonModules(), config, tempFolder); this.vectorize = QueryContexts.Vectorize.fromString(vectorize); + this.stringEncoding = stringEncoding; } - @Parameterized.Parameters(name = "config = {0}, vectorize = {1}") + @Parameterized.Parameters(name = "groupByConfig = {0}, vectorize = {1}, stringEncoding = {2}") public static Collection constructorFeeder() { final List constructors = new ArrayList<>(); for (GroupByQueryConfig config : GroupByQueryRunnerTest.testConfigs()) { for (String vectorize : new String[]{"false", "true", "force"}) { - constructors.add(new Object[]{config, vectorize}); + for (StringEncoding stringEncoding : StringEncoding.values()) { + constructors.add(new Object[]{config, vectorize, stringEncoding}); + } } } return constructors; @@ -89,11 +94,11 @@ public void ingestSketches() throws Exception Arrays.asList("dim", "multiDim"), Arrays.asList("timestamp", "dim", "multiDim", "sketch") ), - buildAggregatorJson("HLLSketchMerge", "sketch", !ROUND), + buildAggregatorJson("HLLSketchMerge", "sketch", !ROUND, stringEncoding), 0, // minTimestamp Granularities.NONE, 200, // maxRowCount - buildGroupByQueryJson("HLLSketchMerge", "sketch", !ROUND) + buildGroupByQueryJson("HLLSketchMerge", "sketch", !ROUND, stringEncoding) ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -110,16 +115,51 @@ public void buildSketchesAtIngestionTime() throws Exception Collections.singletonList("dim"), Arrays.asList("timestamp", "dim", "multiDim", "id") ), - buildAggregatorJson("HLLSketchBuild", "id", !ROUND), + buildAggregatorJson("HLLSketchBuild", "id", !ROUND, stringEncoding), 0, // minTimestamp Granularities.NONE, 200, // maxRowCount - buildGroupByQueryJson("HLLSketchMerge", "sketch", !ROUND) + 
buildGroupByQueryJson("HLLSketchMerge", "sketch", !ROUND, stringEncoding) ); List results = seq.toList(); Assert.assertEquals(1, results.size()); ResultRow row = results.get(0); Assert.assertEquals(200, (double) row.get(0), 0.1); + + // Check specific sketch: result should be deterministic given same inputs in same order. This helps makes sure + // that our logic remains compatible across versions. + + final String expectedSketch; + + if (stringEncoding == StringEncoding.UTF16LE) { + expectedSketch = + "AwEHDAkIAAHIAAAAAcoDCQOUmAsElgIEcSjfBAlswAULkjgKDFoJBQ6yWQUP9hIPONtCBxPgngsWlpkHGfQIChp8/wQbxkIHHXaYC+0y0QeE" + + "PH8EJ3ATBSrq7wYrds8ELKKxBjH4RAszAMEFOJLSDzkQqgg6gNUEP0KEB76DTAVDtA0d6bbCB0U83gTMe7QKR4SSC0mAfQhKUuEFUHDLBd" + + "50mwRZqCoUW26JC/pCYgpeOPwFZaahBHHq/wZnfIkJaHyNB2uy0gpt6CgEbj5zBnG+3wd1WOMIeearB3v4dQh82FMGpq/NB39egwmAcM4I" + + "hMhvBIsKgAiNhmIGjmBkB6/iiw/pUEcGhBAcBDy1egZmZlQHofURBaAsYQ+hLNUPpeAsCapUaQSrDNYHzMXdCrSMMQq2LB0Ot1KMFbkKow" + + "e9ziAMvjrsB8HiSAcDfj0FllwcC8bmxwrJ7r4HyxBnB8zwDwfOkjIEywjwCIlJhwfVXj8G2JDFBt7ChgsivZUK5/BcB+lYCgftNIEGS0tP" + + "BviYvAv5SLUGtEUfBvtiMwX9viUE/h7SDgGNNQwCBQMFB6dcBPVhlwcJ77sLC+ebCw3dygoP3Z4FEMGyBxFLpAUf2bsHIu3JBSXnTwX1M0" + + "UHKt/+EitplQcuf+0EL3MCBTApNAky21sEND/uBDZJEwo4LfkMPG+1E0LRDxNEqUwLxgTJDEv9gQdN4W4ETsFoCU/zNwZTpzEPVE3EBlXv" + + "PARXHaIEW3+oE1yZzAZhI7EEY583EWTztAVlcwoGZi0iGiKPGAS/Z6MFezmHBIFVHReGx8UIibNbBYEZzAiQowUIFvZeCJrLFQecE80Goc" + + "XQBaTFkwylo48IprP+BQmeJQapt3sFqhHKHa2xnAuvBeYEsRPiC7RrOQW4Ba0GvLcOB74/BQ2/L2MEwLN6BgR8uwbFc6AKa7heB8o74gbM" + + "ZRoLz9cyBNEPnwfVg28G1jWXBRp4PwXJHJoL7dzxB93jghPp7r0M459kB74n6Abn56sOOGLTDOphoAf+2FUF8hm6BvSHpQr1TaEE4BX8BP" + + "kTeQX602UF/ZESEf/DRQs="; + } else { + expectedSketch = + "AwEHDAkIAAHIAAAAAPp4CQGkVQwCbBkJBQKkDotkQRYLEOIHDjIfCBCYZRMbmgYHfkdRB7lpWgj+UpIML/iCBLnt1AYyAMoGNIzUBDWsAwaL" + + "VYkGN4iHBzusLAY/NKsGROa/Bkbi9gxHzIwHSXg2B6qmDwZRZDYEVZ63CVZobAtY2FMEWkBtBl6WJghogCcFadYrBnfeJAh4GB4RfVy/G8" + + 
"B/VQaAeFAKheCsGIciaw6JvKgGi1xBDNa36gSNYtIFjyzZDpAQ4AuSgPQKkxYkCJfExgaZaP8EnmRcBqMORAXjgo8NpnaCBKmqrAWq0tcP" + + "rtYqD6+8sAmybMoHufnhBbiMOQW53G4F2sZXBMJOCRXEMtkJx4SeFMsGuA7RJBEG1PaiC9kk7Q/a3MoE22AFCNsCKAXdRhwG4vLmBePAWB" + + "LmVCQMUitrCOmcCAbrGJEU7NaUBADunBW5PTEF+MKqGgD2/Qr8riYH/khMBP9OqAkA+5cHAR/DBgIB7hgIc04JDoWnEg/H5QoRlZkVEm2x" + + "BhS/lQYa/b8THXcwBh9TsgUikxwQJZ1FHoifNwony8ILflUbBSppxwsrHTsELX0JBi6PYgkxL10GMl8nDioLmgXskNwFOfWCCTzxqBk+8Y" + + "gKQQMdDE3pbA5OGScOeK9PCRCA8gRSSfUHVwVeBFutkQ5k468FZwecDGnP9gZrVRsGbbOpEXHRsB52LxEGdxPyDngVqw3+PBgEe02DBH3n" + + "EAR+vUoaf18LBoVpGwqG+S0IiPfqC4vzIBtRdDoFj++8BpGfJwWTDXkblLFVBpdT5gWYB1IRmiNTD5zLFRCeA2UP6bSaBTHddAbej8sEqz" + + "U0C6z5YA6vpRcKsve8Fzl5HAm1zV8Kt6tFCFJn6hO5t5kFvN2cC71rfga/QaAOwL3zCsM59wbFv4gEmViuBciRjgbJSXQMy9GbEOPSBQTN" + + "d2QEzzmRES+iLxLT8ZEF1K3oKDyZYg3YC/0JDpijBN1h7AneDRAe35diD+ENjwnio30H5KVqFOjHFgbppaIS6xdHBvHjhATzC3EFUke5Cv" + + "YXVwn3CZwI+jVbBv4n3xE="; + } + + Assert.assertEquals("\"" + expectedSketch + "\"", helper.getObjectMapper().writeValueAsString(row.get(1))); } @Test @@ -135,12 +175,48 @@ public void buildSketchesAtQueryTime() throws Exception 0, // minTimestamp Granularities.NONE, 200, // maxRowCount - buildGroupByQueryJson("HLLSketchBuild", "id", !ROUND) + buildGroupByQueryJson("HLLSketchBuild", "id", !ROUND, stringEncoding) ); List results = seq.toList(); Assert.assertEquals(1, results.size()); ResultRow row = results.get(0); Assert.assertEquals(200, (double) row.get(0), 0.1); + + // Check specific sketch: result should be deterministic given same inputs in same order. This helps makes sure + // that our logic remains compatible across versions. 
+ + final String expectedSketch; + + if (stringEncoding == StringEncoding.UTF16LE) { + expectedSketch = + "AwEHDAkIAAHIAAAAAcoDCQOUmAsElgIEcSjfBAmeJQYLkjgKDFoJBQ6yWQUP9hIPONtCBxPgngsW9l4IGfQIChp8/wQbxkIHHXaYC+0y0QeE" + + "PH8EJ3ATBSrq7wYrds8ELKKxBjH4RAszAMEFOJLSDzkQqgg6gNUEP0KEBwlswAVDtA0d6bbCB0U83gTMe7QKR4SSC0mAfQhKUuEFUHDLBd" + + "50mwRZqCoUW26JC144/AVlpqEEcer/Bmd8iQlofI0HtGs5BWu4Xgdt6CgEbj5zBnG+3wd1WOMIeearB3v4dQh82FMGpq/NB39egwmAcM4I" + + "voNMBYTIbwSLCoAIjYZiBo5gZAev4osP6VBHBoQQHAQ8tXoGZmZUB6H1EQWgLGEPoSzVD6XgLAmqVGkEqwzWB8zF3Qq0jDEKtiwdDrdSjB" + + "W5CqMHvc4gDL467AfB4kgHA349BZZcHAvG5scKye6+B8sQZwfM8A8HzpIyBMsI8AiJSYcH1V4/BtiQxQbewoYLIr2VChaWmQfn8FwH6VgK" + + "B+00gQZLS08G+Ji8C/lItQb6QmIK+2IzBf2+JQT+HtIOAY01DAIFAwUHp1wE9WGXBwnvuwsL55sLDd3KCg/dngUQwbIHEUukBR/Zuwci7c" + + "kFJedPBSrf/hIraZUHLn/tBC9zAgUwKTQJMttbBDQ/7gQ2SRMKOC35DDxvtRNC0Q8T9U2hBESpTAvGBMkMS/2BB03hbgROwWgJT/M3BlOn" + + "MQ9UTcQGVe88BFcdogRbf6gTXJnMBmEjsQRjnzcRZPO0BWVzCgZmLSIaIo8YBL9nowV7OYcEgVUdF4bHxQiJs1sFgRnMCJCjBQiayxUHnB" + + "PNBqHF0AWkxZMMpaOPCKaz/gWpt3sFqhHKHa2xnAuvBeYEsRPiC7RFHwa4Ba0GvLcOB74/BQ2/L2MEwLN6BgR8uwZrstIKxXOgCso74gbM" + + "ZRoLz9cyBNEPnwfVg28G1jWXBRp4PwXJHJoL7dzxB93jghPp7r0M459kB74n6Abn56sOOGLTDOphoAf+2FUF8hm6BvSHpQr1M0UH4BX8BP" + + "kTeQX602UF/ZESEf/DRQs="; + } else { + // UTF-8 + expectedSketch = + "AwEHDAkIAAHIAAAAAPp4CQGkVQwCbBkJBQKkDotkQRYLEOIHDjIfCBCYZRMbmgYHfkdRB7lpWgj+UpIML/iCBLnt1AYyAMoGNIzUBDWsAwaL" + + "VYkGN4iHBzusLAY/NKsGROa/Bkbi9gxHzIwHSXg2B6qmDwZRdDoFVZ63CVZobAtY2FMEWkBtBl6WJghogCcFadYrBnfeJAh4GB4RfVy/G4" + + "B4UAqcyxUQheCsGIciaw6JvKgGi1xBDI1i0gWPLNkOkBDgC5KA9AqTFiQIl8TGBplo/wSeZFwGow5EBeOCjw2mdoIEqaqsBarS1w+u1ioP" + + "r7ywCbJsyge5+eEFuIw5BbncbgXaxlcEwk4JFcQy2QnHhJ4Uywa4DtEkEQbU9qIL2STtD9rcygTbYAUI2wIoBd1GHAbi8uYF48BYEuZUJA" + + "xSK2sI6bSaBesYkRTs1pQEAO6cFbk9MQX4wqoaAPb9CvyuJgf+SEwE/06oCQD7lwcBH8MGAgHuGAhzTgkOhacSD8flChGVmRUSbbEGFL+V" + + "Bhr9vxMddzAGH1OyBSKTHBAlnUUeiJ83CifLwgt+VRsFKmnHCysdOwQtfQkGLo9iCTEvXQYyXycOKguaBeyQ3AXpnAgGOfWCCTyZYg0+8Y" + + 
"gKQQMdDE3pbA5OGScOeK9PCRCA8gRSSfUHVwVeBFutkQ5k468FZwecDGnP9gZrVRsGbbOpEXHRsB52LxEGdxPyDngVqw3+PBgEe02DBH3n" + + "EAR+vUoaf18LBlFkNgSFaRsKhvktCIj36guL8yAbj++8BpGfJwWTDXkblLFVBpdT5gWYB1IRmiNTD7XNXwqeA2UPwL3zCjHddAbej8sEqz" + + "U0C6z5YA6vpRcKsve8Fzl5HAk88agZt6tFCFJn6hO5t5kFvN2cC71rfga/QaAOwH9VBsM59wbFv4gEmViuBciRjgbJSXQMy9GbEOPSBQTN" + + "d2QEzzmRES+iLxLT8ZEF1K3oKNa36gTYC/0JDpijBN1h7AneDRAe35diD+ENjwnio30H5KVqFOjHFgbppaIS6xdHBvHjhATzC3EFUke5Cv" + + "YXVwn3CZwI+jVbBv4n3xE="; + } + + Assert.assertEquals("\"" + expectedSketch + "\"", helper.getObjectMapper().writeValueAsString(row.get(1))); } @Test @@ -156,12 +232,25 @@ public void buildSketchesAtQueryTimeMultiValue() throws Exception 0, // minTimestamp Granularities.NONE, 200, // maxRowCount - buildGroupByQueryJson("HLLSketchBuild", "multiDim", !ROUND) + buildGroupByQueryJson("HLLSketchBuild", "multiDim", !ROUND, stringEncoding) ); List results = seq.toList(); Assert.assertEquals(1, results.size()); ResultRow row = results.get(0); Assert.assertEquals(14, (double) row.get(0), 0.1); + + // Check specific sketch: result should be deterministic given same inputs in same order. This helps makes sure + // that our logic remains compatible across versions. 
+ + final String expectedSketch; + + if (stringEncoding == StringEncoding.UTF16LE) { + expectedSketch = "AwEHDAUIAAEOAAAAhDx/BKWjjwiJs1sFRTzeBMnuvgfYkMUGyRyaC39egwmJSYcHOGLTDDkQqgg6gNUEGfQICj9ChAc="; + } else { + expectedSketch = "AwEHDAUIAAEOAAAAwH9VBslJdAyqpg8GrtYqD48s2Q6y97wX0/GRBR13MAbYC/0JOfWCCbzdnAtSSfUH/lKSDB9TsgU="; + } + + Assert.assertEquals("\"" + expectedSketch + "\"", helper.getObjectMapper().writeValueAsString(row.get(1))); } @Test @@ -177,12 +266,47 @@ public void roundBuildSketch() throws Exception 0, // minTimestamp Granularities.NONE, 200, // maxRowCount - buildGroupByQueryJson("HLLSketchBuild", "id", ROUND) + buildGroupByQueryJson("HLLSketchBuild", "id", ROUND, stringEncoding) ); List results = seq.toList(); Assert.assertEquals(1, results.size()); ResultRow row = results.get(0); Assert.assertEquals(200L, (long) row.get(0)); + + // Check specific sketch: result should be deterministic given same inputs in same order. This helps makes sure + // that our logic remains compatible across versions. 
+ + final String expectedSketch; + + if (stringEncoding == StringEncoding.UTF16LE) { + expectedSketch = + "AwEHDAkIAAHIAAAAAcoDCQOUmAsElgIEcSjfBAmeJQYLkjgKDFoJBQ6yWQUP9hIPONtCBxPgngsW9l4IGfQIChp8/wQbxkIHHXaYC+0y0QeEP" + + "H8EJ3ATBSrq7wYrds8ELKKxBjH4RAszAMEFOJLSDzkQqgg6gNUEP0KEBwlswAVDtA0d6bbCB0U83gTMe7QKR4SSC0mAfQhKUuEFUHDLBd50" + + "mwRZqCoUW26JC144/AVlpqEEcer/Bmd8iQlofI0HtGs5BWu4Xgdt6CgEbj5zBnG+3wd1WOMIeearB3v4dQh82FMGpq/NB39egwmAcM4IvoN" + + "MBYTIbwSLCoAIjYZiBo5gZAev4osP6VBHBoQQHAQ8tXoGZmZUB6H1EQWgLGEPoSzVD6XgLAmqVGkEqwzWB8zF3Qq0jDEKtiwdDrdSjBW5Cq" + + "MHvc4gDL467AfB4kgHA349BZZcHAvG5scKye6+B8sQZwfM8A8HzpIyBMsI8AiJSYcH1V4/BtiQxQbewoYLIr2VChaWmQfn8FwH6VgKB+00g" + + "QZLS08G+Ji8C/lItQb6QmIK+2IzBf2+JQT+HtIOAY01DAIFAwUHp1wE9WGXBwnvuwsL55sLDd3KCg/dngUQwbIHEUukBR/Zuwci7ckFJedP" + + "BSrf/hIraZUHLn/tBC9zAgUwKTQJMttbBDQ/7gQ2SRMKOC35DDxvtRNC0Q8T9U2hBESpTAvGBMkMS/2BB03hbgROwWgJT/M3BlOnMQ9UTcQ" + + "GVe88BFcdogRbf6gTXJnMBmEjsQRjnzcRZPO0BWVzCgZmLSIaIo8YBL9nowV7OYcEgVUdF4bHxQiJs1sFgRnMCJCjBQiayxUHnBPNBqHF0A" + + "WkxZMMpaOPCKaz/gWpt3sFqhHKHa2xnAuvBeYEsRPiC7RFHwa4Ba0GvLcOB74/BQ2/L2MEwLN6BgR8uwZrstIKxXOgCso74gbMZRoLz9cyB" + + "NEPnwfVg28G1jWXBRp4PwXJHJoL7dzxB93jghPp7r0M459kB74n6Abn56sOOGLTDOphoAf+2FUF8hm6BvSHpQr1M0UH4BX8BPkTeQX602UF" + + "/ZESEf/DRQs="; + } else { + expectedSketch = + "AwEHDAkIAAHIAAAAAPp4CQGkVQwCbBkJBQKkDotkQRYLEOIHDjIfCBCYZRMbmgYHfkdRB7lpWgj+UpIML/iCBLnt1AYyAMoGNIzUBDWsAwaL" + + "VYkGN4iHBzusLAY/NKsGROa/Bkbi9gxHzIwHSXg2B6qmDwZRdDoFVZ63CVZobAtY2FMEWkBtBl6WJghogCcFadYrBnfeJAh4GB4RfVy/G4" + + "B4UAqcyxUQheCsGIciaw6JvKgGi1xBDI1i0gWPLNkOkBDgC5KA9AqTFiQIl8TGBplo/wSeZFwGow5EBeOCjw2mdoIEqaqsBarS1w+u1ioP" + + "r7ywCbJsyge5+eEFuIw5BbncbgXaxlcEwk4JFcQy2QnHhJ4Uywa4DtEkEQbU9qIL2STtD9rcygTbYAUI2wIoBd1GHAbi8uYF48BYEuZUJA" + + "xSK2sI6bSaBesYkRTs1pQEAO6cFbk9MQX4wqoaAPb9CvyuJgf+SEwE/06oCQD7lwcBH8MGAgHuGAhzTgkOhacSD8flChGVmRUSbbEGFL+V" + + "Bhr9vxMddzAGH1OyBSKTHBAlnUUeiJ83CifLwgt+VRsFKmnHCysdOwQtfQkGLo9iCTEvXQYyXycOKguaBeyQ3AXpnAgGOfWCCTyZYg0+8Y" + + 
"gKQQMdDE3pbA5OGScOeK9PCRCA8gRSSfUHVwVeBFutkQ5k468FZwecDGnP9gZrVRsGbbOpEXHRsB52LxEGdxPyDngVqw3+PBgEe02DBH3n" + + "EAR+vUoaf18LBlFkNgSFaRsKhvktCIj36guL8yAbj++8BpGfJwWTDXkblLFVBpdT5gWYB1IRmiNTD7XNXwqeA2UPwL3zCjHddAbej8sEqz" + + "U0C6z5YA6vpRcKsve8Fzl5HAk88agZt6tFCFJn6hO5t5kFvN2cC71rfga/QaAOwH9VBsM59wbFv4gEmViuBciRjgbJSXQMy9GbEOPSBQTN" + + "d2QEzzmRES+iLxLT8ZEF1K3oKNa36gTYC/0JDpijBN1h7AneDRAe35diD+ENjwnio30H5KVqFOjHFgbppaIS6xdHBvHjhATzC3EFUke5Cv" + + "YXVwn3CZwI+jVbBv4n3xE="; + } + + Assert.assertEquals("\"" + expectedSketch + "\"", helper.getObjectMapper().writeValueAsString(row.get(1))); } @Test @@ -194,16 +318,34 @@ public void roundMergeSketch() throws Exception Arrays.asList("dim", "multiDim"), Arrays.asList("timestamp", "dim", "multiDim", "sketch") ), - buildAggregatorJson("HLLSketchMerge", "sketch", ROUND), + buildAggregatorJson("HLLSketchMerge", "sketch", ROUND, stringEncoding), 0, // minTimestamp Granularities.NONE, 200, // maxRowCount - buildGroupByQueryJson("HLLSketchMerge", "sketch", ROUND) + buildGroupByQueryJson("HLLSketchMerge", "sketch", ROUND, stringEncoding) ); List results = seq.toList(); Assert.assertEquals(1, results.size()); ResultRow row = results.get(0); Assert.assertEquals(200L, (long) row.get(0)); + + // Check specific sketch: result should be deterministic given same inputs in same order. This helps makes sure + // that our logic remains compatible across versions. 
+ + final String expectedSketch = + "AwEHDAkIAAHIAAAAAiK0BAWIUw8GQkoICxROBBVEtgcXLiAGGbKkCRq80AUbIqkIH3iCByBm4xIlXqYJK/L7BixGBQYu5AsOLxoTBzSiYQ42" + + "KrIEN6znB7AS2wY/kEgE/JC2BUZKtwRH1rMNSizzBE744gVTYroEuD/5B9IWcwdeersMY+ISCNeMnQ/VIhcTakA7DWvM/gR0XmkKdwL+B+" + + "AxWRB7iqkJfHS5B4G8XQbeUy0E8ranC4WKqwuHtqoEimJoFo3EiQmT7KIJlH6hFJQHwgSaiDcHnQpsD57kmxjOoO8FoXrkCdSLjASkeLMH" + + "qE7tBKlYMwmqPmMHrjyIEbjsbwmwPp0Hsqi4BrVMrQ+4VqkMk8R2Bb0gUg6+9PgGv842FMOcmh/GYjQHyHokBMqMXgXNWtkMzvBbHzBDXw" + + "7SfIwR1VKVCdd4NAbaXBIH21ItBNxaAQbd9tsG31Z4EXvL9gfppI4TZBO3BvJ8HAb5+PEE+a6eD/yaMREGfe8RDF8FBQ1fegkRy64GFMeI" + + "DxcFewTDsDgEIKkZDiI76wUkz84NKjnxBi9TdhMwRVkLMiViBTWpMQQ2RwkH4K6sCDmLKAc7aa4EPtleBkD7DQ5B19cISe1qBoGehQfp0i" + + "gFU4PaB1R7Rw9Zf9QN753zEPTT8gthmacHZD0WBWXfihDyeFwMbeUdB27FNAZvy0oHcMl2B3FDyAl1gWYHed9lBHtl5ggfuiIIfSM8CoDB" + + "8wmBrzIEhGnMBYYv+Q2IJ6AEiV2yDI1JrAWPw7AG9D3fD5TDIQSXvygMmUlCBJq7YwSeLWoHn6ugDMPdUQSj85wKpG+rBqYRUQSpO7UHqn" + + "OHFq23zAiwW0YS58+VF7YvCQa3Gc8HuAH8Erl/PgW7yQEFvhmBBQZzZQTB6RcFwnHPBakaJQfEtZ8HxbGmEcYZagTL18IEywPsBs5JcwXP" + + "4UgE0U97BdMTZwXUOwoH1ZcUB9bP8QY7vb0M2RtKCNsXmgrdn3oL3tsZC+DtSQfhpXQG7eg9BeYLowvnUe4Ek1QxBe8t9wfzofoJl7tgGv" + + "XpZg32cfIG+2nDBvwtQgo="; + + Assert.assertEquals("\"" + expectedSketch + "\"", helper.getObjectMapper().writeValueAsString(row.get(1))); } @Test @@ -215,7 +357,7 @@ public void testPostAggs() throws Exception Arrays.asList("dim", "multiDim"), Arrays.asList("timestamp", "dim", "multiDim", "sketch") ), - buildAggregatorJson("HLLSketchMerge", "sketch", ROUND), + buildAggregatorJson("HLLSketchMerge", "sketch", ROUND, stringEncoding), 0, // minTimestamp Granularities.NONE, 200, // maxRowCount @@ -225,7 +367,7 @@ public void testPostAggs() throws Exception .setGranularity(Granularities.ALL) .setInterval(Intervals.ETERNITY) .setAggregatorSpecs( - new HllSketchMergeAggregatorFactory("sketch", "sketch", null, null, false) + new HllSketchMergeAggregatorFactory("sketch", "sketch", null, null, null, false) ) 
.setPostAggregatorSpecs( ImmutableList.of( @@ -254,7 +396,8 @@ public void testPostAggs() throws Exception ), new FieldAccessPostAggregator("f2", "sketch")), null, null - ) + ), + new FieldAccessPostAggregator("fieldAccess", "sketch") ) ) .build() @@ -270,6 +413,23 @@ public void testPostAggs() throws Exception + " UB : 200.01008469948434\n" + " OutOfOrder Flag: false\n" + " Coupon Count : 200\n"; + + // Check specific sketch: result should be deterministic given same inputs in same order. This helps makes sure + // that our logic remains compatible across versions. + + final String expectedSketchBase64 = + "AwEHDAkIAAHIAAAAAiK0BAWIUw8GQkoICxROBBVEtgcXLiAGGbKkCRq80AUbIqkIH3iCByBm4xIlXqYJK/L7BixGBQYu5AsOLxoTBzSiYQ42" + + "KrIEN6znB7AS2wY/kEgE/JC2BUZKtwRH1rMNSizzBE744gVTYroEuD/5B9IWcwdeersMY+ISCNeMnQ/VIhcTakA7DWvM/gR0XmkKdwL+B+" + + "AxWRB7iqkJfHS5B4G8XQbeUy0E8ranC4WKqwuHtqoEimJoFo3EiQmT7KIJlH6hFJQHwgSaiDcHnQpsD57kmxjOoO8FoXrkCdSLjASkeLMHq" + + "E7tBKlYMwmqPmMHrjyIEbjsbwmwPp0Hsqi4BrVMrQ+4VqkMk8R2Bb0gUg6+9PgGv842FMOcmh/GYjQHyHokBMqMXgXNWtkMzvBbHzBDXw7S" + + "fIwR1VKVCdd4NAbaXBIH21ItBNxaAQbd9tsG31Z4EXvL9gfppI4TZBO3BvJ8HAb5+PEE+a6eD/yaMREGfe8RDF8FBQ1fegkRy64GFMeIDxc" + + "FewTDsDgEIKkZDiI76wUkz84NKjnxBi9TdhMwRVkLMiViBTWpMQQ2RwkH4K6sCDmLKAc7aa4EPtleBkD7DQ5B19cISe1qBoGehQfp0igFU4" + + "PaB1R7Rw9Zf9QN753zEPTT8gthmacHZD0WBWXfihDyeFwMbeUdB27FNAZvy0oHcMl2B3FDyAl1gWYHed9lBHtl5ggfuiIIfSM8CoDB8wmBr" + + "zIEhGnMBYYv+Q2IJ6AEiV2yDI1JrAWPw7AG9D3fD5TDIQSXvygMmUlCBJq7YwSeLWoHn6ugDMPdUQSj85wKpG+rBqYRUQSpO7UHqnOHFq23" + + "zAiwW0YS58+VF7YvCQa3Gc8HuAH8Erl/PgW7yQEFvhmBBQZzZQTB6RcFwnHPBakaJQfEtZ8HxbGmEcYZagTL18IEywPsBs5JcwXP4UgE0U9" + + "7BdMTZwXUOwoH1ZcUB9bP8QY7vb0M2RtKCNsXmgrdn3oL3tsZC+DtSQfhpXQG7eg9BeYLowvnUe4Ek1QxBe8t9wfzofoJl7tgGvXpZg32cf" + + "IG+2nDBvwtQgo="; + List results = seq.toList(); Assert.assertEquals(1, results.size()); ResultRow row = results.get(0); @@ -279,6 +439,14 @@ public void testPostAggs() throws Exception Assert.assertEquals(expectedSummary, row.get(3)); // union 
with self = self Assert.assertEquals(expectedSummary, row.get(4).toString()); + Assert.assertEquals( + "\"" + expectedSketchBase64 + "\"", + helper.getObjectMapper().writeValueAsString(row.get(4)) + ); + Assert.assertEquals( + "\"" + expectedSketchBase64 + "\"", + helper.getObjectMapper().writeValueAsString(row.get(5)) + ); } private static String buildParserJson(List dimensions, List columns) @@ -322,13 +490,15 @@ private static String toJson(Object object) private static String buildAggregatorJson( String aggregationType, String aggregationFieldName, - boolean aggregationRound + boolean aggregationRound, + StringEncoding stringEncoding ) { Map aggregator = buildAggregatorObject( aggregationType, aggregationFieldName, - aggregationRound + aggregationRound, + stringEncoding ); return toJson(Collections.singletonList(aggregator)); } @@ -336,27 +506,31 @@ private static String buildAggregatorJson( private static Map buildAggregatorObject( String aggregationType, String aggregationFieldName, - boolean aggregationRound + boolean aggregationRound, + StringEncoding stringEncoding ) { return ImmutableMap.of( "type", aggregationType, "name", "sketch", "fieldName", aggregationFieldName, - "round", aggregationRound + "round", aggregationRound, + "stringEncoding", stringEncoding.toString() ); } private String buildGroupByQueryJson( String aggregationType, String aggregationFieldName, - boolean aggregationRound + boolean aggregationRound, + StringEncoding stringEncoding ) { Map aggregation = buildAggregatorObject( aggregationType, aggregationFieldName, - aggregationRound + aggregationRound, + stringEncoding ); Map object = new ImmutableMap.Builder() .put("queryType", "groupBy") @@ -364,6 +538,12 @@ private String buildGroupByQueryJson( .put("granularity", "ALL") .put("dimensions", Collections.emptyList()) .put("aggregations", Collections.singletonList(aggregation)) + .put( + "postAggregations", + Collections.singletonList( + ImmutableMap.of("type", "fieldAccess", "name", 
"sketch_raw", "fieldName", "sketch") + ) + ) .put("intervals", Collections.singletonList("2017-01-01T00:00:00.000Z/2017-01-31T00:00:00.000Z")) .put("context", ImmutableMap.of(QueryContexts.VECTORIZE_KEY, vectorize.toString())) .build(); diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactoryTest.java new file mode 100644 index 000000000000..f38fb92e9b67 --- /dev/null +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactoryTest.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
package org.apache.druid.query.aggregation.datasketches.hll;

import com.fasterxml.jackson.databind.ObjectMapper;
import nl.jqno.equalsverifier.EqualsVerifier;
import org.apache.datasketches.hll.TgtHllType;
import org.apache.druid.java.util.common.StringEncoding;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.segment.TestHelper;
import org.junit.Assert;
import org.junit.Test;

import java.io.IOException;

/**
 * JSON serde and equality tests for {@link HllSketchBuildAggregatorFactory}.
 */
public class HllSketchBuildAggregatorFactoryTest
{
  private final ObjectMapper jsonMapper;

  public HllSketchBuildAggregatorFactoryTest()
  {
    // Copy the shared test mapper before registering modules, so other tests are not affected.
    this.jsonMapper = TestHelper.makeJsonMapper().copy();
    jsonMapper.registerModules(new HllSketchModule().getJacksonModules());
  }

  @Test
  public void testSerde() throws IOException
  {
    // Every parameter set explicitly: all fields appear in the serialized form.
    assertSerdeRoundTrip(
        new HllSketchBuildAggregatorFactory(
            "foo",
            "bar",
            18,
            TgtHllType.HLL_8.name(),
            StringEncoding.UTF8,
            true
        ),
        "{\"type\":\"HLLSketchBuild\",\"name\":\"foo\",\"fieldName\":\"bar\",\"lgK\":18,\"tgtHllType\":\"HLL_8\",\"stringEncoding\":\"utf8\",\"round\":true}"
    );
  }

  @Test
  public void testSerdeWithDefaults() throws IOException
  {
    // Defaulted lgK/tgtHllType are materialized in JSON; default stringEncoding (UTF16LE) and
    // round=false are omitted entirely, preserving wire compatibility with pre-stringEncoding versions.
    assertSerdeRoundTrip(
        new HllSketchBuildAggregatorFactory(
            "foo",
            "bar",
            null,
            null,
            null,
            false
        ),
        "{\"type\":\"HLLSketchBuild\",\"name\":\"foo\",\"fieldName\":\"bar\",\"lgK\":12,\"tgtHllType\":\"HLL_4\"}"
    );
  }

  @Test
  public void testEquals()
  {
    EqualsVerifier.forClass(HllSketchBuildAggregatorFactory.class).usingGetClass().verify();
  }

  /**
   * Serializes the factory, asserts the exact JSON produced, then deserializes and asserts equality
   * with the original.
   */
  private void assertSerdeRoundTrip(final HllSketchBuildAggregatorFactory factory, final String expectedJson)
      throws IOException
  {
    final String serializedString = jsonMapper.writeValueAsString(factory);
    Assert.assertEquals(expectedJson, serializedString);

    final AggregatorFactory factory2 = jsonMapper.readValue(serializedString, AggregatorFactory.class);
    Assert.assertEquals(factory, factory2);
  }
}
targetNoRound = new HllSketchMergeAggregatorFactory(NAME, FIELD_NAME, LG_K, TGT_HLL_TYPE, !ROUND); + targetRound = new HllSketchMergeAggregatorFactory(NAME, FIELD_NAME, LG_K, TGT_HLL_TYPE, STRING_ENCODING, ROUND); + targetNoRound = new HllSketchMergeAggregatorFactory(NAME, FIELD_NAME, LG_K, TGT_HLL_TYPE, STRING_ENCODING, !ROUND); } @Test(expected = AggregatorFactoryNotMergeableException.class) @@ -51,6 +59,7 @@ public void testGetMergingFactoryBadName() throws Exception FIELD_NAME, LG_K, TGT_HLL_TYPE, + STRING_ENCODING, ROUND ); targetRound.getMergingFactory(other); @@ -64,11 +73,27 @@ public void testGetMergingFactoryBadType() throws Exception FIELD_NAME, LG_K, TGT_HLL_TYPE, + STRING_ENCODING, ROUND ); targetRound.getMergingFactory(other); } + @Test(expected = AggregatorFactoryNotMergeableException.class) + public void testGetMergingFactoryDifferentStringEncoding() throws Exception + { + HllSketchMergeAggregatorFactory other = new HllSketchMergeAggregatorFactory( + NAME, + FIELD_NAME, + LG_K, + TGT_HLL_TYPE, + StringEncoding.UTF8, + ROUND + ); + HllSketchAggregatorFactory result = (HllSketchAggregatorFactory) targetRound.getMergingFactory(other); + Assert.assertEquals(LG_K, result.getLgK()); + } + @Test public void testGetMergingFactoryOtherSmallerLgK() throws Exception { @@ -78,6 +103,7 @@ public void testGetMergingFactoryOtherSmallerLgK() throws Exception FIELD_NAME, smallerLgK, TGT_HLL_TYPE, + STRING_ENCODING, ROUND ); HllSketchAggregatorFactory result = (HllSketchAggregatorFactory) targetRound.getMergingFactory(other); @@ -93,6 +119,7 @@ public void testGetMergingFactoryOtherLargerLgK() throws Exception FIELD_NAME, largerLgK, TGT_HLL_TYPE, + STRING_ENCODING, ROUND ); HllSketchAggregatorFactory result = (HllSketchAggregatorFactory) targetRound.getMergingFactory(other); @@ -108,6 +135,7 @@ public void testGetMergingFactoryOtherSmallerTgtHllType() throws Exception FIELD_NAME, LG_K, smallerTgtHllType, + STRING_ENCODING, ROUND ); HllSketchAggregatorFactory result = 
(HllSketchAggregatorFactory) targetRound.getMergingFactory(other); @@ -123,6 +151,7 @@ public void testGetMergingFactoryOtherLargerTgtHllType() throws Exception FIELD_NAME, LG_K, largerTgtHllType, + STRING_ENCODING, ROUND ); HllSketchAggregatorFactory result = (HllSketchAggregatorFactory) targetRound.getMergingFactory(other); @@ -156,4 +185,70 @@ public void testGetMergingFactoryThisRoundOtherRound() throws Exception HllSketchAggregatorFactory result = (HllSketchAggregatorFactory) targetRound.getMergingFactory(targetRound); Assert.assertTrue(result.isRound()); } + + @Test + public void testSerde() throws IOException + { + final ObjectMapper jsonMapper = TestHelper.makeJsonMapper().copy(); + jsonMapper.registerModules(new HllSketchModule().getJacksonModules()); + + final HllSketchMergeAggregatorFactory factory = new HllSketchMergeAggregatorFactory( + "foo", + "bar", + 18, + TgtHllType.HLL_8.name(), + StringEncoding.UTF8, + true + ); + + final String serializedString = jsonMapper.writeValueAsString(factory); + + Assert.assertEquals( + "{\"type\":\"HLLSketchMerge\",\"name\":\"foo\",\"fieldName\":\"bar\",\"lgK\":18,\"tgtHllType\":\"HLL_8\",\"stringEncoding\":\"utf8\",\"round\":true}", + serializedString + ); + + final AggregatorFactory factory2 = jsonMapper.readValue( + serializedString, + AggregatorFactory.class + ); + + Assert.assertEquals(factory, factory2); + } + + @Test + public void testSerdeWithDefaults() throws IOException + { + final ObjectMapper jsonMapper = TestHelper.makeJsonMapper().copy(); + jsonMapper.registerModules(new HllSketchModule().getJacksonModules()); + + final HllSketchMergeAggregatorFactory factory = new HllSketchMergeAggregatorFactory( + "foo", + "bar", + null, + null, + null, + false + ); + + final String serializedString = jsonMapper.writeValueAsString(factory); + + Assert.assertEquals( + "{\"type\":\"HLLSketchMerge\",\"name\":\"foo\",\"fieldName\":\"bar\",\"lgK\":12,\"tgtHllType\":\"HLL_4\"}", + serializedString + ); + + final 
AggregatorFactory factory2 = jsonMapper.readValue( + serializedString, + AggregatorFactory.class + ); + + Assert.assertEquals(factory, factory2); + } + + @Test + public void testEquals() + { + EqualsVerifier.forClass(HllSketchBuildAggregatorFactory.class).usingGetClass().verify(); + } } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchToEstimatePostAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchToEstimatePostAggregatorTest.java index eb3a4660ae98..5db645fae816 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchToEstimatePostAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchToEstimatePostAggregatorTest.java @@ -93,6 +93,7 @@ public void testResultArraySignature() "col", null, null, + null, false ) ) diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java index b1faed1cbede..488ed06bef24 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java @@ -174,6 +174,7 @@ public void setUp() throws Exception "dim1", null, null, + null, ROUND ) ) @@ -317,6 +318,7 @@ public void testApproxCountDistinctHllSketch() throws Exception "dim2", null, null, + null, ROUND ), new FilteredAggregatorFactory( @@ -325,6 +327,7 @@ public void testApproxCountDistinctHllSketch() throws Exception "dim2", null, null, + null, ROUND ), 
BaseCalciteQueryTest.not(BaseCalciteQueryTest.selector("dim2", "", null)) @@ -334,6 +337,7 @@ public void testApproxCountDistinctHllSketch() throws Exception "v0", null, null, + null, ROUND ), new HllSketchBuildAggregatorFactory( @@ -341,10 +345,11 @@ public void testApproxCountDistinctHllSketch() throws Exception "v1", null, null, + null, ROUND ), - new HllSketchMergeAggregatorFactory("a5", "hllsketch_dim1", 21, "HLL_8", ROUND), - new HllSketchMergeAggregatorFactory("a6", "hllsketch_dim1", null, null, ROUND) + new HllSketchMergeAggregatorFactory("a5", "hllsketch_dim1", 21, "HLL_8", null, ROUND), + new HllSketchMergeAggregatorFactory("a6", "hllsketch_dim1", null, null, null, ROUND) ) ) .context(queryContext) @@ -404,6 +409,7 @@ public void testAvgDailyCountDistinctHllSketch() throws Exception "cnt", null, null, + null, ROUND ) ) @@ -576,6 +582,7 @@ public void testHllSketchPostAggs() throws Exception "dim2", null, null, + null, true ), new HllSketchBuildAggregatorFactory( @@ -583,6 +590,7 @@ public void testHllSketchPostAggs() throws Exception "m1", null, null, + null, true ), new HllSketchBuildAggregatorFactory( @@ -590,6 +598,7 @@ public void testHllSketchPostAggs() throws Exception "v0", null, null, + null, true ), new HllSketchBuildAggregatorFactory( @@ -597,6 +606,7 @@ public void testHllSketchPostAggs() throws Exception "v1", null, null, + null, true ), new HllSketchBuildAggregatorFactory( @@ -604,6 +614,7 @@ public void testHllSketchPostAggs() throws Exception "dim2", null, null, + null, true ) ) @@ -693,6 +704,7 @@ public void testtHllSketchPostAggsPostSort() throws Exception "dim2", null, null, + null, true ) ) From e27cc1cb3de02dec5dadec0e398b445a7132045f Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Tue, 4 May 2021 19:26:24 -0700 Subject: [PATCH 02/13] Fix benchmark. 
--- .../org/apache/druid/benchmark/DataSketchesHllBenchmark.java | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/DataSketchesHllBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/DataSketchesHllBenchmark.java index 6fbffa54023e..bed30c85c64b 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/DataSketchesHllBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/DataSketchesHllBenchmark.java @@ -64,6 +64,7 @@ public class DataSketchesHllBenchmark "hll", null, null, + null, false ); From 492edd34f67378656d64c260e1b46cb8a320ce29 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 5 May 2021 10:43:44 -0700 Subject: [PATCH 03/13] Fix style issues, improve test coverage. --- .../java/util/common/StringEncoding.java | 19 ++ ...codingDefaultUTF16LEJsonIncludeFilter.java | 19 ++ .../java/util/common/StringEncodingTest.java | 2 +- .../hll/HllSketchBuildAggregatorFactory.java | 1 - .../hll/HllSketchAggregatorFactoryTest.java | 13 +- .../hll/sql/HllSketchSqlAggregatorTest.java | 287 ++++++++++-------- 6 files changed, 209 insertions(+), 132 deletions(-) diff --git a/core/src/main/java/org/apache/druid/java/util/common/StringEncoding.java b/core/src/main/java/org/apache/druid/java/util/common/StringEncoding.java index 3d1694d9c6cf..480a15a022e7 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/StringEncoding.java +++ b/core/src/main/java/org/apache/druid/java/util/common/StringEncoding.java @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.druid.java.util.common; import com.fasterxml.jackson.annotation.JsonCreator; diff --git a/core/src/main/java/org/apache/druid/java/util/common/StringEncodingDefaultUTF16LEJsonIncludeFilter.java b/core/src/main/java/org/apache/druid/java/util/common/StringEncodingDefaultUTF16LEJsonIncludeFilter.java index 48676c547530..71547a11f10b 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/StringEncodingDefaultUTF16LEJsonIncludeFilter.java +++ b/core/src/main/java/org/apache/druid/java/util/common/StringEncodingDefaultUTF16LEJsonIncludeFilter.java @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + package org.apache.druid.java.util.common; import com.fasterxml.jackson.annotation.JsonInclude; diff --git a/core/src/test/java/org/apache/druid/java/util/common/StringEncodingTest.java b/core/src/test/java/org/apache/druid/java/util/common/StringEncodingTest.java index 040b4599f915..b549e63c8721 100644 --- a/core/src/test/java/org/apache/druid/java/util/common/StringEncodingTest.java +++ b/core/src/test/java/org/apache/druid/java/util/common/StringEncodingTest.java @@ -45,7 +45,7 @@ public void testSerde() throws IOException } @Test - public void testGetCacheKey() throws IOException + public void testGetCacheKey() { Assert.assertFalse( Arrays.equals( diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java index 0cad2406a7dc..a6be8518363b 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java @@ -31,7 +31,6 @@ import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnProcessors; import org.apache.druid.segment.ColumnSelectorFactory; -import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactoryTest.java index db50cfaea676..b6e2afb71d97 100644 --- 
a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactoryTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactoryTest.java @@ -19,8 +19,6 @@ package org.apache.druid.query.aggregation.datasketches.hll; -import com.fasterxml.jackson.databind.ObjectMapper; -import nl.jqno.equalsverifier.EqualsVerifier; import org.apache.datasketches.hll.HllSketch; import org.apache.datasketches.hll.TgtHllType; import org.apache.druid.java.util.common.StringEncoding; @@ -35,7 +33,6 @@ import org.apache.druid.query.timeseries.TimeseriesQuery; import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest; import org.apache.druid.segment.ColumnSelectorFactory; -import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.column.ValueType; import org.easymock.EasyMock; @@ -44,7 +41,6 @@ import org.junit.Test; import javax.annotation.Nullable; -import java.io.IOException; import java.lang.reflect.Field; import java.lang.reflect.Modifier; import java.util.Arrays; @@ -136,6 +132,7 @@ public void testFinalizeComputatioNoRound() public void testEqualsSameObject() { Assert.assertEquals(target, target); + Assert.assertArrayEquals(target.getCacheKey(), target.getCacheKey()); } @Test @@ -162,6 +159,7 @@ public void testEqualsOtherDiffName() ROUND ); Assert.assertNotEquals(target, other); + Assert.assertFalse(Arrays.equals(target.getCacheKey(), other.getCacheKey())); } @Test @@ -176,6 +174,7 @@ public void testEqualsOtherDiffFieldName() ROUND ); Assert.assertNotEquals(target, other); + Assert.assertFalse(Arrays.equals(target.getCacheKey(), other.getCacheKey())); } @Test @@ -190,6 +189,7 @@ public void testEqualsOtherDiffLgK() ROUND ); Assert.assertNotEquals(target, other); + Assert.assertFalse(Arrays.equals(target.getCacheKey(), other.getCacheKey())); } @Test @@ -204,6 +204,7 @@ 
public void testEqualsOtherDiffTgtHllType() ROUND ); Assert.assertNotEquals(target, other); + Assert.assertFalse(Arrays.equals(target.getCacheKey(), other.getCacheKey())); } @Test @@ -218,6 +219,9 @@ public void testEqualsOtherDiffRound() !ROUND ); Assert.assertNotEquals(target, other); + + // Rounding does not affect per-segment results, so it does not affect cache key + Assert.assertArrayEquals(target.getCacheKey(), other.getCacheKey()); } @Test @@ -232,6 +236,7 @@ public void testEqualsOtherMatches() ROUND ); Assert.assertEquals(target, other); + Assert.assertArrayEquals(target.getCacheKey(), other.getCacheKey()); } @Test diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java index 488ed06bef24..c451f4c2d6e2 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java @@ -35,10 +35,12 @@ import org.apache.druid.query.QueryContexts; import org.apache.druid.query.QueryDataSource; import org.apache.druid.query.QueryRunnerFactoryConglomerate; +import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory; import org.apache.druid.query.aggregation.FilteredAggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; +import org.apache.druid.query.aggregation.PostAggregator; import org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildAggregatorFactory; import org.apache.druid.query.aggregation.datasketches.hll.HllSketchMergeAggregatorFactory; import 
org.apache.druid.query.aggregation.datasketches.hll.HllSketchModule; @@ -49,12 +51,15 @@ import org.apache.druid.query.aggregation.post.ExpressionPostAggregator; import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; import org.apache.druid.query.aggregation.post.FinalizingFieldAccessPostAggregator; +import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.spec.MultipleIntervalSegmentSpec; import org.apache.druid.segment.IndexBuilder; import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.VirtualColumn; +import org.apache.druid.segment.VirtualColumns; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.virtual.ExpressionVirtualColumn; @@ -107,6 +112,97 @@ public class HllSketchSqlAggregatorTest extends CalciteTestBase private static Closer resourceCloser; private static AuthenticationResult authenticationResult = CalciteTests.REGULAR_USER_AUTH_RESULT; + // For testHllSketchPostAggsGroupBy, testHllSketchPostAggsTimeseries + private static final Object[] EXPECTED_PA_RESULT = + new Object[]{ + "\"AgEHDAMIAgDhUv8P63iABQ==\"", + "\"AgEHDAMIBgALpZ0PjpTfBY5ElQo+C7UE4jA+DKfcYQQ=\"", + "\"AgEHDAMIAQAr8vsG\"", + 2.000000004967054d, + 3.000000004967054d, + 3.000000014901161d, + 2.000000004967054d, + "[2.000000004967054,2.0,2.0001997319422404]", + "[2.000000004967054,2.0,2.000099863468538]", + "\"AgEHDAMIBgC1EYgH1mlHBwsKPwu5SK8MIiUxB7iZVwU=\"", + 2L, + "### HLL SKETCH SUMMARY: \n" + + " Log Config K : 12\n" + + " Hll Target : HLL_4\n" + + " Current Mode : LIST\n" + + " Memory : false\n" + + " LB : 2.0\n" + + " Estimate : 2.000000004967054\n" + + " UB : 2.000099863468538\n" + + " OutOfOrder Flag: false\n" + + " Coupon Count : 2\n", + "### HLL SKETCH 
SUMMARY: \n" + + " LOG CONFIG K : 12\n" + + " HLL TARGET : HLL_4\n" + + " CURRENT MODE : LIST\n" + + " MEMORY : FALSE\n" + + " LB : 2.0\n" + + " ESTIMATE : 2.000000004967054\n" + + " UB : 2.000099863468538\n" + + " OUTOFORDER FLAG: FALSE\n" + + " COUPON COUNT : 2\n", + 2.0 + }; + + private static final List EXPECTED_PA_VIRTUAL_COLUMNS = + ImmutableList.of( + new ExpressionVirtualColumn( + "v0", + "concat(\"dim2\",'hello')", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ), + new ExpressionVirtualColumn( + "v1", + "pow(abs((\"m1\" + 100)),2)", + ValueType.DOUBLE, + TestExprMacroTable.INSTANCE + ) + ); + + private static final List EXPECTED_PA_AGGREGATORS = + ImmutableList.of( + new HllSketchBuildAggregatorFactory("a0", "dim2", null, null, null, true), + new HllSketchBuildAggregatorFactory("a1", "m1", null, null, null, true), + new HllSketchBuildAggregatorFactory("a2", "cnt", null, null, null, true), + new HllSketchBuildAggregatorFactory("a3", "v0", null, null, null, true), + new HllSketchBuildAggregatorFactory("a4", "v1", null, null, null, true), + new HllSketchBuildAggregatorFactory("a5", "dim2", null, null, null, true) + ); + + private static final List EXPECTED_PA_POST_AGGREGATORS = + ImmutableList.of( + new FieldAccessPostAggregator("p0", "a0"), + new FieldAccessPostAggregator("p1", "a1"), + new FieldAccessPostAggregator("p2", "a2"), + new HllSketchToEstimatePostAggregator("p4", new FieldAccessPostAggregator("p3", "a0"), false), + new HllSketchToEstimatePostAggregator("p6", new FieldAccessPostAggregator("p5", "a0"), false), + new ExpressionPostAggregator("p7", "(p6 + 1)", null, TestExprMacroTable.INSTANCE), + new HllSketchToEstimatePostAggregator("p9", new FieldAccessPostAggregator("p8", "a3"), false), + new HllSketchToEstimatePostAggregator("p11", new FieldAccessPostAggregator("p10", "a0"), false), + new ExpressionPostAggregator("p12", "abs(p11)", null, TestExprMacroTable.INSTANCE), + new HllSketchToEstimateWithBoundsPostAggregator( + "p14", + new 
FieldAccessPostAggregator("p13", "a0"), + 2 + ), + new HllSketchToEstimateWithBoundsPostAggregator( + "p16", + new FieldAccessPostAggregator("p15", "a0"), + 1 + ), + new FieldAccessPostAggregator("p17", "a4"), + new HllSketchToStringPostAggregator("p19", new FieldAccessPostAggregator("p18", "a0")), + new HllSketchToStringPostAggregator("p21", new FieldAccessPostAggregator("p20", "a0")), + new ExpressionPostAggregator("p22", "upper(p21)", null, TestExprMacroTable.INSTANCE), + new HllSketchToEstimatePostAggregator("p24", new FieldAccessPostAggregator("p23", "a0"), true) + ); + @Rule public ExpectedException expectedException = ExpectedException.none(); @@ -398,9 +494,7 @@ public void testAvgDailyCountDistinctHllSketch() throws Exception new QueryDataSource( Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) - .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of( - Filtration.eternity() - ))) + .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) .granularity(new PeriodGranularity(Period.days(1), null, DateTimeZone.UTC)) .aggregators( Collections.singletonList( @@ -434,8 +528,8 @@ public void testAvgDailyCountDistinctHllSketch() throws Exception .setAggregatorSpecs( NullHandling.replaceWithDefault() ? 
Arrays.asList( - new LongSumAggregatorFactory("_a0:sum", "a0"), - new CountAggregatorFactory("_a0:count") + new LongSumAggregatorFactory("_a0:sum", "a0"), + new CountAggregatorFactory("_a0:count") ) : Arrays.asList( new LongSumAggregatorFactory("_a0:sum", "a0"), @@ -486,13 +580,14 @@ public void testApproxCountDistinctHllSketchIsRounded() throws Exception } @Test - public void testHllSketchPostAggs() throws Exception + public void testHllSketchPostAggsTimeseries() throws Exception { SqlLifecycle sqlLifecycle = sqlLifecycleFactory.factorize(); final String sql = "SELECT\n" + " DS_HLL(dim2),\n" + " DS_HLL(m1),\n" + + " DS_HLL(cnt),\n" + " HLL_SKETCH_ESTIMATE(DS_HLL(dim2)),\n" + " HLL_SKETCH_ESTIMATE(DS_HLL(dim2)) + 1,\n" + " HLL_SKETCH_ESTIMATE(DS_HLL(CONCAT(dim2, 'hello'))),\n" @@ -513,41 +608,7 @@ public void testHllSketchPostAggs() throws Exception DEFAULT_PARAMETERS, authenticationResult ).toList(); - final List expectedResults = ImmutableList.of( - new Object[]{ - "\"AgEHDAMIAgDhUv8P63iABQ==\"", - "\"AgEHDAMIBgALpZ0PjpTfBY5ElQo+C7UE4jA+DKfcYQQ=\"", - 2.000000004967054d, - 3.000000004967054d, - 3.000000014901161d, - 2.000000004967054d, - "[2.000000004967054,2.0,2.0001997319422404]", - "[2.000000004967054,2.0,2.000099863468538]", - "\"AgEHDAMIBgC1EYgH1mlHBwsKPwu5SK8MIiUxB7iZVwU=\"", - 2L, - "### HLL SKETCH SUMMARY: \n" - + " Log Config K : 12\n" - + " Hll Target : HLL_4\n" - + " Current Mode : LIST\n" - + " Memory : false\n" - + " LB : 2.0\n" - + " Estimate : 2.000000004967054\n" - + " UB : 2.000099863468538\n" - + " OutOfOrder Flag: false\n" - + " Coupon Count : 2\n", - "### HLL SKETCH SUMMARY: \n" - + " LOG CONFIG K : 12\n" - + " HLL TARGET : HLL_4\n" - + " CURRENT MODE : LIST\n" - + " MEMORY : FALSE\n" - + " LB : 2.0\n" - + " ESTIMATE : 2.000000004967054\n" - + " UB : 2.000099863468538\n" - + " OUTOFORDER FLAG: FALSE\n" - + " COUPON COUNT : 2\n", - 2.0 - } - ); + final List expectedResults = ImmutableList.of(EXPECTED_PA_RESULT); 
Assert.assertEquals(expectedResults.size(), results.size()); for (int i = 0; i < expectedResults.size(); i++) { @@ -561,91 +622,9 @@ public void testHllSketchPostAggs() throws Exception .dataSource(CalciteTests.DATASOURCE1) .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) .granularity(Granularities.ALL) - .virtualColumns( - new ExpressionVirtualColumn( - "v0", - "concat(\"dim2\",'hello')", - ValueType.STRING, - TestExprMacroTable.INSTANCE - ), - new ExpressionVirtualColumn( - "v1", - "pow(abs((\"m1\" + 100)),2)", - ValueType.DOUBLE, - TestExprMacroTable.INSTANCE - ) - ) - .aggregators( - ImmutableList.of( - new HllSketchBuildAggregatorFactory( - "a0", - "dim2", - null, - null, - null, - true - ), - new HllSketchBuildAggregatorFactory( - "a1", - "m1", - null, - null, - null, - true - ), - new HllSketchBuildAggregatorFactory( - "a2", - "v0", - null, - null, - null, - true - ), - new HllSketchBuildAggregatorFactory( - "a3", - "v1", - null, - null, - null, - true - ), - new HllSketchBuildAggregatorFactory( - "a4", - "dim2", - null, - null, - null, - true - ) - ) - ) - .postAggregators( - ImmutableList.of( - new FieldAccessPostAggregator("p0", "a0"), - new FieldAccessPostAggregator("p1", "a1"), - new HllSketchToEstimatePostAggregator("p3", new FieldAccessPostAggregator("p2", "a0"), false), - new HllSketchToEstimatePostAggregator("p5", new FieldAccessPostAggregator("p4", "a0"), false), - new ExpressionPostAggregator("p6", "(p5 + 1)", null, TestExprMacroTable.INSTANCE), - new HllSketchToEstimatePostAggregator("p8", new FieldAccessPostAggregator("p7", "a2"), false), - new HllSketchToEstimatePostAggregator("p10", new FieldAccessPostAggregator("p9", "a0"), false), - new ExpressionPostAggregator("p11", "abs(p10)", null, TestExprMacroTable.INSTANCE), - new HllSketchToEstimateWithBoundsPostAggregator( - "p13", - new FieldAccessPostAggregator("p12", "a0"), - 2 - ), - new HllSketchToEstimateWithBoundsPostAggregator( - "p15", - new 
FieldAccessPostAggregator("p14", "a0"), - 1 - ), - new FieldAccessPostAggregator("p16", "a3"), - new HllSketchToStringPostAggregator("p18", new FieldAccessPostAggregator("p17", "a0")), - new HllSketchToStringPostAggregator("p20", new FieldAccessPostAggregator("p19", "a0")), - new ExpressionPostAggregator("p21", "upper(p20)", null, TestExprMacroTable.INSTANCE), - new HllSketchToEstimatePostAggregator("p23", new FieldAccessPostAggregator("p22", "a0"), true) - ) - ) + .virtualColumns(VirtualColumns.create(EXPECTED_PA_VIRTUAL_COLUMNS)) + .aggregators(EXPECTED_PA_AGGREGATORS) + .postAggregators(EXPECTED_PA_POST_AGGREGATORS) .context(queryContext) .build() .withOverriddenContext(ImmutableMap.of("skipEmptyBuckets", true)); @@ -654,6 +633,62 @@ public void testHllSketchPostAggs() throws Exception Assert.assertEquals(expectedQuery, actualQuery); } + @Test + public void testHllSketchPostAggsGroupBy() throws Exception + { + SqlLifecycle sqlLifecycle = sqlLifecycleFactory.factorize(); + + final String sql = "SELECT\n" + + " DS_HLL(dim2),\n" + + " DS_HLL(m1),\n" + + " DS_HLL(cnt),\n" + + " HLL_SKETCH_ESTIMATE(DS_HLL(dim2)),\n" + + " HLL_SKETCH_ESTIMATE(DS_HLL(dim2)) + 1,\n" + + " HLL_SKETCH_ESTIMATE(DS_HLL(CONCAT(dim2, 'hello'))),\n" + + " ABS(HLL_SKETCH_ESTIMATE(DS_HLL(dim2))),\n" + + " HLL_SKETCH_ESTIMATE_WITH_ERROR_BOUNDS(DS_HLL(dim2), 2),\n" + + " HLL_SKETCH_ESTIMATE_WITH_ERROR_BOUNDS(DS_HLL(dim2)),\n" + + " DS_HLL(POWER(ABS(m1 + 100), 2)),\n" + + " APPROX_COUNT_DISTINCT_DS_HLL(dim2),\n" + + " HLL_SKETCH_TO_STRING(DS_HLL(dim2)),\n" + + " UPPER(HLL_SKETCH_TO_STRING(DS_HLL(dim2))),\n" + + " HLL_SKETCH_ESTIMATE(DS_HLL(dim2), true)\n" + + "FROM druid.foo\n" + + "GROUP BY cnt"; + + + // Verify results + final List results = sqlLifecycle.runSimple( + sql, + queryContext, + DEFAULT_PARAMETERS, + authenticationResult + ).toList(); + final List expectedResults = ImmutableList.of(EXPECTED_PA_RESULT); + + Assert.assertEquals(expectedResults.size(), results.size()); + for (int i = 0; i 
< expectedResults.size(); i++) { + Assert.assertArrayEquals(expectedResults.get(i), results.get(i)); + } + + Query actualQuery = Iterables.getOnlyElement(queryLogHook.getRecordedQueries()); + + Query expectedQuery = + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(new MultipleIntervalSegmentSpec(Collections.singletonList(Filtration.eternity()))) + .setGranularity(Granularities.ALL) + .setVirtualColumns(VirtualColumns.create(EXPECTED_PA_VIRTUAL_COLUMNS)) + .setDimensions(new DefaultDimensionSpec("cnt", "d0", ValueType.LONG)) + .setAggregatorSpecs(EXPECTED_PA_AGGREGATORS) + .setPostAggregatorSpecs(EXPECTED_PA_POST_AGGREGATORS) + .setContext(queryContext) + .build(); + + // Verify query + Assert.assertEquals(expectedQuery, actualQuery); + } + @Test public void testtHllSketchPostAggsPostSort() throws Exception { From c43d99e3dc4f3c88eaf140fe3a1b240d702b1a1d Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 5 May 2021 16:07:02 -0700 Subject: [PATCH 04/13] Put round back, to make IT updates easier. 
--- .../datasketches/hll/HllSketchAggregatorFactory.java | 1 - .../hll/HllSketchBuildAggregatorFactoryTest.java | 8 +++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java index 62227db53a8c..b11697ae5b71 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java @@ -109,7 +109,6 @@ public StringEncoding getStringEncoding() } @JsonProperty - @JsonInclude(JsonInclude.Include.NON_DEFAULT) public boolean isRound() { return round; diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactoryTest.java index f38fb92e9b67..29b4512613d4 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactoryTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactoryTest.java @@ -82,7 +82,13 @@ public void testSerdeWithDefaults() throws IOException final String serializedString = jsonMapper.writeValueAsString(factory); Assert.assertEquals( - "{\"type\":\"HLLSketchBuild\",\"name\":\"foo\",\"fieldName\":\"bar\",\"lgK\":12,\"tgtHllType\":\"HLL_4\"}", + "{\"type\":\"HLLSketchBuild\"," + + "\"name\":\"foo\"," + + "\"fieldName\":\"bar\"," + + "\"lgK\":12," + + "\"tgtHllType\":\"HLL_4\"," + + "\"round\":false" + + "}", serializedString ); From 
0d7d9aad781528b09013c91bc95699f011f314a7 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 5 May 2021 16:08:30 -0700 Subject: [PATCH 05/13] Fix test. --- .../hll/HllSketchMergeAggregatorFactoryTest.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java index ff58f6d3db46..19416c8b630b 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java @@ -234,7 +234,13 @@ public void testSerdeWithDefaults() throws IOException final String serializedString = jsonMapper.writeValueAsString(factory); Assert.assertEquals( - "{\"type\":\"HLLSketchMerge\",\"name\":\"foo\",\"fieldName\":\"bar\",\"lgK\":12,\"tgtHllType\":\"HLL_4\"}", + "{\"type\":\"HLLSketchMerge\"," + + "\"name\":\"foo\"," + + "\"fieldName\":\"bar\"," + + "\"lgK\":12," + + "\"tgtHllType\":\"HLL_4\"," + + "\"round\":false" + + "}", serializedString ); From f26dc56acb1dddb0ba800468d5eb711be779c68e Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Sat, 23 Oct 2021 10:31:34 -0700 Subject: [PATCH 06/13] Fix issue with filtered aggregators and add test. 
--- .../java/util/common/StringEncoding.java | 2 +- ...ueStringHllSketchBuildVectorProcessor.java | 3 +- .../ObjectHllSketchBuildVectorProcessor.java | 5 +- ...ueStringHllSketchBuildVectorProcessor.java | 3 +- .../hll/sql/HllSketchSqlAggregatorTest.java | 115 +++++++++++++++++- 5 files changed, 121 insertions(+), 7 deletions(-) diff --git a/core/src/main/java/org/apache/druid/java/util/common/StringEncoding.java b/core/src/main/java/org/apache/druid/java/util/common/StringEncoding.java index 480a15a022e7..111558878d96 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/StringEncoding.java +++ b/core/src/main/java/org/apache/druid/java/util/common/StringEncoding.java @@ -26,7 +26,7 @@ /** * An enum that provides a way for users to specify what encoding should be used when hashing strings. * - * The main reason for thsi setting's existence is getting the best performance possible. When operating on memory + * The main reason for this setting's existence is getting the best performance possible. When operating on memory * mapped segments -- which store strings as UTF-8 -- it is fastest to use "UTF8". When operating on the result of * expressions, or on an in-heap IncrementalIndex -- which use Java strings -- it is fastest to use "UTF16LE". 
* diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/MultiValueStringHllSketchBuildVectorProcessor.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/MultiValueStringHllSketchBuildVectorProcessor.java index 58991413b76b..074c18bbd6e2 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/MultiValueStringHllSketchBuildVectorProcessor.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/MultiValueStringHllSketchBuildVectorProcessor.java @@ -73,10 +73,11 @@ public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable in final IndexedInts[] vector = selector.getRowVector(); for (int i = 0; i < numRows; i++) { + final int idx = rows != null ? rows[i] : i; final int position = positions[i] + positionOffset; final HllSketch sketch = helper.getSketchAtPosition(buf, position); - final IndexedInts ids = vector[rows != null ? 
rows[i] : i]; + final IndexedInts ids = vector[idx]; final int sz = ids.size(); for (int j = 0; j < sz; j++) { diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/ObjectHllSketchBuildVectorProcessor.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/ObjectHllSketchBuildVectorProcessor.java index 99666a32306f..56eceb15f5c1 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/ObjectHllSketchBuildVectorProcessor.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/ObjectHllSketchBuildVectorProcessor.java @@ -71,14 +71,15 @@ public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable in final Object[] vector = selector.getObjectVector(); for (int i = 0; i < numRows; i++) { + final int idx = rows != null ? rows[i] : i; final int position = positions[i] + positionOffset; final HllSketch sketch = helper.getSketchAtPosition(buf, position); - if (vector[i] != null) { + if (vector[idx] != null) { HllSketchBuildUtil.updateSketch( sketch, stringEncoding, - vector[i] + vector[idx] ); } } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/SingleValueStringHllSketchBuildVectorProcessor.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/SingleValueStringHllSketchBuildVectorProcessor.java index b2b0070f8385..6c93b5669b81 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/SingleValueStringHllSketchBuildVectorProcessor.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/vector/SingleValueStringHllSketchBuildVectorProcessor.java @@ -67,6 +67,7 @@ public void aggregate(ByteBuffer buf, int 
numRows, int[] positions, @Nullable in final int[] vector = selector.getRowVector(); for (int i = 0; i < numRows; i++) { + final int idx = rows != null ? rows[i] : i; final int position = positions[i] + positionOffset; final HllSketch sketch = helper.getSketchAtPosition(buf, position); @@ -74,7 +75,7 @@ public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable in sketch, stringEncoding, selector, - vector[i] + vector[idx] ); } } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java index c9a5f6dac3fe..3fc640125fb7 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java @@ -75,6 +75,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.stream.Collectors; public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest { @@ -143,6 +144,12 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest new HllSketchBuildAggregatorFactory("a5", "dim2", null, null, null, true) ); + private static final List EXPECTED_FILTERED_AGGREGATORS = + EXPECTED_PA_AGGREGATORS.stream() + .limit(5) + .map(factory -> new FilteredAggregatorFactory(factory, selector("dim2", "a", null))) + .collect(Collectors.toList()); + private static final List EXPECTED_PA_POST_AGGREGATORS = ImmutableList.of( new FieldAccessPostAggregator("p0", "a0"), @@ -171,6 +178,20 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest new HllSketchToEstimatePostAggregator("p24", new FieldAccessPostAggregator("p23", "a0"), true) ); + private static final List 
EXPECTED_FILTERED_POST_AGGREGATORS = + ImmutableList.of( + new FieldAccessPostAggregator("p0", "a0"), + new FieldAccessPostAggregator("p1", "a1"), + new FieldAccessPostAggregator("p2", "a2"), + new FieldAccessPostAggregator("p3", "a3"), + new FieldAccessPostAggregator("p4", "a4"), + new HllSketchToEstimatePostAggregator("p6", new FieldAccessPostAggregator("p5", "a0"), false), + new HllSketchToEstimatePostAggregator("p8", new FieldAccessPostAggregator("p7", "a1"), false), + new HllSketchToEstimatePostAggregator("p10", new FieldAccessPostAggregator("p9", "a2"), false), + new HllSketchToEstimatePostAggregator("p12", new FieldAccessPostAggregator("p11", "a3"), false), + new HllSketchToEstimatePostAggregator("p14", new FieldAccessPostAggregator("p13", "a4"), false) + ); + @Override public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker() throws IOException { @@ -463,7 +484,97 @@ public void testApproxCountDistinctHllSketchIsRounded() throws Exception } @Test - public void testHllSketchPostAggsTimeseries() throws Exception + public void testHllSketchFilteredAggregatorsGroupBy() throws Exception + { + testQuery( + "SELECT\n" + + " DS_HLL(dim2) FILTER(WHERE MV_CONTAINS(dim2, 'a')),\n" + + " DS_HLL(m1) FILTER(WHERE MV_CONTAINS(dim2, 'a')),\n" + + " DS_HLL(cnt) FILTER(WHERE MV_CONTAINS(dim2, 'a')),\n" + + " DS_HLL(CONCAT(dim2, 'hello')) FILTER(WHERE MV_CONTAINS(dim2, 'a')),\n" + + " DS_HLL(POWER(ABS(m1 + 100), 2)) FILTER(WHERE MV_CONTAINS(dim2, 'a')),\n" + + " HLL_SKETCH_ESTIMATE(DS_HLL(dim2) FILTER(WHERE MV_CONTAINS(dim2, 'a'))),\n" + + " HLL_SKETCH_ESTIMATE(DS_HLL(m1) FILTER(WHERE MV_CONTAINS(dim2, 'a'))),\n" + + " HLL_SKETCH_ESTIMATE(DS_HLL(cnt) FILTER(WHERE MV_CONTAINS(dim2, 'a'))),\n" + + " HLL_SKETCH_ESTIMATE(DS_HLL(CONCAT(dim2, 'hello')) FILTER(WHERE MV_CONTAINS(dim2, 'a'))),\n" + + " HLL_SKETCH_ESTIMATE(DS_HLL(POWER(ABS(m1 + 100), 2)) FILTER(WHERE MV_CONTAINS(dim2, 'a')))\n" + + "FROM druid.foo\n" + + "GROUP BY cnt", + ImmutableList.of( + 
GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(new MultipleIntervalSegmentSpec(Collections.singletonList(Filtration.eternity()))) + .setGranularity(Granularities.ALL) + .setVirtualColumns(VirtualColumns.create(EXPECTED_PA_VIRTUAL_COLUMNS)) + .setDimensions(new DefaultDimensionSpec("cnt", "d0", ColumnType.LONG)) + .setAggregatorSpecs(EXPECTED_FILTERED_AGGREGATORS) + .setPostAggregatorSpecs(EXPECTED_FILTERED_POST_AGGREGATORS) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{ + "\"AgEHDAMIAQDhUv8P\"", + "\"AgEHDAMIAgALpZ0PPgu1BA==\"", + "\"AgEHDAMIAQAr8vsG\"", + "\"AgEHDAMIAQCba0kG\"", + "\"AgEHDAMIAgC1EYgHuUivDA==\"", + 1.0, + 2.000000004967054, + 1.0, + 1.0, + 2.000000004967054 + } + ) + ); + } + + @Test + public void testHllSketchFilteredAggregatorsTimeseries() throws Exception + { + testQuery( + "SELECT\n" + + " DS_HLL(dim2) FILTER(WHERE MV_CONTAINS(dim2, 'a')),\n" + + " DS_HLL(m1) FILTER(WHERE MV_CONTAINS(dim2, 'a')),\n" + + " DS_HLL(cnt) FILTER(WHERE MV_CONTAINS(dim2, 'a')),\n" + + " DS_HLL(CONCAT(dim2, 'hello')) FILTER(WHERE MV_CONTAINS(dim2, 'a')),\n" + + " DS_HLL(POWER(ABS(m1 + 100), 2)) FILTER(WHERE MV_CONTAINS(dim2, 'a')),\n" + + " HLL_SKETCH_ESTIMATE(DS_HLL(dim2) FILTER(WHERE MV_CONTAINS(dim2, 'a'))),\n" + + " HLL_SKETCH_ESTIMATE(DS_HLL(m1) FILTER(WHERE MV_CONTAINS(dim2, 'a'))),\n" + + " HLL_SKETCH_ESTIMATE(DS_HLL(cnt) FILTER(WHERE MV_CONTAINS(dim2, 'a'))),\n" + + " HLL_SKETCH_ESTIMATE(DS_HLL(CONCAT(dim2, 'hello')) FILTER(WHERE MV_CONTAINS(dim2, 'a'))),\n" + + " HLL_SKETCH_ESTIMATE(DS_HLL(POWER(ABS(m1 + 100), 2)) FILTER(WHERE MV_CONTAINS(dim2, 'a')))\n" + + "FROM druid.foo", + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))) + .granularity(Granularities.ALL) + .virtualColumns(VirtualColumns.create(EXPECTED_PA_VIRTUAL_COLUMNS)) + 
.aggregators(EXPECTED_FILTERED_AGGREGATORS) + .postAggregators(EXPECTED_FILTERED_POST_AGGREGATORS) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{ + "\"AgEHDAMIAQDhUv8P\"", + "\"AgEHDAMIAgALpZ0PPgu1BA==\"", + "\"AgEHDAMIAQAr8vsG\"", + "\"AgEHDAMIAQCba0kG\"", + "\"AgEHDAMIAgC1EYgHuUivDA==\"", + 1.0, + 2.000000004967054, + 1.0, + 1.0, + 2.000000004967054 + } + ) + ); + } + + @Test + public void testHllSketchPostAggsGroupBy() throws Exception { testQuery( "SELECT\n" @@ -500,7 +611,7 @@ public void testHllSketchPostAggsTimeseries() throws Exception } @Test - public void testHllSketchPostAggsGroupBy() throws Exception + public void testHllSketchPostAggsTimeseries() throws Exception { testQuery( "SELECT\n" From 53a708fa3f29d517260f954c87288da934dd9222 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Sat, 23 Oct 2021 10:51:00 -0700 Subject: [PATCH 07/13] Use DS native update(ByteBuffer) method. Improve test coverage. --- .../hll/HllSketchBuildAggregator.java | 3 - .../datasketches/hll/HllSketchBuildUtil.java | 11 +- .../hll/HllSketchBuildUtilTest.java | 188 ++++++++++++++++-- 3 files changed, 174 insertions(+), 28 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregator.java index 5c4950d40638..0c8af84bbcac 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregator.java @@ -21,9 +21,6 @@ import org.apache.datasketches.hll.HllSketch; import org.apache.datasketches.hll.TgtHllType; -import org.apache.druid.java.util.common.StringEncoding; -import org.apache.druid.common.config.NullHandling; -import 
org.apache.druid.java.util.common.IAE; import org.apache.druid.query.aggregation.Aggregator; import java.util.function.Consumer; diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java index eb62a68a5621..af0ac097eef6 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java @@ -75,16 +75,7 @@ public static void updateSketchWithDictionarySelector( final ByteBuffer buf = selector.lookupNameUtf8(id); if (buf != null) { - // We must copy the bytes here, because HllSketch doesn't have a method that accepts ByteBuffer. - // Should be possible to optimize this by adding such a method to DataSketches. - final byte[] bytes = new byte[buf.remaining()]; - buf.get(bytes); - sketch.update(bytes); - } else if (NullHandling.replaceWithDefault()) { - // Treat as empty string. - sketch.update(StringUtils.EMPTY_BYTES); - } else { - // Do nothing. 
+ sketch.update(buf); } } else { final String s = NullHandling.nullToEmptyIfNeeded(selector.lookupName(id)); diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtilTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtilTest.java index d25fa4efd6f3..eca5e6f37a4f 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtilTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtilTest.java @@ -19,41 +19,52 @@ package org.apache.druid.query.aggregation.datasketches.hll; +import com.google.common.collect.ImmutableMap; import org.apache.datasketches.hll.HllSketch; import org.apache.druid.java.util.common.StringEncoding; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.segment.DimensionDictionarySelector; +import org.apache.druid.segment.IdLookup; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; import org.junit.Test; +import javax.annotation.Nullable; +import java.nio.ByteBuffer; import java.util.Arrays; +import java.util.Map; /** * Tests for {@link HllSketchBuildUtil#updateSketch}. - * - * Tests of the aggregator generally should go in {@link HllSketchAggregatorTest} instead. 
*/ public class HllSketchBuildUtilTest extends InitializedNullHandlingTest { + private static final Map DICTIONARY = ImmutableMap.of( + 1, "bar", + 2, "foo" + ); + private final HllSketch sketch = new HllSketch(HllSketch.DEFAULT_LG_K); @Test - public void testUpdateSketchVariousNumbers() + public void testUpdateSketchListsOfStringsUTF16LE() { - updateSketch(1L, -2L, 1L, -2, 1L, 2.0, 2f, Double.doubleToLongBits(2.0), 3.0); - assertSketchEstimate(4); - } + updateSketch( + StringEncoding.UTF16LE, + Arrays.asList("1", "2"), + Arrays.asList("2", "", "3", "11"), + Arrays.asList("1", null, "3", "12"), + Arrays.asList("1", "3", "13") + ); - @Test - public void testUpdateSketchStrings() - { - updateSketch("foo", null, "bar", ""); - assertSketchEstimate(2); + assertSketchEstimate(6); } @Test - public void testUpdateSketchListsOfStrings() + public void testUpdateSketchListsOfStringsUTF8() { updateSketch( + StringEncoding.UTF16LE, Arrays.asList("1", "2"), Arrays.asList("2", "", "3", "11"), Arrays.asList("1", null, "3", "12"), @@ -67,6 +78,7 @@ public void testUpdateSketchListsOfStrings() public void testUpdateSketchCharArray() { updateSketch( + StringEncoding.UTF16LE, new char[]{1, 2}, new char[]{2, 3, 11}, new char[]{1, 2}, @@ -80,6 +92,7 @@ public void testUpdateSketchCharArray() public void testUpdateSketchByteArray() { updateSketch( + StringEncoding.UTF16LE, new byte[]{1, 2}, new byte[]{2, 3, 11}, new byte[]{1, 2}, @@ -93,6 +106,7 @@ public void testUpdateSketchByteArray() public void testUpdateSketchIntArray() { updateSketch( + StringEncoding.UTF16LE, new int[]{1, 2}, new int[]{2, 3, 11}, new int[]{1, 2}, @@ -106,6 +120,7 @@ public void testUpdateSketchIntArray() public void testUpdateSketchLongArray() { updateSketch( + StringEncoding.UTF16LE, new long[]{1, 2}, new long[]{2, 3, 11}, new long[]{1, 2}, @@ -115,22 +130,165 @@ public void testUpdateSketchLongArray() assertSketchEstimate(3); } - private void updateSketch(final Object first, final Object... 
others) + @Test + public void testUpdateSketchWithDictionarySelector8to8() + { + final TestDictionarySelector selector = new TestDictionarySelector(DICTIONARY, true); + updateSketchWithDictionarySelector(StringEncoding.UTF8, selector, 0, 1, 2, 1); + assertSketchEstimate(2); + } + + @Test + public void testUpdateSketchWithDictionarySelector8to16() + { + final TestDictionarySelector selector = new TestDictionarySelector(DICTIONARY, true); + updateSketchWithDictionarySelector(StringEncoding.UTF16LE, selector, 0, 1, 2, 1); + assertSketchEstimate(2); + } + + @Test + public void testUpdateSketchWithDictionarySelector16to8() + { + final TestDictionarySelector selector = new TestDictionarySelector(DICTIONARY, false); + updateSketchWithDictionarySelector(StringEncoding.UTF8, selector, 0, 1, 2, 1); + assertSketchEstimate(2); + } + + @Test + public void testUpdateSketchWithDictionarySelector16to16() + { + final TestDictionarySelector selector = new TestDictionarySelector(DICTIONARY, false); + updateSketchWithDictionarySelector(StringEncoding.UTF16LE, selector, 0, 1, 2, 1); + assertSketchEstimate(2); + } + + @Test + public void testUpdateSketchWithDictionarySelectorMixedTo8() + { + final TestDictionarySelector selector1 = new TestDictionarySelector(DICTIONARY, false); + final TestDictionarySelector selector2 = new TestDictionarySelector(DICTIONARY, true); + updateSketchWithDictionarySelector(StringEncoding.UTF8, selector1, 0, 1, 2, 1); + updateSketchWithDictionarySelector(StringEncoding.UTF8, selector2, 0, 1, 2, 1); + assertSketchEstimate(2); // Duplicates are de-duplicated + } + + @Test + public void testUpdateSketchWithDictionarySelectorMixedTo16() + { + final TestDictionarySelector selector1 = new TestDictionarySelector(DICTIONARY, false); + final TestDictionarySelector selector2 = new TestDictionarySelector(DICTIONARY, true); + updateSketchWithDictionarySelector(StringEncoding.UTF16LE, selector1, 0, 1, 2, 1); + updateSketchWithDictionarySelector(StringEncoding.UTF16LE, 
selector2, 0, 1, 2, 1); + assertSketchEstimate(2); // Duplicates are de-duplicated + } + + @Test + public void testUpdateSketchWithDictionarySelector8ToMixed() + { + final TestDictionarySelector selector = new TestDictionarySelector(DICTIONARY, true); + updateSketchWithDictionarySelector(StringEncoding.UTF8, selector, 0, 1, 2, 1); + updateSketchWithDictionarySelector(StringEncoding.UTF16LE, selector, 0, 1, 2, 1); + assertSketchEstimate(4); // Incompatible hashes + } + + @Test + public void testUpdateSketchWithDictionarySelector16ToMixed() + { + final TestDictionarySelector selector = new TestDictionarySelector(DICTIONARY, false); + updateSketchWithDictionarySelector(StringEncoding.UTF8, selector, 0, 1, 2, 1); + updateSketchWithDictionarySelector(StringEncoding.UTF16LE, selector, 0, 1, 2, 1); + assertSketchEstimate(4); // Incompatible hashes + } + + private void updateSketch(final StringEncoding stringEncoding, final Object first, final Object... others) { // first != null check mimics how updateSketch is called: it's always guarded by a null check on the outer value. if (first != null) { - HllSketchBuildUtil.updateSketch(sketch, StringEncoding.UTF16LE, first); + HllSketchBuildUtil.updateSketch(sketch, stringEncoding, first); } for (final Object o : others) { if (o != null) { - HllSketchBuildUtil.updateSketch(sketch, StringEncoding.UTF16LE, o); + HllSketchBuildUtil.updateSketch(sketch, stringEncoding, o); } } } + private void updateSketchWithDictionarySelector( + final StringEncoding stringEncoding, + final DimensionDictionarySelector selector, + final int... 
ids + ) + { + for (int id : ids) { + HllSketchBuildUtil.updateSketchWithDictionarySelector(sketch, stringEncoding, selector, id); + } + } + private void assertSketchEstimate(final long estimate) { Assert.assertEquals((double) estimate, sketch.getEstimate(), 0.1); } + + private static class TestDictionarySelector implements DimensionDictionarySelector + { + private final Map dictionary; + private final boolean supportsLookupNameUtf8; + + public TestDictionarySelector(final Map dictionary, final boolean supportsLookupNameUtf8) + { + this.dictionary = dictionary; + this.supportsLookupNameUtf8 = supportsLookupNameUtf8; + } + + @Override + public int getValueCardinality() + { + // Unused by this test + throw new UnsupportedOperationException(); + } + + @Nullable + @Override + public String lookupName(int id) + { + return dictionary.get(id); + } + + @Nullable + @Override + public ByteBuffer lookupNameUtf8(int id) + { + if (!supportsLookupNameUtf8) { + throw new UnsupportedOperationException(); + } + + final String s = dictionary.get(id); + + if (s == null) { + return null; + } else { + return ByteBuffer.wrap(StringUtils.toUtf8(s)); + } + } + + @Override + public boolean supportsLookupNameUtf8() + { + return supportsLookupNameUtf8; + } + + @Override + public boolean nameLookupPossibleInAdvance() + { + return true; + } + + @Nullable + @Override + public IdLookup idLookup() + { + return null; + } + } } From f7ff8d4c9e80492bf7b43ecc6cbd15da5cd1efbd Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Sat, 23 Oct 2021 16:56:46 -0700 Subject: [PATCH 08/13] Add another suppression. 
--- .../common/StringEncodingDefaultUTF16LEJsonIncludeFilter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/druid/java/util/common/StringEncodingDefaultUTF16LEJsonIncludeFilter.java b/core/src/main/java/org/apache/druid/java/util/common/StringEncodingDefaultUTF16LEJsonIncludeFilter.java index 71547a11f10b..332df030856e 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/StringEncodingDefaultUTF16LEJsonIncludeFilter.java +++ b/core/src/main/java/org/apache/druid/java/util/common/StringEncodingDefaultUTF16LEJsonIncludeFilter.java @@ -28,7 +28,7 @@ * This API works by "creative" use of equals. It requires warnings to be suppressed and also requires spotbugs * exclusions (see spotbugs-exclude.xml). */ -@SuppressWarnings({"EqualsAndHashcode", "EqualsWhichDoesntCheckParameterClass"}) +@SuppressWarnings({"EqualsAndHashcode", "EqualsHashCode", "EqualsWhichDoesntCheckParameterClass"}) public class StringEncodingDefaultUTF16LEJsonIncludeFilter // lgtm [java/inconsistent-equals-and-hashcode] { @Override From dacd05e0b4028d1c0aba8b3d721094311183b0b7 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Sat, 14 May 2022 16:18:16 -0700 Subject: [PATCH 09/13] Fix ITAutoCompactionTest. 
--- .../druid/tests/coordinator/duty/ITAutoCompactionTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java index 32d563f25b27..f348e3d180e0 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java @@ -172,7 +172,7 @@ public void testAutoCompactionRowWithMetricAndRowWithoutMetricShouldPreserveExis // FloatSumAggregator combine method takes in two Float but return Double new FloatSumAggregatorFactory("sum_added", "added"), new SketchMergeAggregatorFactory("thetaSketch", "user", 16384, true, false, null), - new HllSketchBuildAggregatorFactory("HLLSketchBuild", "user", 12, TgtHllType.HLL_4.name(), false), + new HllSketchBuildAggregatorFactory("HLLSketchBuild", "user", 12, TgtHllType.HLL_4.name(), null, false), new DoublesSketchAggregatorFactory("quantilesDoublesSketch", "delta", 128, 1000000000L) }, false @@ -266,7 +266,7 @@ public void testAutoCompactionRowWithMetricAndRowWithoutMetricShouldPreserveExis new CountAggregatorFactory("count"), new LongSumAggregatorFactory("sum_added", "added"), new SketchMergeAggregatorFactory("thetaSketch", "user", 16384, true, false, null), - new HllSketchBuildAggregatorFactory("HLLSketchBuild", "user", 12, TgtHllType.HLL_4.name(), false), + new HllSketchBuildAggregatorFactory("HLLSketchBuild", "user", 12, TgtHllType.HLL_4.name(), null, false), new DoublesSketchAggregatorFactory("quantilesDoublesSketch", "delta", 128, 1000000000L) }, false From 0d4674fbb4cf8f4862039da775115c901b8f7d1f Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 29 Jun 2023 16:35:53 -0700 Subject: [PATCH 10/13] Update benchmarks. 
--- .../benchmark/DataSketchesHllBenchmark.java | 1 + .../druid/benchmark/query/SqlBenchmark.java | 39 +++++++++++++------ 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/DataSketchesHllBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/DataSketchesHllBenchmark.java index bed30c85c64b..28dc5db99054 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/DataSketchesHllBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/DataSketchesHllBenchmark.java @@ -65,6 +65,7 @@ public class DataSketchesHllBenchmark null, null, null, + null, false ); diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmark.java index ba9debb28815..1172d823e024 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmark.java @@ -32,8 +32,10 @@ import org.apache.druid.query.QueryContexts; import org.apache.druid.query.QueryRunnerFactoryConglomerate; import org.apache.druid.query.aggregation.datasketches.hll.sql.HllSketchApproxCountDistinctSqlAggregator; +import org.apache.druid.query.aggregation.datasketches.hll.sql.HllSketchApproxCountDistinctUtf8SqlAggregator; import org.apache.druid.query.aggregation.datasketches.quantiles.sql.DoublesSketchApproxQuantileSqlAggregator; import org.apache.druid.query.aggregation.datasketches.quantiles.sql.DoublesSketchObjectSqlAggregator; +import org.apache.druid.query.aggregation.datasketches.theta.sql.ThetaSketchApproxCountDistinctSqlAggregator; import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.QueryableIndexSegment; @@ -405,26 +407,35 @@ public class SqlBenchmark "SELECT * FROM foo WHERE dimSequential IN ('1', '2', '3', '4', '5', '10', '11', '20', '21', '23', '40', 
'50', '64', '70', '100')", "SELECT * FROM foo WHERE dimSequential > '10' AND dimSequential < '8500'", "SELECT dimSequential, dimZipf, SUM(sumLongSequential) FROM foo WHERE dimSequential IN ('1', '2', '3', '4', '5', '10', '11', '20', '21', '23', '40', '50', '64', '70', '100') GROUP BY 1, 2", - "SELECT dimSequential, dimZipf, SUM(sumLongSequential) FROM foo WHERE dimSequential > '10' AND dimSequential < '8500' GROUP BY 1, 2" - + "SELECT dimSequential, dimZipf, SUM(sumLongSequential) FROM foo WHERE dimSequential > '10' AND dimSequential < '8500' GROUP BY 1, 2", + // 28, 29, 30, 31: Approximate count distinct of strings + "SELECT APPROX_COUNT_DISTINCT_BUILTIN(dimZipf) FROM foo", + "SELECT APPROX_COUNT_DISTINCT_DS_HLL(dimZipf) FROM foo", + "SELECT APPROX_COUNT_DISTINCT_DS_HLL_UTF8(dimZipf) FROM foo", + "SELECT APPROX_COUNT_DISTINCT_DS_THETA(dimZipf) FROM foo" ); @Param({"5000000"}) private int rowsPerSegment; - @Param({"false", "force"}) + // Can be "false", "true", or "force" + @Param({"force"}) private String vectorize; - @Param({"none", "front-coded-4", "front-coded-16"}) + + // Can be "none" or "front-coded-N" + @Param({"none", "front-coded-4"}) private String stringEncoding; - @Param({"4", "5", "6", "7", "8", "10", "11", "12", "19", "21", "22", "23", "26", "27"}) + @Param({"28", "29", "30", "31"}) private String query; - @Param({STORAGE_MMAP, STORAGE_FRAME_ROW, STORAGE_FRAME_COLUMNAR}) + // Can be STORAGE_MMAP, STORAGE_FRAME_ROW, or STORAGE_FRAME_COLUMNAR + @Param({STORAGE_MMAP}) private String storageType; private SqlEngine engine; + @Nullable private PlannerFactory plannerFactory; private final Closer closer = Closer.create(); @@ -520,13 +531,19 @@ private static DruidOperatorTable createOperatorTable() try { final Set extractionOperators = new HashSet<>(); extractionOperators.add(CalciteTests.INJECTOR.getInstance(QueryLookupOperatorConversion.class)); - final Set aggregators = new HashSet<>(); - 
aggregators.add(CalciteTests.INJECTOR.getInstance(DoublesSketchApproxQuantileSqlAggregator.class)); - aggregators.add(CalciteTests.INJECTOR.getInstance(DoublesSketchObjectSqlAggregator.class)); final ApproxCountDistinctSqlAggregator countDistinctSqlAggregator = new ApproxCountDistinctSqlAggregator(new HllSketchApproxCountDistinctSqlAggregator()); - aggregators.add(new CountSqlAggregator(countDistinctSqlAggregator)); - aggregators.add(countDistinctSqlAggregator); + final Set aggregators = new HashSet<>( + ImmutableList.of( + new DoublesSketchApproxQuantileSqlAggregator(), + new DoublesSketchObjectSqlAggregator(), + new HllSketchApproxCountDistinctSqlAggregator(), + new HllSketchApproxCountDistinctUtf8SqlAggregator(), + new ThetaSketchApproxCountDistinctSqlAggregator(), + new CountSqlAggregator(countDistinctSqlAggregator), + countDistinctSqlAggregator + ) + ); return new DruidOperatorTable(aggregators, extractionOperators); } catch (Exception e) { From 479519dd2cfd521bb4ad01e6b2299cf9ca5de6fb Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 29 Jun 2023 17:25:13 -0700 Subject: [PATCH 11/13] Updates. 
--- .../hll/HllSketchAggregatorFactory.java | 2 +- .../datasketches/hll/HllSketchBuildUtil.java | 10 +- .../datasketches/hll/HllSketchHolder.java | 6 + .../hll/HllSketchAggregatorTest.java | 189 +++--------------- .../hll/sql/HllSketchSqlAggregatorTest.java | 24 ++- 5 files changed, 60 insertions(+), 171 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java index 265e29173684..a8393203029e 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java @@ -264,7 +264,7 @@ public byte[] getCacheKey() .appendString(fieldName) .appendInt(lgK) .appendInt(tgtHllType.ordinal()) - .appendInt(stringEncoding.ordinal()) + .appendCacheable(stringEncoding) .build(); } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java index af0ac097eef6..bcd4c4eb6d90 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildUtil.java @@ -45,10 +45,7 @@ public static void updateSketch(final HllSketch sketch, final StringEncoding str // noinspection rawtypes for (Object entry : (List) value) { if (entry != null) { - final String asString = entry.toString(); - if (!NullHandling.isNullOrEquivalent(asString)) { - updateSketchWithString(sketch, stringEncoding, asString); - } + 
updateSketchWithString(sketch, stringEncoding, entry.toString()); } } } else if (value instanceof char[]) { @@ -78,8 +75,7 @@ public static void updateSketchWithDictionarySelector( sketch.update(buf); } } else { - final String s = NullHandling.nullToEmptyIfNeeded(selector.lookupName(id)); - updateSketchWithString(sketch, stringEncoding, s); + updateSketchWithString(sketch, stringEncoding, selector.lookupName(id)); } } @@ -89,7 +85,7 @@ private static void updateSketchWithString( @Nullable final String value ) { - if (value == null) { + if (NullHandling.isNullOrEquivalent(value)) { return; } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchHolder.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchHolder.java index 81748bd61809..df0b884eaaec 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchHolder.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchHolder.java @@ -162,4 +162,10 @@ public HllSketchHolder merge(HllSketchHolder other) return this; } } + + @Override + public String toString() + { + return "HllSketchHolder{" + (union != null ? 
union.toString() : sketch.toString()) + "}"; + } } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java index 7f393830f536..b8acb0ce2c22 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java @@ -135,10 +135,28 @@ public void ingestSketchesTimeseries() throws Exception final String queryJson = buildTimeseriesQueryJson("HLLSketchMerge", "sketch", !ROUND); File segmentDir1 = timeseriesFolder.newFolder(); - timeseriesHelper.createIndex(inputFile, parserJson, aggregators, segmentDir1, minTimestamp, gran, maxRowCount, true); + timeseriesHelper.createIndex( + inputFile, + parserJson, + aggregators, + segmentDir1, + minTimestamp, + gran, + maxRowCount, + true + ); File segmentDir2 = timeseriesFolder.newFolder(); - timeseriesHelper.createIndex(inputFile, parserJson, aggregators, segmentDir2, minTimestamp, gran, maxRowCount, true); + timeseriesHelper.createIndex( + inputFile, + parserJson, + aggregators, + segmentDir2, + minTimestamp, + gran, + maxRowCount, + true + ); Sequence seq = timeseriesHelper.runQueryOnSegments(Arrays.asList(segmentDir1, segmentDir2), queryJson); List results = seq.toList(); @@ -166,41 +184,6 @@ public void buildSketchesAtIngestionTime() throws Exception Assert.assertEquals(1, results.size()); ResultRow row = results.get(0); Assert.assertEquals(200, (double) row.get(0), 0.1); - - // Check specific sketch: result should be deterministic given same inputs in same order. This helps makes sure - // that our logic remains compatible across versions. 
- - final String expectedSketch; - - if (stringEncoding == StringEncoding.UTF16LE) { - expectedSketch = - "AwEHDAkIAAHIAAAAAcoDCQOUmAsElgIEcSjfBAmeJQYLkjgKDFoJBQ6yWQUP9hIPONtCBxPgngsW9l4IGfQIChp8/wQbxkIHHXaYC+0y0Q" - + "eEPH8EJ3ATBSrq7wYrds8ELKKxBjH4RAszAMEFOJLSDzkQqgg6gNUEP0KEBwlswAVDtA0dRTzeBMx7tApHhJILSYB9CEpS4QVQcMsF3n" - + "SbBFmoKhRbbokL+kJiCl44/AVlpqEEcer/Bmd8iQlofI0Ha7heB23oKARuPnMGcb7fB3VY4wh55qsHe/h1CHzYUwamr80Hf16DCYBwzg" - + "i+g0wFhMhvBIsKgAiNhmIGjmBkB6/iiw/pUEcGhBAcBGZmVAeh9REFoCxhD6Es1Q+l4CwJqlRpBKsM1gfMxd0KtIwxCrYsHQ63UowVuQ" - + "qjB73OIAy+OuwHweJIBwN+PQWWXBwLxubHCsnuvgfLEGcHzPAPB86SMgTLCPAIiUmHB9VePwbYkMUG3sKGCyK9lQoWlpkH5/BcB+m2wg" - + "ftNIEGS0tPBviYvAv5SLUGtEUfBvtiMwX9viUE/h7SDgGNNQwCBQMFB6dcBPVhlwcJ77sLC+ebCw3dygoP3Z4FEMGyBxFLpAUf2bsHIu" - + "3JBSXnTwUq3/4SK2mVBy5/7QQvcwIFMCk0CTLbWwQ0P+4ENkkTCjgt+Qw8tXoGQtEPE+lYCgdEqUwLxgTJDEv9gQdN4W4ETsFoCU/zNw" - + "ZTpzEPVE3EBlXvPARXHaIEW3+oE1yZzAZhI7EEY583EWTztAVlcwoGZi0iGiKPGAS/Z6MFezmHBIFVHReGx8UIibNbBYEZzAiQowUIms" - + "sVB5wTzQahxdAFpMWTDKWjjwims/4Fqbd7BaoRyh2tsZwLrwXmBLET4gu0azkFuAWtBry3Dge+PwUNvy9jBMCzegYEfLsGa7LSCsVzoA" - + "rKO+IGzGUaC8/XMgTRD58H1YNvBtY1lwUaeD8FyRyaC+3c8Qfd44IT9U2hBOnuvQzjn2QHvifoBufnqw44YtMM6mGgB/7YVQXyGboGPG" - + "+1E/SHpQr1M0UH4BX8BPkTeQX602UF/ZESEf/DRQs="; - } else { - expectedSketch = - "AwEHDAkIAAHIAAAAAPp4CQGkVQwCbBkJBQKkDgsQ4gcOMh8IEJhlExuaBgd+R1EH/lKSDH1cvxsv+IIEue3UBjIAygY0jNQENawDBotViQ" - + "Y3iIcHO6wsBj80qwZE5r8GRuL2DEfMjAdJeDYHUXQ6BVWetwlWaGwLWNhTBFpAbQZeliYI8eOEBGiAJwVp1isGd94kCHgYHhGqpg8GwH" - + "9VBoB4UAqcyxUQheCsGIciaw6JvKgGi1xBDI1i0gWPLNkOkBDgC5KA9AqTFiQIl8TGBplo/wSeZFwGow5EBeOCjw2mdoIEqaqsBarS1w" - + "+u1ioPr7ywCbJsyge5+eEFuIw5BbncbgXaxlcEwk4JFcQy2QnHhJ4Uywa4DtEkEQbU9qIL2STtD9rcygTbYAUI2wIoBd1GHAbi8uYF48" - + "BYEuZUJAxSK2sI6ZwIBusYkRTs1pQEuWlaCLk9MQX4wqoaAPb9CvyuJgf+SEwE/06oCQD7lwcBH8MGAgHuGHgVqw0Ic04JDoWnEg/H5Q" - + "oRlZkVEm2xBhS/lQYa/b8THXcwBh9TsgUikxwQJZ1FHoifNwony8ILflUbBSppxwsrHTsELX0JBi6PYgkxL10GMl8nDioLmgXskNwFOf" - + 
"WCCTyZYg0+8YgKQQMdDE3pbA5OGScOEIDyBFJJ9QdXBV4EW62RDmTjrwUA7pwVZwecDGnP9gZrVRsGbbOpEXHRsB52LxEGdxPyDnivTw" - + "n+PBgEe02DBH3nEAR+vUoaf18LBlFkNgSFaRsKhvktCIj36guL8yAbj++8BpGfJwWTDXkblLFVBpdT5gWYB1IRmiNTD7XNXwqeA2UP6b" - + "SaBTHddAbej8sEqzU0C6z5YA6vpRcKsve8Fzl5HAk88agZt6tFCFJn6hO5t5kFvN2cC71rfga/QaAOwL3zCsM59wbFv4gEmViuBciRjg" - + "bJSXQMy9GbEOPSBQTNd2QEzzmRES+iLxLT8ZEF1K3oKNa36gTYC/0JDpijBN1h7AneDRAe35diD+ENjwnio30H5KVqFOjHFgbppaIS6x" - + "dHBotkQRbzC3EFUke5CvYXVwn3CZwI+jVbBv4n3xE="; - } - - Assert.assertEquals("\"" + expectedSketch + "\"", groupByHelper.getObjectMapper().writeValueAsString(row.get(1))); } @Test @@ -243,43 +226,6 @@ public void buildSketchesAtQueryTime() throws Exception Assert.assertEquals(1, results.size()); ResultRow row = results.get(0); Assert.assertEquals(200, (double) row.get(0), 0.1); - - // Check specific sketch: result should be deterministic given same inputs in same order. This helps makes sure - // that our logic remains compatible across versions. - - final String expectedSketch; - - if (stringEncoding == StringEncoding.UTF16LE) { - expectedSketch = - "AwEHDAkIAAHIAAAAAcoDCQOUmAsElgIEcSjfBAmeJQYLkjgKDFoJBQ6yWQUP9hIPONtCBxPgngsW9l4IGfQIChp8/wQbxkIHHXaYC+0y0QeE" - + "PH8EJ3ATBSrq7wYrds8ELKKxBjH4RAszAMEFOJLSDzkQqgg6gNUEP0KEBwlswAVDtA0d6bbCB0U83gTMe7QKR4SSC0mAfQhKUuEFUHDLBd" - + "50mwRZqCoUW26JC144/AVlpqEEcer/Bmd8iQlofI0HtGs5BWu4Xgdt6CgEbj5zBnG+3wd1WOMIeearB3v4dQh82FMGpq/NB39egwmAcM4I" - + "voNMBYTIbwSLCoAIjYZiBo5gZAev4osP6VBHBoQQHAQ8tXoGZmZUB6H1EQWgLGEPoSzVD6XgLAmqVGkEqwzWB8zF3Qq0jDEKtiwdDrdSjB" - + "W5CqMHvc4gDL467AfB4kgHA349BZZcHAvG5scKye6+B8sQZwfM8A8HzpIyBMsI8AiJSYcH1V4/BtiQxQbewoYLIr2VChaWmQfn8FwH6VgK" - + "B+00gQZLS08G+Ji8C/lItQb6QmIK+2IzBf2+JQT+HtIOAY01DAIFAwUHp1wE9WGXBwnvuwsL55sLDd3KCg/dngUQwbIHEUukBR/Zuwci7c" - + "kFJedPBSrf/hIraZUHLn/tBC9zAgUwKTQJMttbBDQ/7gQ2SRMKOC35DDxvtRNC0Q8T9U2hBESpTAvGBMkMS/2BB03hbgROwWgJT/M3BlOn" - + 
"MQ9UTcQGVe88BFcdogRbf6gTXJnMBmEjsQRjnzcRZPO0BWVzCgZmLSIaIo8YBL9nowV7OYcEgVUdF4bHxQiJs1sFgRnMCJCjBQiayxUHnB" - + "PNBqHF0AWkxZMMpaOPCKaz/gWpt3sFqhHKHa2xnAuvBeYEsRPiC7RFHwa4Ba0GvLcOB74/BQ2/L2MEwLN6BgR8uwZrstIKxXOgCso74gbM" - + "ZRoLz9cyBNEPnwfVg28G1jWXBRp4PwXJHJoL7dzxB93jghPp7r0M459kB74n6Abn56sOOGLTDOphoAf+2FUF8hm6BvSHpQr1M0UH4BX8BP" - + "kTeQX602UF/ZESEf/DRQs="; - } else { - // UTF-8 - expectedSketch = - "AwEHDAkIAAHIAAAAAPp4CQGkVQwCbBkJBQKkDgsQ4gcOMh8IEJhlExuaBgd+R1EHL/iCBLnt1AYyAMoGNIzUBDWsAwaLVYkGN4iHBzusLAY" - + "/NKsGROa/Bkbi9gxHzIwHSXg2B6qmDwZRZDYEVZ63CVZobAtY2FMEWkBtBl6WJghogCcFadYrBnfeJAh4GB4RfVy/G4B4UAqF4KwYhyJr" - + "DjyZYg2LZEEWjWLSBY8s2Q6QEOALkoD0CpMWJAi5t5kF2tzKBJfExgaZaP8EnmRcBqMORAXjgo8NpnaCBKmqrAWq0tcPrtYqD6+8sAmyb" - + "MoHufnhBbiMOQW53G4Fwk4JFcQy2QnHhJ4Uywa4DtEkEQbU9qIL2STtD9rGVwTbYAUI2wIoBd1GHAbi8uYF48BYEuZUJAyJvKgG6ZwIBu" - + "sYkRTskNwFAO6cFbk9MQX4wqoa6bSaBQD2/Qr8riYH/lKSDP9OqAkA+5cHAR/DBgIB7hgIc04JDoWnEg/H5QoRlZkVEm2xBhS/lQYa/b8" - + "THXcwBh9TsgUikxwQ/khMBCWdRR6InzcKJ8vCC35VGwUqaccLKx07BC19CQYuj2IJMS9dBjJfJw4qC5oFOfWCCYtcQQw88agZPvGICkED" - + "HQxN6WwOThknDnivTwkQgPIEUitrCFcFXgRbrZEOZOOvBWcHnAxpz/YGa1UbBm2zqRFx0bAedi8RBncT8g54FasN/jwYBHtNgwR95xAEf" - + "r1KGn9fCwaFaRsKhvktCIj36guL8yAbUXQ6BY/vvAaRnycFkw15G5SxVQaXU+YFmAdSEZojUw+cyxUQngNlD8C98wox3XQG3o/LBKs1NA" - + "us+WAOr6UXCrL3vBc5eRwJtc1fCrerRQhSZ+oTuWlaCLzdnAu9a34Gv0GgDsB/VQbs1pQEwzn3BsW/iASZWK4FyJGOBslJdAzL0ZsQ49I" - + "FBM13ZATPOZERL6IvEtPxkQXUrego1rfqBNgL/QkOmKME3WHsCd4NEB7fl2IP4Q2PCeKjfQfkpWoU6McWBumlohLrF0cG8eOEBPMLcQVS" - + "R7kK9hdXCVJJ9Qf6NVsG9wmcCP4n3xE=" - ; - } - - Assert.assertEquals("\"" + expectedSketch + "\"", groupByHelper.getObjectMapper().writeValueAsString(row.get(1))); } @Test @@ -350,19 +296,6 @@ public void buildSketchesAtQueryTimeMultiValue() throws Exception Assert.assertEquals(1, results.size()); ResultRow row = results.get(0); Assert.assertEquals(14, (double) row.get(0), 0.1); - - // Check specific sketch: result should be deterministic given same inputs in same 
order. This helps makes sure - // that our logic remains compatible across versions. - - final String expectedSketch; - - if (stringEncoding == StringEncoding.UTF16LE) { - expectedSketch = "AwEHDAUIAAEOAAAAhDx/BKWjjwiJs1sFRTzeBMnuvgfYkMUGyRyaC39egwmJSYcHOGLTDDkQqgg6gNUEGfQICj9ChAc="; - } else { - expectedSketch = "AwEHDAUIAAEOAAAAwH9VBlJJ9QfJSXQMqqYPBq7WKg+PLNkOsve8F9PxkQXYC/0JOfWCCbzdnAsddzAG/lKSDB9TsgU="; - } - - Assert.assertEquals("\"" + expectedSketch + "\"", groupByHelper.getObjectMapper().writeValueAsString(row.get(1))); } @Test @@ -384,31 +317,6 @@ public void roundBuildSketch() throws Exception Assert.assertEquals(1, results.size()); ResultRow row = results.get(0); Assert.assertEquals(200L, (long) row.get(0)); - - // Check specific sketch: result should be deterministic given same inputs in same order. This helps makes sure - // that our logic remains compatible across versions. - - final String expectedSketch; - - if (stringEncoding == StringEncoding.UTF16LE) { - expectedSketch = - "AwEHDAkIAAHIAAAAAcoDCQOUmAsElgIEcSjfBAmeJQYLkjgKDFoJBQ6yWQUP9hIPONtCBxPgngsW9l4IGfQIChp8/wQbxkIHHXaYC+0y0QeEP" - + "H8EJ3ATBSrq7wYrds8ELKKxBjH4RAszAMEFOJLSDzkQqgg6gNUEP0KEBwlswAVDtA0d6bbCB0U83gTMe7QKR4SSC0mAfQhKUuEFUHDLBd50" - + "mwRZqCoUW26JC144/AVlpqEEcer/Bmd8iQlofI0HtGs5BWu4Xgdt6CgEbj5zBnG+3wd1WOMIeearB3v4dQh82FMGpq/NB39egwmAcM4IvoN" - + "MBYTIbwSLCoAIjYZiBo5gZAev4osP6VBHBoQQHAQ8tXoGZmZUB6H1EQWgLGEPoSzVD6XgLAmqVGkEqwzWB8zF3Qq0jDEKtiwdDrdSjBW5Cq" - + "MHvc4gDL467AfB4kgHA349BZZcHAvG5scKye6+B8sQZwfM8A8HzpIyBMsI8AiJSYcH1V4/BtiQxQbewoYLIr2VChaWmQfn8FwH6VgKB+00g" - + "QZLS08G+Ji8C/lItQb6QmIK+2IzBf2+JQT+HtIOAY01DAIFAwUHp1wE9WGXBwnvuwsL55sLDd3KCg/dngUQwbIHEUukBR/Zuwci7ckFJedP" - + "BSrf/hIraZUHLn/tBC9zAgUwKTQJMttbBDQ/7gQ2SRMKOC35DDxvtRNC0Q8T9U2hBESpTAvGBMkMS/2BB03hbgROwWgJT/M3BlOnMQ9UTcQ" - + "GVe88BFcdogRbf6gTXJnMBmEjsQRjnzcRZPO0BWVzCgZmLSIaIo8YBL9nowV7OYcEgVUdF4bHxQiJs1sFgRnMCJCjBQiayxUHnBPNBqHF0A" - + 
"WkxZMMpaOPCKaz/gWpt3sFqhHKHa2xnAuvBeYEsRPiC7RFHwa4Ba0GvLcOB74/BQ2/L2MEwLN6BgR8uwZrstIKxXOgCso74gbMZRoLz9cyB" - + "NEPnwfVg28G1jWXBRp4PwXJHJoL7dzxB93jghPp7r0M459kB74n6Abn56sOOGLTDOphoAf+2FUF8hm6BvSHpQr1M0UH4BX8BPkTeQX602UF" - + "/ZESEf/DRQs="; - } else { - expectedSketch = - "AwEHDAkIAAHIAAAAAPp4CQGkVQwCbBkJBQKkDotkQRYLEOIHDjIfCBCYZRMbmgYHfkdRB7lpWgj+UpIML/iCBLnt1AYyAMoGNIzUBDWsAwaLVYkGN4iHBzusLAY/NKsGROa/Bkbi9gxHzIwHSXg2B6qmDwZRdDoFVZ63CVZobAtY2FMEWkBtBl6WJghogCcFadYrBnfeJAh4GB4RfVy/G4B4UAqcyxUQheCsGIciaw6JvKgGi1xBDI1i0gWPLNkOkBDgC5KA9AqTFiQIl8TGBplo/wSeZFwGow5EBeOCjw2mdoIEqaqsBarS1w+u1ioPr7ywCbJsyge5+eEFuIw5BbncbgXaxlcEwk4JFcQy2QnHhJ4Uywa4DtEkEQbU9qIL2STtD9rcygTbYAUI2wIoBd1GHAbi8uYF48BYEuZUJAxSK2sI6bSaBesYkRTs1pQEAO6cFbk9MQX4wqoaAPb9CvyuJgf+SEwE/06oCQD7lwcBH8MGAgHuGAhzTgkOhacSD8flChGVmRUSbbEGFL+VBhr9vxMddzAGH1OyBSKTHBAlnUUeiJ83CifLwgt+VRsFKmnHCysdOwQtfQkGLo9iCTEvXQYyXycOKguaBeyQ3AXpnAgGOfWCCTyZYg0+8YgKQQMdDE3pbA5OGScOeK9PCRCA8gRSSfUHVwVeBFutkQ5k468FZwecDGnP9gZrVRsGbbOpEXHRsB52LxEGdxPyDngVqw3+PBgEe02DBH3nEAR+vUoaf18LBlFkNgSFaRsKhvktCIj36guL8yAbj++8BpGfJwWTDXkblLFVBpdT5gWYB1IRmiNTD7XNXwqeA2UPwL3zCjHddAbej8sEqzU0C6z5YA6vpRcKsve8Fzl5HAk88agZt6tFCFJn6hO5t5kFvN2cC71rfga/QaAOwH9VBsM59wbFv4gEmViuBciRjgbJSXQMy9GbEOPSBQTNd2QEzzmRES+iLxLT8ZEF1K3oKNa36gTYC/0JDpijBN1h7AneDRAe35diD+ENjwnio30H5KVqFOjHFgbppaIS6xdHBvHjhATzC3EFUke5CvYXVwn3CZwI+jVbBv4n3xE="; - } - - Assert.assertEquals("\"" + expectedSketch + "\"", groupByHelper.getObjectMapper().writeValueAsString(row.get(1))); } @Test @@ -430,24 +338,6 @@ public void roundMergeSketch() throws Exception Assert.assertEquals(1, results.size()); ResultRow row = results.get(0); Assert.assertEquals(200L, (long) row.get(0)); - - // Check specific sketch: result should be deterministic given same inputs in same order. This helps makes sure - // that our logic remains compatible across versions. 
- - final String expectedSketch = - "AwEHDAkIAAHIAAAAAiK0BAWIUw8GQkoICxROBBVEtgcXLiAGGbKkCRq80AUbIqkIH3iCByBm4xIlXqYJK/L7BixGBQYu5AsOLxoTBzSiYQ42" - + "KrIEN6znB7AS2wY/kEgE/JC2BUZKtwRH1rMNSizzBE744gVTYroEuD/5B9IWcwdeersMY+ISCNeMnQ/VIhcTakA7DWvM/gR0XmkKdwL+B+" - + "AxWRB7iqkJfHS5B4G8XQbeUy0E8ranC4WKqwuHtqoEimJoFo3EiQmT7KIJlH6hFJQHwgSaiDcHnQpsD57kmxjOoO8FoXrkCdSLjASkeLMH" - + "qE7tBKlYMwmqPmMHrjyIEbjsbwmwPp0Hsqi4BrVMrQ+4VqkMk8R2Bb0gUg6+9PgGv842FMOcmh/GYjQHyHokBMqMXgXNWtkMzvBbHzBDXw" - + "7SfIwR1VKVCdd4NAbaXBIH21ItBNxaAQbd9tsG31Z4EXvL9gfppI4TZBO3BvJ8HAb5+PEE+a6eD/yaMREGfe8RDF8FBQ1fegkRy64GFMeI" - + "DxcFewTDsDgEIKkZDiI76wUkz84NKjnxBi9TdhMwRVkLMiViBTWpMQQ2RwkH4K6sCDmLKAc7aa4EPtleBkD7DQ5B19cISe1qBoGehQfp0i" - + "gFU4PaB1R7Rw9Zf9QN753zEPTT8gthmacHZD0WBWXfihDyeFwMbeUdB27FNAZvy0oHcMl2B3FDyAl1gWYHed9lBHtl5ggfuiIIfSM8CoDB" - + "8wmBrzIEhGnMBYYv+Q2IJ6AEiV2yDI1JrAWPw7AG9D3fD5TDIQSXvygMmUlCBJq7YwSeLWoHn6ugDMPdUQSj85wKpG+rBqYRUQSpO7UHqn" - + "OHFq23zAiwW0YS58+VF7YvCQa3Gc8HuAH8Erl/PgW7yQEFvhmBBQZzZQTB6RcFwnHPBakaJQfEtZ8HxbGmEcYZagTL18IEywPsBs5JcwXP" - + "4UgE0U97BdMTZwXUOwoH1ZcUB9bP8QY7vb0M2RtKCNsXmgrdn3oL3tsZC+DtSQfhpXQG7eg9BeYLowvnUe4Ek1QxBe8t9wfzofoJl7tgGv" - + "XpZg32cfIG+2nDBvwtQgo="; - - Assert.assertEquals("\"" + expectedSketch + "\"", groupByHelper.getObjectMapper().writeValueAsString(row.get(1))); } @Test @@ -516,22 +406,6 @@ public void testPostAggs() throws Exception + " OutOfOrder Flag: false\n" + " Coupon Count : 200\n"; - // Check specific sketch: result should be deterministic given same inputs in same order. This helps makes sure - // that our logic remains compatible across versions. 
- - final String expectedSketchBase64 = - "AwEHDAkIAAHIAAAAAiK0BAWIUw8GQkoICxROBBVEtgcXLiAGGbKkCRq80AUbIqkIH3iCByBm4xIlXqYJK/L7BixGBQYu5AsOLxoTBzSiYQ42" - + "KrIEN6znB7AS2wY/kEgE/JC2BUZKtwRH1rMNSizzBE744gVTYroEuD/5B9IWcwdeersMY+ISCNeMnQ/VIhcTakA7DWvM/gR0XmkKdwL+B+" - + "AxWRB7iqkJfHS5B4G8XQbeUy0E8ranC4WKqwuHtqoEimJoFo3EiQmT7KIJlH6hFJQHwgSaiDcHnQpsD57kmxjOoO8FoXrkCdSLjASkeLMH" - + "qE7tBKlYMwmqPmMHrjyIEbjsbwmwPp0Hsqi4BrVMrQ+4VqkMk8R2Bb0gUg6+9PgGv842FMOcmh/GYjQHyHokBMqMXgXNWtkMzvBbH9J8jB" - + "HVUpUJ13g0BtpcEgfbUi0E3FoBBt322wbfVngRe8v2B+mkjhNkE7cG8nwcBvn48QT5rp4P/JoxEQZ97xEMXwUFDV96CRHLrgYUx4gPFwV7" - + "BMOwOAQgqRkOIjvrBSTPzg0qOfEG753zEC9TdhMwQ18OMiViBTWpMQQ2RwkH4K6sCDmLKAc7aa4EPtleBkD7DQ5B19cISe1qBoGehQfp0i" - + "gFU4PaB1R7Rw9Zf9QN0xNnBfTT8gthmacHZD0WBWXfihDyeFwMbeUdB27FNAZvy0oHcMl2B3FDyAl1gWYHed9lBHtl5ggfuiIIfSM8CoDB" - + "8wmBrzIEhGnMBYYv+Q2IJ6AEiV2yDI1JrAWPw7AG9D3fD5TDIQSXvygMmUlCBJq7YwSeLWoHn6ugDMPdUQSj85wKpG+rBqYRUQSpO7UHqn" - + "OHFq23zAiwW0YS58+VF7YvCQa3Gc8HuAH8Erl/PgW7yQEFvhmBBQZzZQTB6RcFwnHPBakaJQfEtZ8HxbGmEcYZagTL18IEywPsBs5JcwXP" - + "4UgE0U97BTBFWQvUOwoH1ZcUB9bP8QY7vb0M2RtKCNsXmgrdn3oL3tsZC+DtSQfhpXQG7eg9BeYLowvnUe4Ek1QxBe8t9wfzofoJl7tgGv" - + "XpZg32cfIG+2nDBvwtQgo="; - List results = seq.toList(); Assert.assertEquals(1, results.size()); ResultRow row = results.get(0); @@ -541,14 +415,6 @@ public void testPostAggs() throws Exception Assert.assertEquals(expectedSummary, row.get(3)); // union with self = self Assert.assertEquals(expectedSummary, ((HllSketchHolder) row.get(4)).getSketch().toString()); - Assert.assertEquals( - "\"" + expectedSketchBase64 + "\"", - groupByHelper.getObjectMapper().writeValueAsString(row.get(4)) - ); - Assert.assertEquals( - "\"" + expectedSketchBase64 + "\"", - groupByHelper.getObjectMapper().writeValueAsString(row.get(5)) - ); } private static String buildParserJson(List dimensions, List columns) @@ -612,13 +478,14 @@ private static Map buildAggregatorObject( StringEncoding stringEncoding ) { - return ImmutableMap.of( - 
"type", aggregationType, - "name", "sketch", - "fieldName", aggregationFieldName, - "round", aggregationRound, - "stringEncoding", stringEncoding.toString() - ); + return ImmutableMap.builder() + .put("type", aggregationType) + .put("name", "sketch") + .put("fieldName", aggregationFieldName) + .put("round", aggregationRound) + .put("tgtHllType", "HLL_8") + .put("stringEncoding", stringEncoding.toString()) + .build(); } private String buildGroupByQueryJson( diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java index f399a3a514e7..eec6cd602641 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java @@ -133,6 +133,11 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest 2L }; + /** + * Expected virtual columns for {@link #testHllSketchPostAggsTimeseries()}, + * {@link #testHllSketchPostAggsGroupBy()}, {@link #testHllSketchFilteredAggregatorsTimeseries()}, and + * {@link #testHllSketchFilteredAggregatorsGroupBy()}. + */ private static final List EXPECTED_PA_VIRTUAL_COLUMNS = ImmutableList.of( new ExpressionVirtualColumn( @@ -149,6 +154,9 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest ) ); + /** + * Expected aggregators for {@link #testHllSketchPostAggsTimeseries()} and {@link #testHllSketchPostAggsGroupBy()}. 
+ */ private static final List EXPECTED_PA_AGGREGATORS = ImmutableList.of( new HllSketchBuildAggregatorFactory("a0", "dim2", null, null, null, false, true), @@ -160,12 +168,20 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest new HllSketchBuildAggregatorFactory("a6", "dim2", null, null, StringEncoding.UTF8, true, true) ); + /** + * Expected aggregators for {@link #testHllSketchFilteredAggregatorsTimeseries()} and + * {@link #testHllSketchFilteredAggregatorsGroupBy()}. + */ private static final List EXPECTED_FILTERED_AGGREGATORS = EXPECTED_PA_AGGREGATORS.stream() .limit(5) .map(factory -> new FilteredAggregatorFactory(factory, selector("dim2", "a", null))) .collect(Collectors.toList()); + /** + * Expected post-aggregators for {@link #testHllSketchPostAggsTimeseries()} and + * {@link #testHllSketchPostAggsGroupBy()}. + */ private static final List EXPECTED_PA_POST_AGGREGATORS = ImmutableList.of( new HllSketchToEstimatePostAggregator("p1", new FieldAccessPostAggregator("p0", "a0"), false), @@ -182,6 +198,10 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest new HllSketchToEstimatePostAggregator("p20", new FieldAccessPostAggregator("p19", "a0"), true) ); + /** + * Expected post-aggregators for {@link #testHllSketchFilteredAggregatorsTimeseries()} and + * {@link #testHllSketchFilteredAggregatorsGroupBy()}. 
+ */ private static final List EXPECTED_FILTERED_POST_AGGREGATORS = ImmutableList.of( new HllSketchToEstimatePostAggregator("p1", new FieldAccessPostAggregator("p0", "a0"), false), @@ -328,9 +348,9 @@ public void testApproxCountDistinctHllSketch() ), new HllSketchBuildAggregatorFactory("a3", "v0", null, null, null, null, ROUND), new HllSketchBuildAggregatorFactory("a4", "v1", null, null, null, null, ROUND), - new HllSketchMergeAggregatorFactory("a5", "hllsketch_dim1", 21, "HLL_8", null, null,ROUND), + new HllSketchMergeAggregatorFactory("a5", "hllsketch_dim1", 21, "HLL_8", null, null, ROUND), new HllSketchMergeAggregatorFactory("a6", "hllsketch_dim1", null, null, null, null, ROUND), - new HllSketchMergeAggregatorFactory("a7", "hllsketch_dim1", 21, "HLL_4", null,null, ROUND) + new HllSketchMergeAggregatorFactory("a7", "hllsketch_dim1", 21, "HLL_4", null, null, ROUND) ) ) .context(QUERY_CONTEXT_DEFAULT) From 7ebd1e44ba1c31c9c2bdd72d3b9f82fea8c8bf7c Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 29 Jun 2023 17:36:36 -0700 Subject: [PATCH 12/13] Fix conflict. 
--- .../coordinator/duty/ITAutoCompactionTest.java | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java index 83fa39be54f3..3c40affa7834 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/coordinator/duty/ITAutoCompactionTest.java @@ -266,13 +266,16 @@ public void testAutoCompactionRowWithMetricAndRowWithoutMetricShouldPreserveExis new CountAggregatorFactory("count"), new LongSumAggregatorFactory("sum_added", "added"), new SketchMergeAggregatorFactory("thetaSketch", "user", 16384, true, false, null), -<<<<<<< HEAD - new HllSketchBuildAggregatorFactory("HLLSketchBuild", "user", 12, TgtHllType.HLL_4.name(), null, false), - new DoublesSketchAggregatorFactory("quantilesDoublesSketch", "delta", 128, 1000000000L) -======= - new HllSketchBuildAggregatorFactory("HLLSketchBuild", "user", 12, TgtHllType.HLL_4.name(), false, false), + new HllSketchBuildAggregatorFactory( + "HLLSketchBuild", + "user", + 12, + TgtHllType.HLL_4.name(), + null, + false, + false + ), new DoublesSketchAggregatorFactory("quantilesDoublesSketch", "delta", 128, 1000000000L, null) ->>>>>>> master }, false ); From 784cb4c1926e4f512e7588ebecc4ca1427cb87b9 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Fri, 30 Jun 2023 08:33:02 -0700 Subject: [PATCH 13/13] Adjustments. 
--- .../hll/HllSketchAggregatorFactory.java | 2 +- .../hll/sql/HllSketchSqlAggregatorTest.java | 36 +++++++++--------- ...ngDefaultUTF16LEJsonIncludeFilterTest.java | 37 +++++++++++++++++++ 3 files changed, 56 insertions(+), 19 deletions(-) create mode 100644 processing/src/test/java/org/apache/druid/java/util/common/StringEncodingDefaultUTF16LEJsonIncludeFilterTest.java diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java index a8393203029e..4bc734dc0051 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorFactory.java @@ -24,9 +24,9 @@ import org.apache.datasketches.hll.HllSketch; import org.apache.datasketches.hll.TgtHllType; import org.apache.datasketches.hll.Union; +import org.apache.druid.jackson.DefaultTrueJsonIncludeFilter; import org.apache.druid.java.util.common.StringEncoding; import org.apache.druid.java.util.common.StringEncodingDefaultUTF16LEJsonIncludeFilter; -import org.apache.druid.jackson.DefaultTrueJsonIncludeFilter; import org.apache.druid.query.aggregation.AggregateCombiner; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.ObjectAggregateCombiner; diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java index eec6cd602641..498bb06d9afd 100644 --- 
a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java @@ -110,25 +110,25 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest "\"AgEHDAMIBgC1EYgH1mlHBwsKPwu5SK8MIiUxB7iZVwU=\"", 2L, "### HLL SKETCH SUMMARY: \n" - + " Log Config K : 12\n" - + " Hll Target : HLL_4\n" - + " Current Mode : LIST\n" - + " Memory : false\n" - + " LB : 2.0\n" - + " Estimate : 2.000000004967054\n" - + " UB : 2.000099863468538\n" - + " OutOfOrder Flag: false\n" - + " Coupon Count : 2\n", + + " Log Config K : 12\n" + + " Hll Target : HLL_4\n" + + " Current Mode : LIST\n" + + " Memory : false\n" + + " LB : 2.0\n" + + " Estimate : 2.000000004967054\n" + + " UB : 2.000099863468538\n" + + " OutOfOrder Flag: false\n" + + " Coupon Count : 2\n", "### HLL SKETCH SUMMARY: \n" - + " LOG CONFIG K : 12\n" - + " HLL TARGET : HLL_4\n" - + " CURRENT MODE : LIST\n" - + " MEMORY : FALSE\n" - + " LB : 2.0\n" - + " ESTIMATE : 2.000000004967054\n" - + " UB : 2.000099863468538\n" - + " OUTOFORDER FLAG: FALSE\n" - + " COUPON COUNT : 2\n", + + " LOG CONFIG K : 12\n" + + " HLL TARGET : HLL_4\n" + + " CURRENT MODE : LIST\n" + + " MEMORY : FALSE\n" + + " LB : 2.0\n" + + " ESTIMATE : 2.000000004967054\n" + + " UB : 2.000099863468538\n" + + " OUTOFORDER FLAG: FALSE\n" + + " COUPON COUNT : 2\n", 2.0, 2L }; diff --git a/processing/src/test/java/org/apache/druid/java/util/common/StringEncodingDefaultUTF16LEJsonIncludeFilterTest.java b/processing/src/test/java/org/apache/druid/java/util/common/StringEncodingDefaultUTF16LEJsonIncludeFilterTest.java new file mode 100644 index 000000000000..4b93291d779c --- /dev/null +++ b/processing/src/test/java/org/apache/druid/java/util/common/StringEncodingDefaultUTF16LEJsonIncludeFilterTest.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.java.util.common; + +import org.junit.Assert; +import org.junit.Test; + +public class StringEncodingDefaultUTF16LEJsonIncludeFilterTest +{ + private final StringEncodingDefaultUTF16LEJsonIncludeFilter filter = + new StringEncodingDefaultUTF16LEJsonIncludeFilter(); + + @Test + @SuppressWarnings({"SimplifiableAssertion", "EqualsBetweenInconvertibleTypes"}) + public void testFilter() + { + Assert.assertTrue(filter.equals(StringEncoding.UTF16LE)); + Assert.assertFalse(filter.equals(StringEncoding.UTF8)); + } +}