From 7917b49e8667758f10dc8f8244bf56d019445962 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Sun, 19 Nov 2017 23:52:46 -0800 Subject: [PATCH] Add DimensionSelector id -> X caches. Adds the method DimensionSelectorUtils.cacheIfPossible to aid with creation of caches for evaluating functions of dimension dictionary IDs. Uses an array cache when possible, an LRU cache when it seems like it might be a good idea, and skips caching otherwise. The caches are used in three places in this patch: - Cardinality aggregator (caches hashes) - Single-dimension-input expression selector (caches expression results) - Generic expression selector (caches name for any dimension inputs) Benchmarking showed that the LRU cache could introduce unwanted overhead for dimensions with few repeating values, so this patch implements two mitigations: - Skip caching completely if cardinality is >90% of the row count. - After iterating through a few multiples of the cache size, examine the cache hit rate and decide to either freeze it (the cache becomes read-only) or ignore it (the cache is cleared and future operations are uncached). The caches aim to be <1MB per column. --- benchmarks/pom.xml | 6 + docs/content/configuration/historical.md | 2 +- .../cardinality/CardinalityAggregator.java | 6 +- ...alityAggregatorColumnSelectorStrategy.java | 9 +- ...gregatorColumnSelectorStrategyFactory.java | 13 ++- ...alityAggregatorColumnSelectorStrategy.java | 18 ++- ...alityAggregatorColumnSelectorStrategy.java | 14 ++- ...alityAggregatorColumnSelectorStrategy.java | 18 ++- ...alityAggregatorColumnSelectorStrategy.java | 44 ++++--- .../dimension/ArrayCacheIntFunction.java | 56 +++++++++ .../ColumnSelectorStrategyFactory.java | 6 +- .../query/dimension/LruCacheIntFunction.java | 109 ++++++++++++++++++ ...eMatcherColumnSelectorStrategyFactory.java | 2 +- .../epinephelinae/GroupByQueryEngineV2.java | 2 +- .../epinephelinae/RowBasedGrouperHelper.java | 2 +- .../druid/query/search/SearchQueryRunner.java | 2 +- .../druid/query/select/SelectQueryEngine.java | 2 +- .../TopNColumnSelectorStrategyFactory.java | 2 +- .../druid/segment/ColumnSelectorFactory.java | 11 ++ .../druid/segment/DimensionHandlerUtils.java | 9 +- .../druid/segment/DimensionSelectorUtils.java | 38 ++++++ .../QueryableIndexColumnSelectorFactory.java | 6 + ...IncrementalIndexColumnSelectorFactory.java | 6 + .../segment/virtual/ExpressionSelectors.java | 24 +++- ...tCachingExpressionColumnValueSelector.java | 85 +++++--------- .../VirtualizedColumnSelectorFactory.java | 6 + .../CardinalityAggregatorBenchmark.java | 3 +- .../CardinalityAggregatorTest.java | 36 +++--- .../ExpressionColumnValueSelectorTest.java | 4 +- 29 files changed, 409 insertions(+), 132 deletions(-) create mode 100644 processing/src/main/java/io/druid/query/dimension/ArrayCacheIntFunction.java create mode 100644 processing/src/main/java/io/druid/query/dimension/LruCacheIntFunction.java diff --git a/benchmarks/pom.xml b/benchmarks/pom.xml index 8ec16d5a9226..5113e91e1de3 100644 --- a/benchmarks/pom.xml +++ b/benchmarks/pom.xml @@ -67,6 +67,12 @@ ${project.parent.version} test-jar + + io.druid + druid-server + ${project.parent.version} + test-jar + io.druid druid-sql diff --git a/docs/content/configuration/historical.md b/docs/content/configuration/historical.md index 04390a3f468a..0914449cf487 100644 --- a/docs/content/configuration/historical.md +++ b/docs/content/configuration/historical.md @@ -63,7 +63,7 @@ Druid uses Jetty to serve HTTP requests. |`druid.processing.formatString`|Realtime and historical nodes use this format string to name their processing threads.|processing-%s| |`druid.processing.numMergeBuffers`|The number of direct memory buffers available for merging query results. The buffers are sized by `druid.processing.buffer.sizeBytes`. This property is effectively a concurrency limit for queries that require merging buffers. If you are using any queries that require merge buffers (currently, just groupBy v2) then you should have at least two of these.|`max(2, druid.processing.numThreads / 4)`| |`druid.processing.numThreads`|The number of processing threads to have available for parallel processing of segments. Our rule of thumb is `num_cores - 1`, which means that even under heavy load there will still be one core available to do background tasks like talking with ZooKeeper and pulling down segments. If only one core is available, this property defaults to the value `1`.|Number of cores - 1 (or 1)| -|`druid.processing.columnCache.sizeBytes`|Maximum size in bytes for the dimension value lookup cache. Any value greater than `0` enables the cache. It is currently disabled by default. Enabling the lookup cache can significantly improve the performance of aggregators operating on dimension values, such as the JavaScript aggregator, or cardinality aggregator, but can slow things down if the cache hit rate is low (i.e. dimensions with few repeating values). Enabling it may also require additional garbage collection tuning to avoid long GC pauses.|`0` (disabled)| +|`druid.processing.columnCache.sizeBytes`|Maximum size in bytes for the dimension value lookup cache. Any value greater than `0` enables the cache. It is currently disabled by default, since most query features that would benefit from it already implement their own caching (with the notable exception of the JavaScript aggregator). Enabling this may require additional garbage collection tuning to avoid long GC pauses.|`0` (disabled)| |`druid.processing.fifo`|If the processing queue should treat tasks of equal priority in a FIFO manner|`false`| |`druid.processing.tmpDir`|Path where temporary files created while processing a query should be stored. If specified, this configuration takes priority over the default `java.io.tmpdir` path.|path represented by `java.io.tmpdir`| diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java index 5b71dcb1584d..90f8116e25af 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java @@ -50,7 +50,7 @@ static void hashRow( } ColumnSelectorPlus selectorPlus = selectorPluses[k]; - selectorPlus.getColumnSelectorStrategy().hashRow(selectorPlus.getSelector(), hasher); + selectorPlus.getColumnSelectorStrategy().hashRow(hasher); } collector.add(hasher.hash().asBytes()); } @@ -61,7 +61,7 @@ static void hashValues( ) { for (final ColumnSelectorPlus selectorPlus : selectorPluses) { - selectorPlus.getColumnSelectorStrategy().hashValues(selectorPlus.getSelector(), collector); + selectorPlus.getColumnSelectorStrategy().hashValues(collector); } } @@ -75,7 +75,7 @@ static void hashValues( boolean byRow ) { - this(name, selectorPlusList.toArray(new ColumnSelectorPlus[] {}), byRow); + this(name, selectorPlusList.toArray(new ColumnSelectorPlus[]{}), byRow); } CardinalityAggregator( diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorColumnSelectorStrategy.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorColumnSelectorStrategy.java index 807eb3ae7ae4..0d86b154f0b6 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorColumnSelectorStrategy.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorColumnSelectorStrategy.java @@ -23,22 +23,19 @@ import io.druid.hll.HyperLogLogCollector; import io.druid.query.dimension.ColumnSelectorStrategy; -public interface CardinalityAggregatorColumnSelectorStrategy extends ColumnSelectorStrategy +public interface CardinalityAggregatorColumnSelectorStrategy extends ColumnSelectorStrategy { /*** * Retrieve the current row from dimSelector and add the row values to the hasher. * - * @param dimSelector Dimension value selector * @param hasher Hasher used for cardinality aggregator calculations */ - void hashRow(ValueSelectorType dimSelector, Hasher hasher); - + void hashRow(Hasher hasher); /** * Retrieve the current row from dimSelector and add the row values to HyperLogLogCollector. * - * @param dimSelector Dimension value selector * @param collector HLL collector used for cardinality aggregator calculations */ - void hashValues(ValueSelectorType dimSelector, HyperLogLogCollector collector); + void hashValues(HyperLogLogCollector collector); } diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorColumnSelectorStrategyFactory.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorColumnSelectorStrategyFactory.java index 17b629c46609..2037049f2ac9 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorColumnSelectorStrategyFactory.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorColumnSelectorStrategyFactory.java @@ -22,6 +22,7 @@ import io.druid.java.util.common.IAE; import io.druid.query.dimension.ColumnSelectorStrategyFactory; import io.druid.segment.ColumnValueSelector; +import io.druid.segment.DimensionSelector; import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ValueType; @@ -30,19 +31,21 @@ public class CardinalityAggregatorColumnSelectorStrategyFactory { @Override public CardinalityAggregatorColumnSelectorStrategy makeColumnSelectorStrategy( - ColumnCapabilities capabilities, ColumnValueSelector selector + ColumnCapabilities capabilities, + ColumnValueSelector selector, + int numRows ) { ValueType type = capabilities.getType(); switch (type) { case STRING: - return new StringCardinalityAggregatorColumnSelectorStrategy(); + return new StringCardinalityAggregatorColumnSelectorStrategy((DimensionSelector) selector, numRows); case LONG: - return new LongCardinalityAggregatorColumnSelectorStrategy(); + return new LongCardinalityAggregatorColumnSelectorStrategy(selector); case FLOAT: - return new FloatCardinalityAggregatorColumnSelectorStrategy(); + return new FloatCardinalityAggregatorColumnSelectorStrategy(selector); case DOUBLE: - return new DoubleCardinalityAggregatorColumnSelectorStrategy(); + return new DoubleCardinalityAggregatorColumnSelectorStrategy(selector); default: throw new IAE("Cannot create query type helper from invalid type [%s]", type); } diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/DoubleCardinalityAggregatorColumnSelectorStrategy.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/DoubleCardinalityAggregatorColumnSelectorStrategy.java index 5fd15ae30c1b..3ce1c11dcc60 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/DoubleCardinalityAggregatorColumnSelectorStrategy.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/DoubleCardinalityAggregatorColumnSelectorStrategy.java @@ -25,18 +25,24 @@ import io.druid.segment.BaseDoubleColumnValueSelector; -public class DoubleCardinalityAggregatorColumnSelectorStrategy - implements CardinalityAggregatorColumnSelectorStrategy +public class DoubleCardinalityAggregatorColumnSelectorStrategy implements CardinalityAggregatorColumnSelectorStrategy { + private final BaseDoubleColumnValueSelector selector; + + public DoubleCardinalityAggregatorColumnSelectorStrategy(final BaseDoubleColumnValueSelector selector) + { + this.selector = selector; + } + @Override - public void hashRow(BaseDoubleColumnValueSelector dimSelector, Hasher hasher) + public void hashRow(Hasher hasher) { - hasher.putDouble(dimSelector.getDouble()); + hasher.putDouble(selector.getDouble()); } @Override - public void hashValues(BaseDoubleColumnValueSelector dimSelector, HyperLogLogCollector collector) + public void hashValues(HyperLogLogCollector collector) { - collector.add(CardinalityAggregator.hashFn.hashLong(Double.doubleToLongBits(dimSelector.getDouble())).asBytes()); + collector.add(CardinalityAggregator.hashFn.hashLong(Double.doubleToLongBits(selector.getDouble())).asBytes()); } } diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/FloatCardinalityAggregatorColumnSelectorStrategy.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/FloatCardinalityAggregatorColumnSelectorStrategy.java index b46261c7b15e..68670e722857 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/FloatCardinalityAggregatorColumnSelectorStrategy.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/FloatCardinalityAggregatorColumnSelectorStrategy.java @@ -24,17 +24,23 @@ import io.druid.query.aggregation.cardinality.CardinalityAggregator; import io.druid.segment.BaseFloatColumnValueSelector; -public class FloatCardinalityAggregatorColumnSelectorStrategy - implements CardinalityAggregatorColumnSelectorStrategy +public class FloatCardinalityAggregatorColumnSelectorStrategy implements CardinalityAggregatorColumnSelectorStrategy { + private final BaseFloatColumnValueSelector selector; + + public FloatCardinalityAggregatorColumnSelectorStrategy(final BaseFloatColumnValueSelector selector) + { + this.selector = selector; + } + @Override - public void hashRow(BaseFloatColumnValueSelector selector, Hasher hasher) + public void hashRow(Hasher hasher) { hasher.putFloat(selector.getFloat()); } @Override - public void hashValues(BaseFloatColumnValueSelector selector, HyperLogLogCollector collector) + public void hashValues(HyperLogLogCollector collector) { collector.add(CardinalityAggregator.hashFn.hashInt(Float.floatToIntBits(selector.getFloat())).asBytes()); } diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/LongCardinalityAggregatorColumnSelectorStrategy.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/LongCardinalityAggregatorColumnSelectorStrategy.java index a666ed64e1a7..071b4423ea34 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/LongCardinalityAggregatorColumnSelectorStrategy.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/LongCardinalityAggregatorColumnSelectorStrategy.java @@ -24,18 +24,24 @@ import io.druid.query.aggregation.cardinality.CardinalityAggregator; import io.druid.segment.BaseLongColumnValueSelector; -public class LongCardinalityAggregatorColumnSelectorStrategy - implements CardinalityAggregatorColumnSelectorStrategy +public class LongCardinalityAggregatorColumnSelectorStrategy implements CardinalityAggregatorColumnSelectorStrategy { + private BaseLongColumnValueSelector selector; + + public LongCardinalityAggregatorColumnSelectorStrategy(final BaseLongColumnValueSelector selector) + { + this.selector = selector; + } + @Override - public void hashRow(BaseLongColumnValueSelector dimSelector, Hasher hasher) + public void hashRow(Hasher hasher) { - hasher.putLong(dimSelector.getLong()); + hasher.putLong(selector.getLong()); } @Override - public void hashValues(BaseLongColumnValueSelector dimSelector, HyperLogLogCollector collector) + public void hashValues(HyperLogLogCollector collector) { - collector.add(CardinalityAggregator.hashFn.hashLong(dimSelector.getLong()).asBytes()); + collector.add(CardinalityAggregator.hashFn.hashLong(selector.getLong()).asBytes()); } } diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/StringCardinalityAggregatorColumnSelectorStrategy.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/StringCardinalityAggregatorColumnSelectorStrategy.java index 71661abf91b9..ea145e14a4ee 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/StringCardinalityAggregatorColumnSelectorStrategy.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/StringCardinalityAggregatorColumnSelectorStrategy.java @@ -23,28 +23,42 @@ import io.druid.hll.HyperLogLogCollector; import io.druid.query.aggregation.cardinality.CardinalityAggregator; import io.druid.segment.DimensionSelector; +import io.druid.segment.DimensionSelectorUtils; import io.druid.segment.data.IndexedInts; import java.util.Arrays; +import java.util.function.IntFunction; -public class StringCardinalityAggregatorColumnSelectorStrategy implements CardinalityAggregatorColumnSelectorStrategy +public class StringCardinalityAggregatorColumnSelectorStrategy implements CardinalityAggregatorColumnSelectorStrategy { - public static final String CARDINALITY_AGG_NULL_STRING = "\u0000"; - public static final char CARDINALITY_AGG_SEPARATOR = '\u0001'; + private static final String CARDINALITY_AGG_NULL_STRING = "\u0000"; + private static final char CARDINALITY_AGG_SEPARATOR = '\u0001'; + + // Number of entries to cache. Each one is a 128 bit hash, so with overhead, 12500 entries occupies about 250KB + private static final int CACHE_SIZE = 12500; + + private final DimensionSelector selector; + private final IntFunction hashFunction; + + public StringCardinalityAggregatorColumnSelectorStrategy(final DimensionSelector selector, final int numRows) + { + this.selector = selector; + this.hashFunction = DimensionSelectorUtils.cacheIfPossible(selector, this::hashOneValue, numRows, CACHE_SIZE); + } @Override - public void hashRow(DimensionSelector dimSelector, Hasher hasher) + public void hashRow(final Hasher hasher) { - final IndexedInts row = dimSelector.getRow(); + final IndexedInts row = selector.getRow(); final int size = row.size(); // nothing to add to hasher if size == 0, only handle size == 1 and size != 0 cases. if (size == 1) { - final String value = dimSelector.lookupName(row.get(0)); + final String value = selector.lookupName(row.get(0)); hasher.putUnencodedChars(nullToSpecial(value)); } else if (size != 0) { final String[] values = new String[size]; for (int i = 0; i < size; ++i) { - final String value = dimSelector.lookupName(row.get(i)); + final String value = selector.lookupName(row.get(i)); values[i] = nullToSpecial(value); } // Values need to be sorted to ensure consistent multi-value ordering across different segments @@ -59,17 +73,21 @@ public void hashRow(DimensionSelector dimSelector, Hasher hasher) } @Override - public void hashValues(DimensionSelector dimSelector, HyperLogLogCollector collector) + public void hashValues(final HyperLogLogCollector collector) { - IndexedInts row = dimSelector.getRow(); + IndexedInts row = selector.getRow(); for (int i = 0; i < row.size(); i++) { - int index = row.get(i); - final String value = dimSelector.lookupName(index); - collector.add(CardinalityAggregator.hashFn.hashUnencodedChars(nullToSpecial(value)).asBytes()); + collector.add(hashFunction.apply(row.get(i))); } } - private String nullToSpecial(String value) + private byte[] hashOneValue(final int id) + { + final String value = selector.lookupName(id); + return CardinalityAggregator.hashFn.hashUnencodedChars(nullToSpecial(value)).asBytes(); + } + + private static String nullToSpecial(String value) { return value == null ? CARDINALITY_AGG_NULL_STRING : value; } diff --git a/processing/src/main/java/io/druid/query/dimension/ArrayCacheIntFunction.java b/processing/src/main/java/io/druid/query/dimension/ArrayCacheIntFunction.java new file mode 100644 index 000000000000..f99bca720313 --- /dev/null +++ b/processing/src/main/java/io/druid/query/dimension/ArrayCacheIntFunction.java @@ -0,0 +1,56 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.query.dimension; + +import java.util.function.IntFunction; + +/** + * Array cache for an IntFunction, intended for use with DimensionSelectors. + * + * @see io.druid.segment.DimensionSelectorUtils#cacheIfPossible + */ +public class ArrayCacheIntFunction implements IntFunction +{ + private final IntFunction function; + private final Object[] cache; + + public ArrayCacheIntFunction(final IntFunction function, final int cacheSize) + { + this.function = function; + this.cache = new Object[cacheSize]; + } + + @Override + public T apply(final int id) + { + // Will not cache the result if "function" returns null. I'm hoping that this is the right choice, and enabling + // null caching isn't worth the overhead of using some additional data structures to differentiate between a null + // result and an uncached result. + + if (cache[id] == null) { + final T value = function.apply(id); + cache[id] = value; + return value; + } else { + //noinspection unchecked + return (T) cache[id]; + } + } +} diff --git a/processing/src/main/java/io/druid/query/dimension/ColumnSelectorStrategyFactory.java b/processing/src/main/java/io/druid/query/dimension/ColumnSelectorStrategyFactory.java index 9cc2b49e56e7..10378129bddc 100644 --- a/processing/src/main/java/io/druid/query/dimension/ColumnSelectorStrategyFactory.java +++ b/processing/src/main/java/io/druid/query/dimension/ColumnSelectorStrategyFactory.java @@ -24,5 +24,9 @@ public interface ColumnSelectorStrategyFactory { - ColumnSelectorStrategyClass makeColumnSelectorStrategy(ColumnCapabilities capabilities, ColumnValueSelector selector); + ColumnSelectorStrategyClass makeColumnSelectorStrategy( + ColumnCapabilities capabilities, + ColumnValueSelector selector, + int numRows + ); } diff --git a/processing/src/main/java/io/druid/query/dimension/LruCacheIntFunction.java b/processing/src/main/java/io/druid/query/dimension/LruCacheIntFunction.java new file mode 100644 index 000000000000..7172d9003a15 --- /dev/null +++ b/processing/src/main/java/io/druid/query/dimension/LruCacheIntFunction.java @@ -0,0 +1,109 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.query.dimension; + +import it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap; + +import java.util.function.IntFunction; + +/** + * LRU cache for an IntFunction, intended for use with DimensionSelectors. + * + * @see io.druid.segment.DimensionSelectorUtils#cacheIfPossible + */ +public class LruCacheIntFunction implements IntFunction +{ + // After a certain point, freeze or ignore the cache, based on hit rate. The idea is that hopefully by then, + // LRU should have some reasonable values in the cache if there are reasonable values to be found. + private static final int CACHE_FREEZE_FACTOR = 10; + + enum State + { + ACTIVE, + FROZEN, + IGNORED + } + + private final IntFunction function; + private final int cacheSize; + private final long lookupsBeforeFreezing; + private Int2ObjectLinkedOpenHashMap cache; + + private State state = State.IGNORED; + private long hits; + private long lookups; + + public LruCacheIntFunction( + final IntFunction function, + final int cacheSize + ) + { + this.function = function; + this.cacheSize = cacheSize; + this.lookupsBeforeFreezing = cacheSize * CACHE_FREEZE_FACTOR; + this.cache = new Int2ObjectLinkedOpenHashMap<>(cacheSize); + } + + @Override + public T apply(final int id) + { + if (state == State.IGNORED) { + return function.apply(id); + } else if (state == State.FROZEN) { + final T cachedValue = cache.get(id); + if (cachedValue != null) { + return cachedValue; + } else { + return function.apply(id); + } + } + + lookups++; + + T value = cache.getAndMoveToFirst(id); + + if (value == null) { + value = function.apply(id); + + while (cache.size() >= cacheSize) { + cache.removeLast(); + } + + cache.putAndMoveToFirst(id, value); + } else { + hits++; + } + + // After a certain point, freeze or ignore the cache, based on hit rate. + if (lookups > lookupsBeforeFreezing) { + if (hits < lookups / 3) { + // Hit rate < 33% + state = State.IGNORED; + + // Drop reference to cache to help GC + cache = null; + } else { + state = State.FROZEN; + } + } + + return value; + } +} diff --git a/processing/src/main/java/io/druid/query/filter/ValueMatcherColumnSelectorStrategyFactory.java b/processing/src/main/java/io/druid/query/filter/ValueMatcherColumnSelectorStrategyFactory.java index 7bedf8b7ccae..3a96ec057911 100644 --- a/processing/src/main/java/io/druid/query/filter/ValueMatcherColumnSelectorStrategyFactory.java +++ b/processing/src/main/java/io/druid/query/filter/ValueMatcherColumnSelectorStrategyFactory.java @@ -42,7 +42,7 @@ public static ValueMatcherColumnSelectorStrategyFactory instance() @Override public ValueMatcherColumnSelectorStrategy makeColumnSelectorStrategy( - ColumnCapabilities capabilities, ColumnValueSelector selector + ColumnCapabilities capabilities, ColumnValueSelector selector, int numRows ) { ValueType type = capabilities.getType(); diff --git a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index 6db22e1efb19..ff6949df63bd 100644 --- a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -237,7 +237,7 @@ private static class GroupByStrategyFactory implements ColumnSelectorStrategyFac { @Override public GroupByColumnSelectorStrategy makeColumnSelectorStrategy( - ColumnCapabilities capabilities, ColumnValueSelector selector + ColumnCapabilities capabilities, ColumnValueSelector selector, int numRows ) { ValueType type = capabilities.getType(); diff --git a/processing/src/main/java/io/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java b/processing/src/main/java/io/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java index 290cf13753bf..c8e997fa15a4 100644 --- a/processing/src/main/java/io/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java +++ b/processing/src/main/java/io/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java @@ -551,7 +551,7 @@ private static class InputRawSupplierColumnSelectorStrategyFactory { @Override public InputRawSupplierColumnSelectorStrategy makeColumnSelectorStrategy( - ColumnCapabilities capabilities, ColumnValueSelector selector + ColumnCapabilities capabilities, ColumnValueSelector selector, int numRows ) { ValueType type = capabilities.getType(); diff --git a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java index 7921d717b9a4..3abc144968b2 100644 --- a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java +++ b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java @@ -71,7 +71,7 @@ private static class SearchColumnSelectorStrategyFactory { @Override public SearchColumnSelectorStrategy makeColumnSelectorStrategy( - ColumnCapabilities capabilities, ColumnValueSelector selector + ColumnCapabilities capabilities, ColumnValueSelector selector, int numRows ) { ValueType type = capabilities.getType(); diff --git a/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java b/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java index bd3268f003c1..6d0665dbafad 100644 --- a/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java +++ b/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java @@ -69,7 +69,7 @@ public static class SelectStrategyFactory implements ColumnSelectorStrategyFacto { @Override public SelectColumnSelectorStrategy makeColumnSelectorStrategy( - ColumnCapabilities capabilities, ColumnValueSelector selector + ColumnCapabilities capabilities, ColumnValueSelector selector, int numRows ) { ValueType type = capabilities.getType(); diff --git a/processing/src/main/java/io/druid/query/topn/types/TopNColumnSelectorStrategyFactory.java b/processing/src/main/java/io/druid/query/topn/types/TopNColumnSelectorStrategyFactory.java index 1d252f6a5759..9d8acb534c0b 100644 --- a/processing/src/main/java/io/druid/query/topn/types/TopNColumnSelectorStrategyFactory.java +++ b/processing/src/main/java/io/druid/query/topn/types/TopNColumnSelectorStrategyFactory.java @@ -29,7 +29,7 @@ public class TopNColumnSelectorStrategyFactory implements ColumnSelectorStrategy { @Override public TopNColumnSelectorStrategy makeColumnSelectorStrategy( - ColumnCapabilities capabilities, ColumnValueSelector selector + ColumnCapabilities capabilities, ColumnValueSelector selector, int numRows ) { ValueType type = capabilities.getType(); diff --git a/processing/src/main/java/io/druid/segment/ColumnSelectorFactory.java b/processing/src/main/java/io/druid/segment/ColumnSelectorFactory.java index 40fe05862c90..a7d79f629387 100644 --- a/processing/src/main/java/io/druid/segment/ColumnSelectorFactory.java +++ b/processing/src/main/java/io/druid/segment/ColumnSelectorFactory.java @@ -31,6 +31,8 @@ @PublicApi public interface ColumnSelectorFactory { + int ROWS_UNKNOWN = -1; + DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec); /** @@ -50,4 +52,13 @@ public interface ColumnSelectorFactory */ @Nullable ColumnCapabilities getColumnCapabilities(String column); + + /** + * Returns the number of rows in the backing store for this column selector factory, or ROWS_UNKNOWN if unknown. + * Note that this number may change over time, e.g. for a realtime index. + */ + default int getNumRows() + { + return ROWS_UNKNOWN; + } } diff --git a/processing/src/main/java/io/druid/segment/DimensionHandlerUtils.java b/processing/src/main/java/io/druid/segment/DimensionHandlerUtils.java index c10ed586325f..2a71e3568018 100644 --- a/processing/src/main/java/io/druid/segment/DimensionHandlerUtils.java +++ b/processing/src/main/java/io/druid/segment/DimensionHandlerUtils.java @@ -147,6 +147,7 @@ ColumnSelectorPlus[] createColumnSelectorPluses( ) { int dimCount = dimensionSpecs.size(); + int numRows = columnSelectorFactory.getNumRows(); ColumnSelectorPlus[] dims = new ColumnSelectorPlus[dimCount]; for (int i = 0; i < dimCount; i++) { final DimensionSpec dimSpec = dimensionSpecs.get(i); @@ -159,7 +160,8 @@ ColumnSelectorPlus[] createColumnSelectorPluses( strategyFactory, dimSpec, columnSelectorFactory.getColumnCapabilities(dimSpec.getDimension()), - selector + selector, + numRows ); final ColumnSelectorPlus selectorPlus = new ColumnSelectorPlus<>( dimName, @@ -230,11 +232,12 @@ private static Colu ColumnSelectorStrategyFactory strategyFactory, DimensionSpec dimSpec, ColumnCapabilities capabilities, - ColumnValueSelector selector + ColumnValueSelector selector, + int numRows ) { capabilities = getEffectiveCapabilities(dimSpec, capabilities); - return strategyFactory.makeColumnSelectorStrategy(capabilities, selector); + return strategyFactory.makeColumnSelectorStrategy(capabilities, selector, numRows); } @Nullable diff --git a/processing/src/main/java/io/druid/segment/DimensionSelectorUtils.java b/processing/src/main/java/io/druid/segment/DimensionSelectorUtils.java index f83be70241b7..56ff7464c087 100644 --- a/processing/src/main/java/io/druid/segment/DimensionSelectorUtils.java +++ b/processing/src/main/java/io/druid/segment/DimensionSelectorUtils.java @@ -23,6 +23,8 @@ import com.google.common.base.Predicates; import com.google.common.base.Strings; import io.druid.java.util.common.IAE; +import io.druid.query.dimension.ArrayCacheIntFunction; +import io.druid.query.dimension.LruCacheIntFunction; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.ValueMatcher; import io.druid.query.monomorphicprocessing.RuntimeShapeInspector; @@ -32,6 +34,7 @@ import javax.annotation.Nullable; import java.util.BitSet; import java.util.Objects; +import java.util.function.IntFunction; public final class DimensionSelectorUtils { @@ -270,4 +273,39 @@ public static DimensionSelector constantSelector( return constantSelector(extractionFn.apply(value)); } } + + /** + * Return "function" possibly decorated with array or LRU based caching. The function had better be operating on + * ids from the "selector" you pass, or else unexpected behavior will occur. + * + * @param selector a dimension selector + * @param function a function that receives ids from "selector" + * @param numRows number of rows in the storage backing this selector, or + * {@link ColumnSelectorFactory#ROWS_UNKNOWN} if unknown + * @param cacheSize maximum cache size + */ + public static IntFunction cacheIfPossible( + final DimensionSelector selector, + final IntFunction function, + final int numRows, + final int cacheSize + ) + { + if (selector.getValueCardinality() == DimensionSelector.CARDINALITY_UNKNOWN + || !selector.nameLookupPossibleInAdvance()) { + // Caching is not possible, oh well. + return function; + } else if (selector.getValueCardinality() <= cacheSize) { + // Use an array cache when possible. + return new ArrayCacheIntFunction<>(function, selector.getValueCardinality()); + } else if (numRows != ColumnSelectorFactory.ROWS_UNKNOWN && selector.getValueCardinality() <= numRows / 10 * 9) { + // LRU cache has noticeable overhead and is only worth it for dimensions with some repeating values. So only use + // it if dimension cardinality is < 90% of row count. Note that internally the LRU cache also has a mechanism for + // disabling itself if it is getting poor hit rates. + return new LruCacheIntFunction<>(function, cacheSize); + } else { + // Caching is possible, but we choose not to do it, since cardinality is too high. + return function; + } + } } diff --git a/processing/src/main/java/io/druid/segment/QueryableIndexColumnSelectorFactory.java b/processing/src/main/java/io/druid/segment/QueryableIndexColumnSelectorFactory.java index 2f0d3122c477..d5ede2ecc0e0 100644 --- a/processing/src/main/java/io/druid/segment/QueryableIndexColumnSelectorFactory.java +++ b/processing/src/main/java/io/druid/segment/QueryableIndexColumnSelectorFactory.java @@ -148,4 +148,10 @@ public ColumnCapabilities getColumnCapabilities(String columnName) return QueryableIndexStorageAdapter.getColumnCapabilites(index, columnName); } + + @Override + public int getNumRows() + { + return index.getNumRows(); + } } diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexColumnSelectorFactory.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexColumnSelectorFactory.java index d965b80aa120..912557ba2fdc 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexColumnSelectorFactory.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexColumnSelectorFactory.java @@ -145,4 +145,10 @@ public ColumnCapabilities getColumnCapabilities(String columnName) return index.getCapabilities(columnName); } + + @Override + public int getNumRows() + { + return index.size(); + } } diff --git a/processing/src/main/java/io/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/io/druid/segment/virtual/ExpressionSelectors.java index 2c2a3a80a93f..4bd41f49e6f5 100644 --- a/processing/src/main/java/io/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/io/druid/segment/virtual/ExpressionSelectors.java @@ -48,9 +48,15 @@ import javax.annotation.Nullable; import java.util.List; import java.util.Map; +import java.util.function.IntFunction; public class ExpressionSelectors { + // Number of values to cache per input. Expected entry size is the length of strings from a dimension selector. + // Size this relatively small, being conservative with memory. With relatively long 1KB strings, this will use 500KB + // per input column. More common case should be < 100KB. + private static final int DIMENSION_SUPPLIER_CACHE_SIZE = 500; + private ExpressionSelectors() { // No instantiation. @@ -141,7 +147,8 @@ public static ColumnValueSelector makeExprEvalSelector( // Optimization for expressions that hit one string column and nothing else. return new SingleStringInputCachingExpressionColumnValueSelector( columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(column, column, ValueType.STRING)), - expression + expression, + columnSelectorFactory.getNumRows() ); } } @@ -244,7 +251,8 @@ private static Expr.ObjectBinding createBindings(Expr expression, ColumnSelector supplier = columnSelectorFactory.makeColumnValueSelector(columnName)::getDouble; } else if (nativeType == ValueType.STRING) { supplier = supplierFromDimensionSelector( - columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName)) + columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName)), + columnSelectorFactory.getNumRows() ); } else if (nativeType == null) { // Unknown ValueType. Try making an Object selector and see if that gives us anything useful. @@ -278,14 +286,22 @@ private static Expr.ObjectBinding createBindings(Expr expression, ColumnSelector @VisibleForTesting @Nonnull - static Supplier supplierFromDimensionSelector(final DimensionSelector selector) + static Supplier supplierFromDimensionSelector(final DimensionSelector selector, final int numRows) { Preconditions.checkNotNull(selector, "selector"); + + final IntFunction lookup = DimensionSelectorUtils.cacheIfPossible( + selector, + selector::lookupName, + numRows, + DIMENSION_SUPPLIER_CACHE_SIZE + ); + return () -> { final IndexedInts row = selector.getRow(); if (row.size() == 1) { - return selector.lookupName(row.get(0)); + return lookup.apply(row.get(0)); } else { // Can't handle non-singly-valued rows in expressions. // Treat them as nulls until we think of something better to do. diff --git a/processing/src/main/java/io/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java b/processing/src/main/java/io/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java index fba53f7d3d10..05d27805972b 100644 --- a/processing/src/main/java/io/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java +++ b/processing/src/main/java/io/druid/segment/virtual/SingleStringInputCachingExpressionColumnValueSelector.java @@ -20,7 +20,6 @@ package io.druid.segment.virtual; import com.google.common.base.Preconditions; -import com.google.common.base.Supplier; import io.druid.java.util.common.ISE; import io.druid.math.expr.Expr; import io.druid.math.expr.ExprEval; @@ -28,10 +27,11 @@ import io.druid.query.monomorphicprocessing.RuntimeShapeInspector; import io.druid.segment.ColumnValueSelector; import io.druid.segment.DimensionSelector; +import io.druid.segment.DimensionSelectorUtils; import io.druid.segment.data.IndexedInts; -import it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap; import javax.annotation.Nullable; +import java.util.function.IntFunction; /** * Like {@link ExpressionColumnValueSelector}, but caches results for the first CACHE_SIZE dictionary IDs of @@ -39,17 +39,19 @@ */ public class SingleStringInputCachingExpressionColumnValueSelector implements ColumnValueSelector { - private static final int CACHE_SIZE = 1000; + // Number of entries to cache. Each one is a primitive + overhead, so 12500 entries occupies about 250KB + private static final int CACHE_SIZE = 12500; private final DimensionSelector selector; private final Expr expression; - private final Expr.ObjectBinding bindings; - private final ExprEval[] arrayEvalCache; - private final LruEvalCache lruEvalCache; + private final SingleInputBindings bindings = new SingleInputBindings(); + private final IntFunction singleValueEvalCache; + private ExprEval nullEval = null; public SingleStringInputCachingExpressionColumnValueSelector( final DimensionSelector selector, - final Expr expression + final Expr expression, + final int numRows ) { // Verify expression has just one binding. @@ -60,18 +62,15 @@ public SingleStringInputCachingExpressionColumnValueSelector( this.selector = Preconditions.checkNotNull(selector, "selector"); this.expression = Preconditions.checkNotNull(expression, "expression"); - final Supplier inputSupplier = ExpressionSelectors.supplierFromDimensionSelector(selector); - this.bindings = name -> inputSupplier.get(); - - if (selector.getValueCardinality() == DimensionSelector.CARDINALITY_UNKNOWN) { - throw new ISE("Selector must have a dictionary"); - } else if (selector.getValueCardinality() <= CACHE_SIZE) { - arrayEvalCache = new ExprEval[selector.getValueCardinality()]; - lruEvalCache = null; - } else { - arrayEvalCache = null; - lruEvalCache = new LruEvalCache(expression, bindings); - } + this.singleValueEvalCache = DimensionSelectorUtils.cacheIfPossible( + selector, + id -> { + bindings.set(selector.lookupName(id)); + return expression.eval(bindings); + }, + numRows, + CACHE_SIZE + ); } @Override @@ -79,6 +78,7 @@ public void inspectRuntimeShape(final RuntimeShapeInspector inspector) { inspector.visit("selector", selector); inspector.visit("expression", expression); + inspector.visit("singleValueEvalCache", singleValueEvalCache); } @Override @@ -117,48 +117,15 @@ private ExprEval eval() final IndexedInts row = selector.getRow(); if (row.size() == 1) { - final int id = row.get(0); - - if (arrayEvalCache != null) { - if (arrayEvalCache[id] == null) { - arrayEvalCache[id] = expression.eval(bindings); - } - return arrayEvalCache[id]; - } else { - assert lruEvalCache != null; - return lruEvalCache.compute(id); - } - } - - return expression.eval(bindings); - } - - public static class LruEvalCache - { - private final Expr expression; - private final Expr.ObjectBinding bindings; - private final Int2ObjectLinkedOpenHashMap m = new Int2ObjectLinkedOpenHashMap<>(CACHE_SIZE); - - public LruEvalCache(final Expr expression, final Expr.ObjectBinding bindings) - { - this.expression = expression; - this.bindings = bindings; - } - - public ExprEval compute(final int id) - { - ExprEval value = m.getAndMoveToFirst(id); - - if (value == null) { - value = expression.eval(bindings); - m.putAndMoveToFirst(id, value); - - if (m.size() > CACHE_SIZE) { - m.removeLast(); - } + return singleValueEvalCache.apply(row.get(0)); + } else { + // Tread non-singly-valued rows as nulls, just like ExpressionSelectors.supplierFromDimensionSelector. + if (nullEval == null) { + bindings.set(null); + nullEval = expression.eval(bindings); } - return value; + return nullEval; } } } diff --git a/processing/src/main/java/io/druid/segment/virtual/VirtualizedColumnSelectorFactory.java b/processing/src/main/java/io/druid/segment/virtual/VirtualizedColumnSelectorFactory.java index 6a9652d864f6..33756d3c9ed9 100644 --- a/processing/src/main/java/io/druid/segment/virtual/VirtualizedColumnSelectorFactory.java +++ b/processing/src/main/java/io/druid/segment/virtual/VirtualizedColumnSelectorFactory.java @@ -73,4 +73,10 @@ public ColumnCapabilities getColumnCapabilities(String columnName) return baseFactory.getColumnCapabilities(columnName); } } + + @Override + public int getNumRows() + { + return baseFactory.getNumRows(); + } } diff --git a/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorBenchmark.java b/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorBenchmark.java index 0e8d1700dc74..67caecff6778 100644 --- a/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorBenchmark.java +++ b/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorBenchmark.java @@ -33,6 +33,7 @@ import io.druid.query.aggregation.cardinality.types.StringCardinalityAggregatorColumnSelectorStrategy; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; +import io.druid.segment.ColumnSelectorFactory; import io.druid.segment.DimensionSelector; import java.nio.ByteBuffer; @@ -89,7 +90,7 @@ public String[] apply(Integer input) final ColumnSelectorPlus dimInfo1 = new ColumnSelectorPlus( dimSpec1.getDimension(), dimSpec1.getOutputName(), - new StringCardinalityAggregatorColumnSelectorStrategy(), + new StringCardinalityAggregatorColumnSelectorStrategy(dim1, ColumnSelectorFactory.ROWS_UNKNOWN), dim1 ); diff --git a/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java b/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java index e8024d913faf..c98dc004dab2 100644 --- a/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java +++ b/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java @@ -44,6 +44,7 @@ import io.druid.query.extraction.RegexDimExtractionFn; import io.druid.query.filter.ValueMatcher; import io.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import io.druid.segment.ColumnSelectorFactory; import io.druid.segment.DimensionSelector; import io.druid.segment.DimensionSelectorUtils; import io.druid.segment.IdLookup; @@ -168,7 +169,7 @@ public ValueMatcher makeValueMatcher(Predicate predicate) @Override public int getValueCardinality() { - return 1; + return lookup.size(); } @Override @@ -318,12 +319,12 @@ public CardinalityAggregatorTest() new ColumnSelectorPlus( dimSpec1.getDimension(), dimSpec1.getOutputName(), - new StringCardinalityAggregatorColumnSelectorStrategy(), dim1 + new StringCardinalityAggregatorColumnSelectorStrategy(dim1, ColumnSelectorFactory.ROWS_UNKNOWN), dim1 ), new ColumnSelectorPlus( dimSpec2.getDimension(), dimSpec2.getOutputName(), - new StringCardinalityAggregatorColumnSelectorStrategy(), dim2 + new StringCardinalityAggregatorColumnSelectorStrategy(dim2, ColumnSelectorFactory.ROWS_UNKNOWN), dim2 ) ); @@ -374,12 +375,19 @@ public CardinalityAggregatorTest() new ColumnSelectorPlus( dimSpec1.getDimension(), dimSpec1.getOutputName(), - new StringCardinalityAggregatorColumnSelectorStrategy(), dim1WithExtraction + new StringCardinalityAggregatorColumnSelectorStrategy( + dim1WithExtraction, + ColumnSelectorFactory.ROWS_UNKNOWN + ), + dim1WithExtraction ), new ColumnSelectorPlus( dimSpec1.getDimension(), dimSpec1.getOutputName(), - new StringCardinalityAggregatorColumnSelectorStrategy(), dim2WithExtraction + new StringCardinalityAggregatorColumnSelectorStrategy( + dim2WithExtraction, + ColumnSelectorFactory.ROWS_UNKNOWN + ), dim2WithExtraction ) ); @@ -395,12 +403,14 @@ public CardinalityAggregatorTest() new ColumnSelectorPlus( dimSpec1.getDimension(), dimSpec1.getOutputName(), - new StringCardinalityAggregatorColumnSelectorStrategy(), dim1ConstantVal + new StringCardinalityAggregatorColumnSelectorStrategy(dim1ConstantVal, ColumnSelectorFactory.ROWS_UNKNOWN), + dim1ConstantVal ), new ColumnSelectorPlus( dimSpec1.getDimension(), dimSpec1.getOutputName(), - new StringCardinalityAggregatorColumnSelectorStrategy(), dim2ConstantVal + new StringCardinalityAggregatorColumnSelectorStrategy(dim2ConstantVal, ColumnSelectorFactory.ROWS_UNKNOWN), + dim2ConstantVal ) ); @@ -443,7 +453,7 @@ public void testAggregateValues() throws Exception public void testBufferAggregateRows() throws Exception { CardinalityBufferAggregator agg = new CardinalityBufferAggregator( - dimInfoList.toArray(new ColumnSelectorPlus[] {}), + dimInfoList.toArray(new ColumnSelectorPlus[]{}), true ); @@ -465,7 +475,7 @@ public void testBufferAggregateRows() throws Exception public void testBufferAggregateValues() throws Exception { CardinalityBufferAggregator agg = new CardinalityBufferAggregator( - dimInfoList.toArray(new ColumnSelectorPlus[] {}), + dimInfoList.toArray(new ColumnSelectorPlus[]{}), false ); @@ -492,14 +502,14 @@ public void testCombineRows() new ColumnSelectorPlus( dimSpec1.getDimension(), dimSpec1.getOutputName(), - new StringCardinalityAggregatorColumnSelectorStrategy(), dim1 + new StringCardinalityAggregatorColumnSelectorStrategy(dim1, ColumnSelectorFactory.ROWS_UNKNOWN), dim1 ) ); List> dimInfo2 = Lists.newArrayList( new ColumnSelectorPlus( dimSpec1.getDimension(), dimSpec1.getOutputName(), - new StringCardinalityAggregatorColumnSelectorStrategy(), dim2 + new StringCardinalityAggregatorColumnSelectorStrategy(dim2, ColumnSelectorFactory.ROWS_UNKNOWN), dim2 ) ); @@ -538,14 +548,14 @@ public void testCombineValues() new ColumnSelectorPlus( dimSpec1.getDimension(), dimSpec1.getOutputName(), - new StringCardinalityAggregatorColumnSelectorStrategy(), dim1 + new StringCardinalityAggregatorColumnSelectorStrategy(dim1, ColumnSelectorFactory.ROWS_UNKNOWN), dim1 ) ); List> dimInfo2 = Lists.newArrayList( new ColumnSelectorPlus( dimSpec1.getDimension(), dimSpec1.getOutputName(), - new StringCardinalityAggregatorColumnSelectorStrategy(), dim2 + new StringCardinalityAggregatorColumnSelectorStrategy(dim2, ColumnSelectorFactory.ROWS_UNKNOWN), dim2 ) ); diff --git a/processing/src/test/java/io/druid/segment/virtual/ExpressionColumnValueSelectorTest.java b/processing/src/test/java/io/druid/segment/virtual/ExpressionColumnValueSelectorTest.java index 8e8e8c1349e4..4ab6367136fb 100644 --- a/processing/src/test/java/io/druid/segment/virtual/ExpressionColumnValueSelectorTest.java +++ b/processing/src/test/java/io/druid/segment/virtual/ExpressionColumnValueSelectorTest.java @@ -22,6 +22,7 @@ import com.google.common.base.Supplier; import io.druid.common.guava.SettableSupplier; import io.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import io.druid.segment.ColumnSelectorFactory; import io.druid.segment.ColumnValueSelector; import io.druid.segment.DimensionSelector; import io.druid.segment.TestObjectColumnSelector; @@ -37,7 +38,8 @@ public void testSupplierFromDimensionSelector() { final SettableSupplier settableSupplier = new SettableSupplier<>(); final Supplier supplier = ExpressionSelectors.supplierFromDimensionSelector( - dimensionSelectorFromSupplier(settableSupplier) + dimensionSelectorFromSupplier(settableSupplier), + ColumnSelectorFactory.ROWS_UNKNOWN ); Assert.assertNotNull(supplier);