From 8034d7734baa8b11e568059b20b7c165de480430 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Mon, 10 Oct 2016 17:59:41 -0700 Subject: [PATCH 01/12] Add dimension type-based interface for query processing --- .../io/druid/query/QueryDimensionInfo.java | 53 +++ .../FilteredAggregatorFactory.java | 139 +----- .../cardinality/CardinalityAggregator.java | 61 +-- .../CardinalityAggregatorFactory.java | 42 +- .../CardinalityBufferAggregator.java | 12 +- .../query/groupby/GroupByQueryEngine.java | 148 ++++-- .../epinephelinae/GroupByQueryEngineV2.java | 110 +++-- .../druid/query/search/SearchQueryRunner.java | 108 +++-- .../druid/query/select/SelectQueryEngine.java | 45 +- .../AggregateTopNMetricFirstAlgorithm.java | 10 +- .../druid/query/topn/BaseTopNAlgorithm.java | 8 +- .../topn/DimExtractionTopNAlgorithm.java | 65 +-- .../druid/query/topn/PooledTopNAlgorithm.java | 24 +- .../topn/TimeExtractionTopNAlgorithm.java | 7 +- .../io/druid/query/topn/TopNAlgorithm.java | 4 +- .../java/io/druid/query/topn/TopNMapFn.java | 14 +- .../java/io/druid/query/topn/TopNParams.java | 19 +- .../io/druid/query/topn/TopNQueryEngine.java | 5 + .../io/druid/segment/DimensionHandler.java | 8 +- .../druid/segment/DimensionHandlerUtil.java | 42 +- .../druid/segment/DimensionMergerLegacy.java | 4 + .../io/druid/segment/DimensionMergerV9.java | 4 +- .../druid/segment/DimensionQueryHelper.java | 359 ++++++++++++++ .../java/io/druid/segment/IndexMerger.java | 2 +- .../segment/QueryableIndexStorageAdapter.java | 126 ++--- .../druid/segment/StringDimensionHandler.java | 22 + .../segment/StringDimensionMergerLegacy.java | 8 + .../segment/StringDimensionQueryHelper.java | 443 ++++++++++++++++++ .../segment/incremental/IncrementalIndex.java | 13 +- .../CardinalityAggregatorBenchmark.java | 10 +- .../CardinalityAggregatorTest.java | 73 ++- .../query/groupby/GroupByQueryRunnerTest.java | 29 +- 32 files changed, 1448 insertions(+), 569 deletions(-) create mode 100644 
processing/src/main/java/io/druid/query/QueryDimensionInfo.java create mode 100644 processing/src/main/java/io/druid/segment/DimensionQueryHelper.java create mode 100644 processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java diff --git a/processing/src/main/java/io/druid/query/QueryDimensionInfo.java b/processing/src/main/java/io/druid/query/QueryDimensionInfo.java new file mode 100644 index 000000000000..b7be2f61f9a6 --- /dev/null +++ b/processing/src/main/java/io/druid/query/QueryDimensionInfo.java @@ -0,0 +1,53 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.query; + +import io.druid.query.dimension.DimensionSpec; +import io.druid.segment.DimensionQueryHelper; + +public class QueryDimensionInfo +{ + public final DimensionSpec spec; + public final DimensionQueryHelper queryHelper; + public final String name; + public final String outputName; + public final Object selector; + public final int keyBufferPosition; + + public QueryDimensionInfo( + DimensionSpec spec, + DimensionQueryHelper queryHelper, + Object selector, + int keyBufferPosition + ) + { + this.spec = spec; + this.queryHelper = queryHelper; + this.name = spec.getDimension(); + this.outputName = spec.getOutputName(); + this.selector = selector; + this.keyBufferPosition = keyBufferPosition; + } + + public int getCardinality() + { + return queryHelper.getCardinality(selector); + } +} diff --git a/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java b/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java index f2e97b21cd6a..3006844aeea5 100644 --- a/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java +++ b/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java @@ -23,6 +23,7 @@ import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.base.Strings; +import com.google.common.collect.Lists; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.filter.DimFilter; import io.druid.query.filter.DruidLongPredicate; @@ -30,7 +31,11 @@ import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcherFactory; import io.druid.segment.ColumnSelectorFactory; +import io.druid.segment.DimensionHandler; +import io.druid.segment.DimensionHandlerUtil; +import io.druid.segment.DimensionQueryHelper; import io.druid.segment.DimensionSelector; +import io.druid.segment.StringDimensionHandler; import io.druid.segment.column.Column; import 
io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ValueType; @@ -228,67 +233,8 @@ public ValueMatcher makeValueMatcher(final String dimension, final Comparable va ); } - final DimensionSelector selector = columnSelectorFactory.makeDimensionSelector( - new DefaultDimensionSpec(dimension, dimension) - ); - - // Compare "value" as a String. - final String valueString = value == null ? null : Strings.emptyToNull(value.toString()); - - // Missing columns match a null or empty string value, and don't match anything else. - if (selector == null) { - return new BooleanValueMatcher(valueString == null); - } - - final int cardinality = selector.getValueCardinality(); - - if (cardinality >= 0) { - // Dictionary-encoded dimension. Compare by id instead of by value to save time. - final int valueId = selector.lookupId(valueString); - - return new ValueMatcher() - { - @Override - public boolean matches() - { - final IndexedInts row = selector.getRow(); - final int size = row.size(); - if (size == 0) { - // null should match empty rows in multi-value columns - return valueString == null; - } else { - for (int i = 0; i < size; ++i) { - if (row.get(i) == valueId) { - return true; - } - } - return false; - } - } - }; - } else { - // Not dictionary-encoded. Skip the optimization. 
- return new ValueMatcher() - { - @Override - public boolean matches() - { - final IndexedInts row = selector.getRow(); - final int size = row.size(); - if (size == 0) { - // null should match empty rows in multi-value columns - return valueString == null; - } else { - for (int i = 0; i < size; ++i) { - if (Objects.equals(selector.lookupName(row.get(i)), valueString)) { - return true; - } - } - return false; - } - } - }; - } + final DimensionQueryHelper queryHelper = DimensionHandlerUtil.makeQueryHelper(dimension, columnSelectorFactory, null); + return queryHelper.getValueMatcher(columnSelectorFactory, value); } public ValueMatcher makeValueMatcher(final String dimension, final DruidPredicateFactory predicateFactory) @@ -298,80 +244,13 @@ public ValueMatcher makeValueMatcher(final String dimension, final DruidPredicat case LONG: return makeLongValueMatcher(dimension, predicateFactory.makeLongPredicate()); case STRING: - return makeStringValueMatcher(dimension, predicateFactory.makeStringPredicate()); + final DimensionQueryHelper queryHelper = DimensionHandlerUtil.makeQueryHelper(dimension, columnSelectorFactory, null); + return queryHelper.getValueMatcher(columnSelectorFactory, predicateFactory); default: return new BooleanValueMatcher(predicateFactory.makeStringPredicate().apply(null)); } } - public ValueMatcher makeStringValueMatcher(final String dimension, final Predicate predicate) - { - final DimensionSelector selector = columnSelectorFactory.makeDimensionSelector( - new DefaultDimensionSpec(dimension, dimension) - ); - - final boolean doesMatchNull = predicate.apply(null); - - if (selector == null) { - return new BooleanValueMatcher(doesMatchNull); - } - - final int cardinality = selector.getValueCardinality(); - - if (cardinality >= 0) { - // Dictionary-encoded dimension. Check every value; build a bitset of matching ids. 
- final BitSet valueIds = new BitSet(cardinality); - for (int i = 0; i < cardinality; i++) { - if (predicate.apply(selector.lookupName(i))) { - valueIds.set(i); - } - } - - return new ValueMatcher() - { - @Override - public boolean matches() - { - final IndexedInts row = selector.getRow(); - final int size = row.size(); - if (size == 0) { - // null should match empty rows in multi-value columns - return doesMatchNull; - } else { - for (int i = 0; i < size; ++i) { - if (valueIds.get(row.get(i))) { - return true; - } - } - return false; - } - } - }; - } else { - // Not dictionary-encoded. Skip the optimization. - return new ValueMatcher() - { - @Override - public boolean matches() - { - final IndexedInts row = selector.getRow(); - final int size = row.size(); - if (size == 0) { - // null should match empty rows in multi-value columns - return doesMatchNull; - } else { - for (int i = 0; i < size; ++i) { - if (predicate.apply(selector.lookupName(row.get(i)))) { - return true; - } - } - return false; - } - } - }; - } - } - private ValueMatcher makeLongValueMatcher(String dimension, DruidLongPredicate predicate) { return Filters.getLongPredicateMatcher( diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java index 791bfa80ad82..8307db027b34 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java @@ -23,74 +23,53 @@ import com.google.common.hash.Hasher; import com.google.common.hash.Hashing; import io.druid.query.aggregation.Aggregator; +import io.druid.query.QueryDimensionInfo; import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector; -import io.druid.segment.DimensionSelector; -import io.druid.segment.data.IndexedInts; -import java.util.Arrays; import java.util.List; public class 
CardinalityAggregator implements Aggregator { - private static final String NULL_STRING = "\u0000"; + public static final String NULL_STRING = "\u0000"; - private final List selectorList; + private final String name; + private final List dimInfoList; private final boolean byRow; - private static final HashFunction hashFn = Hashing.murmur3_128(); + public static final HashFunction hashFn = Hashing.murmur3_128(); public static final char SEPARATOR = '\u0001'; - protected static void hashRow(List selectorList, HyperLogLogCollector collector) + protected static void hashRow(List dimInfoList, HyperLogLogCollector collector) { final Hasher hasher = hashFn.newHasher(); - for (int k = 0; k < selectorList.size(); ++k) { + for (int k = 0; k < dimInfoList.size(); ++k) { if (k != 0) { hasher.putByte((byte) 0); } - final DimensionSelector selector = selectorList.get(k); - final IndexedInts row = selector.getRow(); - final int size = row.size(); - // nothing to add to hasher if size == 0, only handle size == 1 and size != 0 cases. - if (size == 1) { - final String value = selector.lookupName(row.get(0)); - hasher.putUnencodedChars(value != null ? value : NULL_STRING); - } else if (size != 0) { - final String[] values = new String[size]; - for (int i = 0; i < size; ++i) { - final String value = selector.lookupName(row.get(i)); - values[i] = value != null ? 
value : NULL_STRING; - } - // Values need to be sorted to ensure consistent multi-value ordering across different segments - Arrays.sort(values); - for (int i = 0; i < size; ++i) { - if (i != 0) { - hasher.putChar(SEPARATOR); - } - hasher.putUnencodedChars(values[i]); - } - } + + QueryDimensionInfo dimInfo = dimInfoList.get(k); + dimInfo.queryHelper.hashRow(dimInfo.selector, hasher); } collector.add(hasher.hash().asBytes()); } - protected static void hashValues(final List selectors, HyperLogLogCollector collector) + protected static void hashValues(List dimInfoList, HyperLogLogCollector collector) { - for (final DimensionSelector selector : selectors) { - for (final Integer index : selector.getRow()) { - final String value = selector.lookupName(index); - collector.add(hashFn.hashUnencodedChars(value == null ? NULL_STRING : value).asBytes()); - } + for (final QueryDimensionInfo dimInfo : dimInfoList) { + dimInfo.queryHelper.hashValues(dimInfo.selector, collector); } } private HyperLogLogCollector collector; public CardinalityAggregator( - List selectorList, + String name, + List dimInfoList, boolean byRow ) { - this.selectorList = selectorList; + this.name = name; + this.dimInfoList = dimInfoList; this.collector = HyperLogLogCollector.makeLatestCollector(); this.byRow = byRow; } @@ -99,9 +78,9 @@ public CardinalityAggregator( public void aggregate() { if (byRow) { - hashRow(selectorList, collector); + hashRow(dimInfoList, collector); } else { - hashValues(selectorList, collector); + hashValues(dimInfoList, collector); } } @@ -138,7 +117,7 @@ public String getName() @Override public Aggregator clone() { - return new CardinalityAggregator(selectorList, byRow); + return new CardinalityAggregator(name, dimInfoList, byRow); } @Override diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java index 
e7ea88c57932..2dd843f82547 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java @@ -23,9 +23,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Function; import com.google.common.base.Preconditions; -import com.google.common.base.Predicates; import com.google.common.collect.ImmutableList; -import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import io.druid.java.util.common.StringUtils; import io.druid.query.aggregation.Aggregator; @@ -33,12 +31,14 @@ import io.druid.query.aggregation.AggregatorFactoryNotMergeableException; import io.druid.query.aggregation.Aggregators; import io.druid.query.aggregation.BufferAggregator; +import io.druid.query.QueryDimensionInfo; import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector; import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; import io.druid.segment.ColumnSelectorFactory; -import io.druid.segment.DimensionSelector; +import io.druid.segment.DimensionHandlerUtil; +import io.druid.segment.DimensionQueryHelper; import org.apache.commons.codec.binary.Base64; import java.nio.ByteBuffer; @@ -133,44 +133,38 @@ public CardinalityAggregatorFactory( @Override public Aggregator factorize(final ColumnSelectorFactory columnFactory) { - List selectors = makeDimensionSelectors(columnFactory); + List dimInfoList = makeDimensionInfoList(columnFactory); - if (selectors.isEmpty()) { + if (dimInfoList.isEmpty()) { return Aggregators.noopAggregator(); } - return new CardinalityAggregator(selectors, byRow); + return new CardinalityAggregator(name, dimInfoList, byRow); } @Override public BufferAggregator factorizeBuffered(ColumnSelectorFactory columnFactory) { - List selectors = 
makeDimensionSelectors(columnFactory); + List dimInfoList = makeDimensionInfoList(columnFactory); - if (selectors.isEmpty()) { + if (dimInfoList.isEmpty()) { return Aggregators.noopBufferAggregator(); } - return new CardinalityBufferAggregator(selectors, byRow); + return new CardinalityBufferAggregator(dimInfoList, byRow); } - private List makeDimensionSelectors(final ColumnSelectorFactory columnFactory) + private List makeDimensionInfoList(final ColumnSelectorFactory columnSelectorFactory) { - return Lists.newArrayList( - Iterables.filter( - Iterables.transform( - fields, new Function() - { - @Override - public DimensionSelector apply(DimensionSpec input) - { - return columnFactory.makeDimensionSelector(input); - } - } - ), Predicates.notNull() - ) - ); + List dimInfoList = Lists.newArrayList(); + for (DimensionSpec dimSpec : fields) { + DimensionQueryHelper queryHelper = DimensionHandlerUtil.makeQueryHelper(dimSpec.getDimension(), columnSelectorFactory, null); + Object dimSelector = queryHelper.getColumnValueSelector(dimSpec, columnSelectorFactory); + QueryDimensionInfo dimInfo = new QueryDimensionInfo(dimSpec, queryHelper, dimSelector, 0); + dimInfoList.add(dimInfo); + } + return dimInfoList; } @Override diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java index c791dc650f43..52c2203f18d9 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java @@ -20,25 +20,25 @@ package io.druid.query.aggregation.cardinality; import io.druid.query.aggregation.BufferAggregator; +import io.druid.query.QueryDimensionInfo; import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector; -import io.druid.segment.DimensionSelector; import java.nio.ByteBuffer; import 
java.util.List; public class CardinalityBufferAggregator implements BufferAggregator { - private final List selectorList; + private final List dimInfoList; private final boolean byRow; private static final byte[] EMPTY_BYTES = HyperLogLogCollector.makeEmptyVersionedByteArray(); public CardinalityBufferAggregator( - List selectorList, + List dimInfoList, boolean byRow ) { - this.selectorList = selectorList; + this.dimInfoList = dimInfoList; this.byRow = byRow; } @@ -62,9 +62,9 @@ public void aggregate(ByteBuffer buf, int position) try { final HyperLogLogCollector collector = HyperLogLogCollector.makeCollector(buf); if (byRow) { - CardinalityAggregator.hashRow(selectorList, collector); + CardinalityAggregator.hashRow(dimInfoList, collector); } else { - CardinalityAggregator.hashValues(selectorList, collector); + CardinalityAggregator.hashValues(dimInfoList, collector); } } finally { diff --git a/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java b/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java index eacf2b397f07..69961128464c 100644 --- a/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java +++ b/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java @@ -25,7 +25,6 @@ import com.google.common.collect.Iterators; import com.google.common.collect.Lists; import com.google.common.collect.Maps; -import com.google.common.primitives.Ints; import com.google.inject.Inject; import io.druid.collections.ResourceHolder; import io.druid.collections.StupidPool; @@ -43,12 +42,13 @@ import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.BufferAggregator; import io.druid.query.aggregation.PostAggregator; +import io.druid.query.QueryDimensionInfo; import io.druid.query.dimension.DimensionSpec; import io.druid.query.filter.Filter; import io.druid.segment.Cursor; -import io.druid.segment.DimensionSelector; +import io.druid.segment.DimensionHandlerUtil; +import 
io.druid.segment.DimensionQueryHelper; import io.druid.segment.StorageAdapter; -import io.druid.segment.data.IndexedInts; import io.druid.segment.filter.Filters; import org.joda.time.DateTime; import org.joda.time.Interval; @@ -57,7 +57,7 @@ import java.io.Closeable; import java.io.IOException; import java.nio.ByteBuffer; -import java.util.ArrayList; +import java.util.Comparator; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -67,11 +67,61 @@ */ public class GroupByQueryEngine { - private static final int MISSING_VALUE = -1; - private final Supplier config; private final StupidPool intermediateResultsBufferPool; + /* + * Relative reads change the current position of a ByteBuffer. + * The key comparator uses absolute reads to avoid changing the state of the ByteBuffer. + * ByteBuffer does not provide an absolute bulk get() method, so this method provides that functionality. + */ + private static void getBytesFromBuffer(ByteBuffer src, byte[] dst, int srcOffset, int readLen) + { + for (int i = 0; i < readLen; i++) { + dst[i] = src.get(srcOffset + i); + } + } + + private static final Comparator makeKeyComparator(final List dimInfo) + { + final int maxDimIndex = dimInfo.size(); + final Comparator[] comparators = new Comparator[maxDimIndex]; + final int[] keySizes = new int[maxDimIndex]; + + for (int i = 0; i < maxDimIndex; i++) { + comparators[i] = dimInfo.get(i).queryHelper.getGroupingKeyByteComparator(); + keySizes[i] = dimInfo.get(i).queryHelper.getGroupingKeySize(); + } + + return new Comparator() + { + public int compare(ByteBuffer o1, ByteBuffer o2) + { + int pos = 0; + int limit = o1.limit(); + int ret = 0; + int dimIndex = 0; + + while (pos < limit && dimIndex < maxDimIndex) { + int valLen = keySizes[dimIndex]; + byte[] bytes1 = new byte[valLen]; + byte[] bytes2 = new byte[valLen]; + + getBytesFromBuffer(o1, bytes1, pos, valLen); + getBytesFromBuffer(o2, bytes2, pos, valLen); + pos += valLen; + ret = 
comparators[dimIndex].compare(bytes1, bytes2); + if (ret != 0) { + return ret; + } + dimIndex++; + } + return ret; + } + }; + } + + @Inject public GroupByQueryEngine( Supplier config, @@ -121,7 +171,7 @@ public Sequence apply(final Cursor cursor) @Override public RowIterator make() { - return new RowIterator(query, cursor, bufferHolder.get(), config.get()); + return new RowIterator(query, cursor, bufferHolder.get(), config.get(), storageAdapter); } @Override @@ -152,19 +202,21 @@ private static class RowUpdater private final BufferAggregator[] aggregators; private final PositionMaintainer positionMaintainer; - private final Map positions = Maps.newTreeMap(); + private final Map positions; // GroupBy queries tend to do a lot of reads from this. We co-store a hash map to make those reads go faster. private final Map positionsHash = Maps.newHashMap(); public RowUpdater( ByteBuffer metricValues, BufferAggregator[] aggregators, - PositionMaintainer positionMaintainer + PositionMaintainer positionMaintainer, + List dimInfo ) { this.metricValues = metricValues; this.aggregators = aggregators; this.positionMaintainer = positionMaintainer; + this.positions = Maps.newTreeMap(makeKeyComparator(dimInfo)); } public int getNumRows() @@ -179,26 +231,27 @@ public Map getPositions() private List updateValues( ByteBuffer key, - List dims + List dims ) { if (dims.size() > 0) { List retVal = null; List unaggregatedBuffers = null; - final DimensionSelector dimSelector = dims.get(0); - final IndexedInts row = dimSelector.getRow(); - if (row == null || row.size() == 0) { - ByteBuffer newKey = key.duplicate(); - newKey.putInt(MISSING_VALUE); - unaggregatedBuffers = updateValues(newKey, dims.subList(1, dims.size())); - } else { - for (Integer dimValue : row) { - ByteBuffer newKey = key.duplicate(); - newKey.putInt(dimValue); - unaggregatedBuffers = updateValues(newKey, dims.subList(1, dims.size())); + final QueryDimensionInfo dimInfo = dims.get(0); + final Object selector = 
dimInfo.selector; + final DimensionQueryHelper queryHelper = dimInfo.queryHelper; + final List dimInfoSublist = dims.subList(1, dims.size()); + final Function> updateValuesFn = new Function>() + { + @Override + public List apply(ByteBuffer input) + { + return updateValues(input, dimInfoSublist); } - } + }; + + unaggregatedBuffers = queryHelper.addDimValuesToGroupingKey(selector, key, updateValuesFn); if (unaggregatedBuffers != null) { if (retVal == null) { retVal = Lists.newArrayList(); @@ -297,8 +350,6 @@ private static class RowIterator implements CloseableIterator private final int maxIntermediateRows; private final List dimensionSpecs; - private final List dimensions; - private final ArrayList dimNames; private final List aggregatorSpecs; private final BufferAggregator[] aggregators; private final String[] metricNames; @@ -306,8 +357,11 @@ private static class RowIterator implements CloseableIterator private List unprocessedKeys; private Iterator delegate; + private final List dimInfoList; + + private final int keySize; - public RowIterator(GroupByQuery query, final Cursor cursor, ByteBuffer metricsBuffer, GroupByQueryConfig config) + public RowIterator(GroupByQuery query, final Cursor cursor, ByteBuffer metricsBuffer, GroupByQueryConfig config, StorageAdapter adapter) { final GroupByQueryConfig querySpecificConfig = config.withOverrides(query); @@ -319,17 +373,20 @@ public RowIterator(GroupByQuery query, final Cursor cursor, ByteBuffer metricsBu unprocessedKeys = null; delegate = Iterators.emptyIterator(); dimensionSpecs = query.getDimensions(); - dimensions = Lists.newArrayListWithExpectedSize(dimensionSpecs.size()); - dimNames = Lists.newArrayListWithExpectedSize(dimensionSpecs.size()); + dimInfoList = Lists.newArrayListWithExpectedSize(dimensionSpecs.size()); for (int i = 0; i < dimensionSpecs.size(); ++i) { final DimensionSpec dimSpec = dimensionSpecs.get(i); - final DimensionSelector selector = cursor.makeDimensionSelector(dimSpec); - if (selector != 
null) { - dimensions.add(selector); - dimNames.add(dimSpec.getOutputName()); - } + final DimensionQueryHelper queryHelper = DimensionHandlerUtil.makeQueryHelper( + dimSpec.getDimension(), + cursor, + Lists.newArrayList(adapter.getAvailableDimensions()) + ); + final Object selector = queryHelper.getColumnValueSelector(dimSpec, cursor); + QueryDimensionInfo info = new QueryDimensionInfo(dimSpec, queryHelper, selector, 0); + dimInfoList.add(info); } + keySize = getTotalKeySize(); aggregatorSpecs = query.getAggregatorSpecs(); aggregators = new BufferAggregator[aggregatorSpecs.size()]; @@ -343,6 +400,15 @@ public RowIterator(GroupByQuery query, final Cursor cursor, ByteBuffer metricsBu } } + private int getTotalKeySize() + { + int keySize = 0; + for (QueryDimensionInfo info : dimInfoList) { + keySize += info.queryHelper.getGroupingKeySize(); + } + return keySize; + } + @Override public boolean hasNext() { @@ -361,10 +427,10 @@ public Row next() } final PositionMaintainer positionMaintainer = new PositionMaintainer(0, sizesRequired, metricsBuffer.remaining()); - final RowUpdater rowUpdater = new RowUpdater(metricsBuffer, aggregators, positionMaintainer); + final RowUpdater rowUpdater = new RowUpdater(metricsBuffer, aggregators, positionMaintainer, dimInfoList); if (unprocessedKeys != null) { for (ByteBuffer key : unprocessedKeys) { - final List unprocUnproc = rowUpdater.updateValues(key, ImmutableList.of()); + final List unprocUnproc = rowUpdater.updateValues(key, ImmutableList.of()); if (unprocUnproc != null) { throw new ISE("Not enough memory to process the request."); } @@ -372,9 +438,9 @@ public Row next() cursor.advance(); } while (!cursor.isDone() && rowUpdater.getNumRows() < maxIntermediateRows) { - ByteBuffer key = ByteBuffer.allocate(dimensions.size() * Ints.BYTES); + ByteBuffer key = ByteBuffer.allocate(keySize); - unprocessedKeys = rowUpdater.updateValues(key, dimensions); + unprocessedKeys = rowUpdater.updateValues(key, dimInfoList); if (unprocessedKeys != 
null) { break; } @@ -403,12 +469,10 @@ public Row apply(@Nullable Map.Entry input) Map theEvent = Maps.newLinkedHashMap(); ByteBuffer keyBuffer = input.getKey().duplicate(); - for (int i = 0; i < dimensions.size(); ++i) { - final DimensionSelector dimSelector = dimensions.get(i); - final int dimVal = keyBuffer.getInt(); - if (MISSING_VALUE != dimVal) { - theEvent.put(dimNames.get(i), dimSelector.lookupName(dimVal)); - } + for (int i = 0; i < dimInfoList.size(); ++i) { + final QueryDimensionInfo dimInfo = dimInfoList.get(i); + final Object dimSelector = dimInfo.selector; + dimInfo.queryHelper.readDimValueFromGroupingKey(theEvent, dimInfo.outputName, dimSelector, keyBuffer); } int position = input.getValue(); diff --git a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index f79339f4831e..5884806e065c 100644 --- a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -21,8 +21,8 @@ import com.google.common.base.Function; import com.google.common.base.Strings; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; -import com.google.common.primitives.Ints; import io.druid.collections.ResourceHolder; import io.druid.collections.StupidPool; import io.druid.data.input.MapBasedRow; @@ -35,14 +35,15 @@ import io.druid.java.util.common.guava.Sequence; import io.druid.java.util.common.guava.Sequences; import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.QueryDimensionInfo; +import io.druid.query.dimension.DimensionSpec; import io.druid.query.groupby.GroupByQuery; import io.druid.query.groupby.GroupByQueryConfig; import io.druid.query.groupby.strategy.GroupByStrategyV2; import io.druid.segment.Cursor; -import io.druid.segment.DimensionSelector; +import 
io.druid.segment.DimensionHandlerUtil; +import io.druid.segment.DimensionQueryHelper; import io.druid.segment.StorageAdapter; -import io.druid.segment.data.EmptyIndexedInts; -import io.druid.segment.data.IndexedInts; import io.druid.segment.filter.Filters; import org.joda.time.DateTime; import org.joda.time.Interval; @@ -87,7 +88,7 @@ public static Sequence process( false ); - final Grouper.KeySerde keySerde = new GroupByEngineKeySerde(query.getDimensions().size()); + final ResourceHolder bufferHolder = intermediateResultsBufferPool.take(); final String fudgeTimestampString = Strings.emptyToNull( @@ -118,8 +119,8 @@ public GroupByEngineIterator make() config, cursor, bufferHolder.get(), - keySerde, - fudgeTimestamp + fudgeTimestamp, + getDimensionInfo(query, storageAdapter, cursor) ); } @@ -145,6 +146,27 @@ public void close() throws IOException ); } + private static QueryDimensionInfo[] getDimensionInfo(GroupByQuery query, StorageAdapter adapter, Cursor cursor) + { + int dimCount = query.getDimensions().size(); + int curPos = 0; + QueryDimensionInfo[] dims = new QueryDimensionInfo[dimCount]; + + for (int i = 0; i < dimCount; i++) { + final DimensionSpec dimSpec = query.getDimensions().get(i); + final DimensionQueryHelper queryHelper = DimensionHandlerUtil.makeQueryHelper( + dimSpec.getDimension(), + cursor, + Lists.newArrayList(adapter.getAvailableDimensions()) + ); + final Object selector = queryHelper.getColumnValueSelector(dimSpec, cursor); + final QueryDimensionInfo dimInfo = new QueryDimensionInfo(dimSpec, queryHelper, selector, curPos); + dims[i] = dimInfo; + curPos += queryHelper.getGroupingKeySize(); + } + return dims; + } + private static class GroupByEngineIterator implements Iterator, Closeable { private final GroupByQuery query; @@ -153,10 +175,10 @@ private static class GroupByEngineIterator implements Iterator, Closeable private final ByteBuffer buffer; private final Grouper.KeySerde keySerde; private final DateTime timestamp; - private final 
DimensionSelector[] selectors; private final ByteBuffer keyBuffer; private final int[] stack; - private final IndexedInts[] valuess; + private final Object[] valuess; + private final QueryDimensionInfo[] dims; private int stackp = Integer.MIN_VALUE; private boolean currentRowWasPartiallyAggregated = false; @@ -167,8 +189,8 @@ public GroupByEngineIterator( final GroupByQueryConfig config, final Cursor cursor, final ByteBuffer buffer, - final Grouper.KeySerde keySerde, - final DateTime fudgeTimestamp + final DateTime fudgeTimestamp, + final QueryDimensionInfo[] dims ) { final int dimCount = query.getDimensions().size(); @@ -177,14 +199,11 @@ public GroupByEngineIterator( this.querySpecificConfig = config.withOverrides(query); this.cursor = cursor; this.buffer = buffer; - this.keySerde = keySerde; + this.keySerde = new GroupByEngineKeySerde(dims); this.keyBuffer = ByteBuffer.allocate(keySerde.keySize()); - this.selectors = new DimensionSelector[dimCount]; - for (int i = 0; i < dimCount; i++) { - this.selectors[i] = cursor.makeDimensionSelector(query.getDimensions().get(i)); - } + this.dims = dims; this.stack = new int[dimCount]; - this.valuess = new IndexedInts[dimCount]; + this.valuess = new Object[dimCount]; // Time is the same for every row in the cursor this.timestamp = fudgeTimestamp != null ? fudgeTimestamp : cursor.getTime(); @@ -224,19 +243,11 @@ public Row next() // Set up stack, valuess, and first grouping in keyBuffer for this row stackp = stack.length - 1; - for (int i = 0; i < selectors.length; i++) { - final DimensionSelector selector = selectors[i]; - - valuess[i] = selector == null ? 
EmptyIndexedInts.EMPTY_INDEXED_INTS : selector.getRow(); - - final int position = Ints.BYTES * i; - if (valuess[i].size() == 0) { - stack[i] = 0; - keyBuffer.putInt(position, -1); - } else { - stack[i] = 1; - keyBuffer.putInt(position, valuess[i].get(0)); - } + for (int i = 0; i < dims.length; i++) { + final DimensionQueryHelper queryHelper = dims[i].queryHelper; + valuess[i] = queryHelper.getRowFromDimSelector(dims[i].selector); + int rowSize = queryHelper.initializeGroupingKeyV2Dimension(valuess[i], keyBuffer, dims[i].keyBufferPosition); + stack[i] = rowSize == 0 ? 0 : 1; } } @@ -254,26 +265,14 @@ public Row next() doAggregate = false; } - if (stackp >= 0 && stack[stackp] < valuess[stackp].size()) { + if (stackp >= 0 && stack[stackp] < dims[stackp].queryHelper.getRowSize(valuess[stackp])) { // Load next value for current slot - keyBuffer.putInt( - Ints.BYTES * stackp, - valuess[stackp].get(stack[stackp]) - ); + dims[stackp].queryHelper.addValueToGroupingKeyV2(valuess[stackp], stack[stackp], keyBuffer, dims[stackp].keyBufferPosition); stack[stackp]++; - - // Reset later slots for (int i = stackp + 1; i < stack.length; i++) { - final int position = Ints.BYTES * i; - if (valuess[i].size() == 0) { - stack[i] = 0; - keyBuffer.putInt(position, -1); - } else { - stack[i] = 1; - keyBuffer.putInt(position, valuess[i].get(0)); - } + int rowSize = dims[i].queryHelper.initializeGroupingKeyV2Dimension(valuess[i], keyBuffer, dims[i].keyBufferPosition); + stack[i] = rowSize == 0 ? 0 : 1; } - stackp = stack.length - 1; doAggregate = true; } else { @@ -297,15 +296,8 @@ public Row apply(final Grouper.Entry entry) Map theMap = Maps.newLinkedHashMap(); // Add dimensions. 
- for (int i = 0; i < selectors.length; i++) { - final int id = entry.getKey().getInt(Ints.BYTES * i); - - if (id >= 0) { - theMap.put( - query.getDimensions().get(i).getOutputName(), - selectors[i].lookupName(id) - ); - } + for (int i = 0; i < dims.length; i++) { + dims[i].queryHelper.readValueFromGroupingKeyV2(dims[i], theMap, entry.getKey()); } // Add aggregations. @@ -354,9 +346,13 @@ private static class GroupByEngineKeySerde implements Grouper.KeySerdenewArrayList(adapter.getAvailableDimensions()) + ); + final Object dimSelector = queryHelper.getColumnValueSelector(dimSpec, cursor); + final QueryDimensionInfo dimInfo = new QueryDimensionInfo(dimSpec, queryHelper, dimSelector, 0); + return dimInfo; + } + @Override public Sequence> run( final Query> input, @@ -102,16 +116,46 @@ public Sequence> run( // Closing this will cause segfaults in unit tests. final QueryableIndex index = segment.asQueryableIndex(); + final StorageAdapter storageAdapter = segment.asStorageAdapter(); + + // Split dimension list into bitmap-supporting list and non-bitmap supporting list + Iterable dimsToSearch; + if (dimensions == null || dimensions.isEmpty()) { + dimsToSearch = Iterables.transform(storageAdapter.getAvailableDimensions(), Druids.DIMENSION_IDENTITY); + } else { + dimsToSearch = dimensions; + } + + final List bitmapDims; + final List nonbitmapDims; if (index != null) { - final TreeMap retVal = Maps.newTreeMap(query.getSort().getComparator()); + bitmapDims = Lists.newArrayList(); + nonbitmapDims = Lists.newArrayList(); + for (DimensionSpec spec : dimsToSearch) { + if (spec.getDimension().equals(Column.TIME_COLUMN_NAME)) { + bitmapDims.add(spec); + continue; + } + ColumnCapabilities capabilities = storageAdapter.getColumnCapabilities(spec.getDimension()); + if (capabilities == null) { + continue; + } - Iterable dimsToSearch; - if (dimensions == null || dimensions.isEmpty()) { - dimsToSearch = Iterables.transform(index.getAvailableDimensions(), Druids.DIMENSION_IDENTITY); - } 
else { - dimsToSearch = dimensions; + if (capabilities.hasBitmapIndexes()) { + bitmapDims.add(spec); + } else { + nonbitmapDims.add(spec); + } } + } else { + // no QueryableIndex available, so nothing has bitmaps + bitmapDims = null; + nonbitmapDims = Lists.newArrayList(dimsToSearch); + } + // Get results from bitmap supporting dims first + if (bitmapDims != null) { + final TreeMap retVal = Maps.newTreeMap(query.getSort().getComparator()); final BitmapFactory bitmapFactory = index.getBitmapFactoryForDimensions(); final ImmutableBitmap baseFilter = @@ -141,7 +185,7 @@ public Sequence> run( timeFilteredBitmap = baseFilter; } - for (DimensionSpec dimension : dimsToSearch) { + for (DimensionSpec dimension : bitmapDims) { final Column column = index.getColumn(dimension.getDimension()); if (column == null) { continue; @@ -176,7 +220,9 @@ public Sequence> run( } } - return makeReturnResult(limit, retVal); + if (nonbitmapDims.size() == 0 || retVal.size() >= limit) { + return makeReturnResult(limit, retVal); + } } final StorageAdapter adapter = segment.asStorageAdapter(); @@ -190,13 +236,6 @@ public Sequence> run( ); } - final Iterable dimsToSearch; - if (dimensions == null || dimensions.isEmpty()) { - dimsToSearch = Iterables.transform(adapter.getAvailableDimensions(), Druids.DIMENSION_IDENTITY); - } else { - dimsToSearch = dimensions; - } - final Sequence cursors = adapter.makeCursors(filter, interval, query.getGranularity(), descending); final TreeMap retVal = cursors.accumulate( @@ -210,33 +249,18 @@ public TreeMap accumulate(TreeMap return set; } - Map dimSelectors = Maps.newHashMap(); - for (DimensionSpec dim : dimsToSearch) { - dimSelectors.put( - dim.getOutputName(), - cursor.makeDimensionSelector(dim) - ); - } + Map dimInfoMap = Maps.newLinkedHashMap(); + for (DimensionSpec dim : nonbitmapDims) { + dimInfoMap.put(dim.getOutputName(), getDimInfoFromSpec(dim, adapter, cursor)); + } while (!cursor.isDone()) { - for (Map.Entry entry : dimSelectors.entrySet()) { - final 
DimensionSelector selector = entry.getValue(); - - if (selector != null) { - final IndexedInts vals = selector.getRow(); - for (int i = 0; i < vals.size(); ++i) { - final String dimVal = selector.lookupName(vals.get(i)); - if (searchQuerySpec.accept(dimVal)) { - MutableInt counter = new MutableInt(1); - MutableInt prev = set.put(new SearchHit(entry.getKey(), dimVal), counter); - if (prev != null) { - counter.add(prev.intValue()); - } - if (set.size() >= limit) { - return set; - } - } - } + for (Map.Entry entry : dimInfoMap.entrySet()) { + final QueryDimensionInfo dimInfo = entry.getValue(); + dimInfo.queryHelper.updateSearchResultSet(dimInfo.outputName, dimInfo.selector, searchQuerySpec, set, limit); + + if (set.size() >= limit) { + return set; } } diff --git a/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java b/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java index 75c2a67febda..50e7761943ec 100644 --- a/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java +++ b/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java @@ -28,17 +28,18 @@ import io.druid.java.util.common.guava.Sequence; import io.druid.query.QueryRunnerHelper; import io.druid.query.Result; +import io.druid.query.QueryDimensionInfo; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; import io.druid.query.filter.Filter; import io.druid.segment.Cursor; -import io.druid.segment.DimensionSelector; +import io.druid.segment.DimensionHandlerUtil; +import io.druid.segment.DimensionQueryHelper; import io.druid.segment.LongColumnSelector; import io.druid.segment.ObjectColumnSelector; import io.druid.segment.Segment; import io.druid.segment.StorageAdapter; import io.druid.segment.column.Column; -import io.druid.segment.data.IndexedInts; import io.druid.segment.filter.Filters; import io.druid.timeline.DataSegmentUtils; import org.joda.time.DateTime; @@ -104,11 +105,17 @@ public Result 
apply(Cursor cursor) final LongColumnSelector timestampColumnSelector = cursor.makeLongColumnSelector(Column.TIME_COLUMN_NAME); - final Map dimSelectors = Maps.newHashMap(); - for (DimensionSpec dim : dims) { - final DimensionSelector dimSelector = cursor.makeDimensionSelector(dim); - dimSelectors.put(dim.getOutputName(), dimSelector); - builder.addDimension(dim.getOutputName()); + final Map dimInfoMap = Maps.newLinkedHashMap(); + for (DimensionSpec dimSpec : dims) { + final DimensionQueryHelper queryHelper = DimensionHandlerUtil.makeQueryHelper( + dimSpec.getDimension(), + cursor, + Lists.newArrayList(adapter.getAvailableDimensions()) + ); + final Object dimSelector = queryHelper.getColumnValueSelector(dimSpec, cursor); + final QueryDimensionInfo dimInfo = new QueryDimensionInfo(dimSpec, queryHelper, dimSelector, 0); + dimInfoMap.put(dimSpec.getOutputName(), dimInfo); + builder.addDimension(dimSpec.getOutputName()); } final Map metSelectors = Maps.newHashMap(); @@ -127,26 +134,10 @@ public Result apply(Cursor cursor) final Map theEvent = Maps.newLinkedHashMap(); theEvent.put(EventHolder.timestampKey, new DateTime(timestampColumnSelector.get())); - for (Map.Entry dimSelector : dimSelectors.entrySet()) { - final String dim = dimSelector.getKey(); - final DimensionSelector selector = dimSelector.getValue(); - - if (selector == null) { - theEvent.put(dim, null); - } else { - final IndexedInts vals = selector.getRow(); - - if (vals.size() == 1) { - final String dimVal = selector.lookupName(vals.get(0)); - theEvent.put(dim, dimVal); - } else { - List dimVals = Lists.newArrayList(); - for (int i = 0; i < vals.size(); ++i) { - dimVals.add(selector.lookupName(vals.get(i))); - } - theEvent.put(dim, dimVals); - } - } + for (Map.Entry entry : dimInfoMap.entrySet()) { + final String dim = entry.getKey(); + final QueryDimensionInfo dimInfo = entry.getValue(); + dimInfo.queryHelper.addRowValuesToSelectResult(dimInfo.outputName, dimInfo.selector, theEvent); } for (Map.Entry 
metSelector : metSelectors.entrySet()) { diff --git a/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java b/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java index 3a7d16e51a63..a62c834b5eac 100644 --- a/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java @@ -25,9 +25,9 @@ import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.AggregatorUtil; import io.druid.query.aggregation.PostAggregator; +import io.druid.query.QueryDimensionInfo; import io.druid.segment.Capabilities; import io.druid.segment.Cursor; -import io.druid.segment.DimensionSelector; import java.nio.ByteBuffer; import java.util.Arrays; @@ -55,11 +55,11 @@ public AggregateTopNMetricFirstAlgorithm( @Override public TopNParams makeInitParams( - DimensionSelector dimSelector, Cursor cursor + QueryDimensionInfo dimInfo, Cursor cursor ) { return new TopNParams( - dimSelector, + dimInfo, cursor, Integer.MAX_VALUE ); @@ -91,7 +91,7 @@ public void run( PooledTopNAlgorithm.PooledTopNParams singleMetricParam = null; int[] dimValSelector = null; try { - singleMetricParam = singleMetricAlgo.makeInitParams(params.getDimSelector(), params.getCursor()); + singleMetricParam = singleMetricAlgo.makeInitParams(params.getDimInfo(), params.getCursor()); singleMetricAlgo.run( singleMetricParam, singleMetricResultBuilder, @@ -109,7 +109,7 @@ public void run( PooledTopNAlgorithm.PooledTopNParams allMetricsParam = null; try { // Run topN for all metrics for top N dimension values - allMetricsParam = allMetricAlgo.makeInitParams(params.getDimSelector(), params.getCursor()); + allMetricsParam = allMetricAlgo.makeInitParams(params.getDimInfo(), params.getCursor()); allMetricAlgo.run( allMetricsParam, resultBuilder, diff --git a/processing/src/main/java/io/druid/query/topn/BaseTopNAlgorithm.java 
b/processing/src/main/java/io/druid/query/topn/BaseTopNAlgorithm.java index 380d8e3b1eaa..c32eb78e1f27 100644 --- a/processing/src/main/java/io/druid/query/topn/BaseTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/BaseTopNAlgorithm.java @@ -36,7 +36,7 @@ public abstract class BaseTopNAlgorithm implements TopNAlgorithm { - protected static Aggregator[] makeAggregators(Cursor cursor, List aggregatorSpecs) + public static Aggregator[] makeAggregators(Cursor cursor, List aggregatorSpecs) { Aggregator[] aggregators = new Aggregator[aggregatorSpecs.size()]; int aggregatorIndex = 0; @@ -58,7 +58,7 @@ protected static BufferAggregator[] makeBufferAggregators(Cursor cursor, List + public static class AggregatorArrayProvider extends BaseArrayProvider { Aggregator[][] expansionAggs; int cardinality; - public AggregatorArrayProvider(DimensionSelector dimSelector, TopNQuery query, int cardinality) + public AggregatorArrayProvider(DimensionSelector dimSelector, TopNQuery query, int cardinality, Capabilities capabilities) { super(dimSelector, query, capabilities); diff --git a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java index 765b380d2ce3..243f90852d29 100644 --- a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java @@ -21,17 +21,16 @@ import com.google.common.collect.Maps; import io.druid.query.aggregation.Aggregator; +import io.druid.query.QueryDimensionInfo; import io.druid.segment.Capabilities; import io.druid.segment.Cursor; -import io.druid.segment.DimensionSelector; -import io.druid.segment.data.IndexedInts; import java.util.Map; /** * This has to be its own strategy because the pooled topn algorithm assumes each index is unique, and cannot handle multiple index numerals referencing the same dimension value. 
*/ -public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm, TopNParams> +public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm, TopNParams> { private final TopNQuery query; @@ -47,12 +46,12 @@ public DimExtractionTopNAlgorithm( @Override public TopNParams makeInitParams( - final DimensionSelector dimSelector, + final QueryDimensionInfo dimInfo, final Cursor cursor ) { return new TopNParams( - dimSelector, + dimInfo, cursor, Integer.MAX_VALUE ); @@ -61,16 +60,8 @@ public TopNParams makeInitParams( @Override protected Aggregator[][] makeDimValSelector(TopNParams params, int numProcessed, int numToProcess) { - final AggregatorArrayProvider provider = new AggregatorArrayProvider( - params.getDimSelector(), - query, - params.getCardinality() - ); - - // Unlike regular topN we cannot rely on ordering to optimize. - // Optimization possibly requires a reverse lookup from value to ID, which is - // not possible when applying an extraction function - return provider.build(); + QueryDimensionInfo dimInfo = params.getDimInfo(); + return dimInfo.queryHelper.getDimExtractionRowSelector(params, query, capabilities); } @Override @@ -80,7 +71,7 @@ protected Aggregator[][] updateDimValSelector(Aggregator[][] aggregators, int nu } @Override - protected Map makeDimValAggregateStore(TopNParams params) + protected Map makeDimValAggregateStore(TopNParams params) { return Maps.newHashMap(); } @@ -89,35 +80,21 @@ protected Map makeDimValAggregateStore(TopNParams params) public void scanAndAggregate( TopNParams params, Aggregator[][] rowSelector, - Map aggregatesStore, + Map aggregatesStore, int numProcessed ) { final Cursor cursor = params.getCursor(); - final DimensionSelector dimSelector = params.getDimSelector(); + final QueryDimensionInfo dimInfo = params.getDimInfo(); while (!cursor.isDone()) { - final IndexedInts dimValues = dimSelector.getRow(); - - for (int i = 0; i < dimValues.size(); ++i) { - - final int dimIndex = dimValues.get(i); - Aggregator[] 
theAggregators = rowSelector[dimIndex]; - if (theAggregators == null) { - final String key = dimSelector.lookupName(dimIndex); - theAggregators = aggregatesStore.get(key); - if (theAggregators == null) { - theAggregators = makeAggregators(cursor, query.getAggregatorSpecs()); - aggregatesStore.put(key, theAggregators); - } - rowSelector[dimIndex] = theAggregators; - } - - for (Aggregator aggregator : theAggregators) { - aggregator.aggregate(); - } - } - + dimInfo.queryHelper.dimExtractionScanAndAggregate( + dimInfo.selector, + rowSelector, + aggregatesStore, + cursor, + query + ); cursor.advance(); } } @@ -126,11 +103,11 @@ public void scanAndAggregate( protected void updateResults( TopNParams params, Aggregator[][] rowSelector, - Map aggregatesStore, + Map aggregatesStore, TopNResultBuilder resultBuilder ) { - for (Map.Entry entry : aggregatesStore.entrySet()) { + for (Map.Entry entry : aggregatesStore.entrySet()) { Aggregator[] aggs = entry.getValue(); if (aggs != null && aggs.length > 0) { Object[] vals = new Object[aggs.length]; @@ -139,7 +116,7 @@ protected void updateResults( } resultBuilder.addEntry( - entry.getKey(), + entry.getKey() == null ? 
null : entry.getKey().toString(), entry.getKey(), vals ); @@ -148,9 +125,9 @@ protected void updateResults( } @Override - protected void closeAggregators(Map stringMap) + protected void closeAggregators(Map valueMap) { - for (Aggregator[] aggregators : stringMap.values()) { + for (Aggregator[] aggregators : valueMap.values()) { for (Aggregator agg : aggregators) { agg.close(); } diff --git a/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java index c476ef198509..57140078bb88 100644 --- a/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java @@ -24,6 +24,7 @@ import io.druid.java.util.common.Pair; import io.druid.java.util.common.guava.CloseQuietly; import io.druid.query.aggregation.BufferAggregator; +import io.druid.query.QueryDimensionInfo; import io.druid.segment.Capabilities; import io.druid.segment.Cursor; import io.druid.segment.DimensionSelector; @@ -57,13 +58,14 @@ public PooledTopNAlgorithm( @Override public PooledTopNParams makeInitParams( - DimensionSelector dimSelector, Cursor cursor + QueryDimensionInfo dimInfo, Cursor cursor ) { ResourceHolder resultsBufHolder = bufferPool.take(); ByteBuffer resultsBuf = resultsBufHolder.get(); resultsBuf.clear(); + final DimensionSelector dimSelector = (DimensionSelector) dimInfo.selector; final int cardinality = dimSelector.getValueCardinality(); if (cardinality < 0) { @@ -103,7 +105,7 @@ public int[] build() final int numValuesPerPass = numBytesPerRecord > 0 ? 
numBytesToWorkWith / numBytesPerRecord : cardinality; return PooledTopNParams.builder() - .withDimSelector(dimSelector) + .withDimInfo(dimInfo) .withCursor(cursor) .withResultsBufHolder(resultsBufHolder) .withResultsBuf(resultsBuf) @@ -192,7 +194,7 @@ protected void scanAndAggregate( final int numBytesPerRecord = params.getNumBytesPerRecord(); final int[] aggregatorSizes = params.getAggregatorSizes(); final Cursor cursor = params.getCursor(); - final DimensionSelector dimSelector = params.getDimSelector(); + final DimensionSelector dimSelector = (DimensionSelector) params.getDimSelector(); final int[] aggregatorOffsets = new int[aggregatorSizes.length]; for (int j = 0, offset = 0; j < aggregatorSizes.length; ++j) { @@ -457,7 +459,7 @@ protected void updateResults( { final ByteBuffer resultsBuf = params.getResultsBuf(); final int[] aggregatorSizes = params.getAggregatorSizes(); - final DimensionSelector dimSelector = params.getDimSelector(); + final DimensionSelector dimSelector = (DimensionSelector) params.getDimSelector(); for (int i = 0; i < positions.length; i++) { int position = positions[i]; @@ -507,7 +509,7 @@ public static class PooledTopNParams extends TopNParams private final TopNMetricSpecBuilder arrayProvider; public PooledTopNParams( - DimensionSelector dimSelector, + QueryDimensionInfo dimInfo, Cursor cursor, ResourceHolder resultsBufHolder, ByteBuffer resultsBuf, @@ -517,7 +519,7 @@ public PooledTopNParams( TopNMetricSpecBuilder arrayProvider ) { - super(dimSelector, cursor, numValuesPerPass); + super(dimInfo, cursor, numValuesPerPass); this.resultsBufHolder = resultsBufHolder; this.resultsBuf = resultsBuf; @@ -558,7 +560,7 @@ public TopNMetricSpecBuilder getArrayProvider() public static class Builder { - private DimensionSelector dimSelector; + private QueryDimensionInfo dimInfo; private Cursor cursor; private ResourceHolder resultsBufHolder; private ByteBuffer resultsBuf; @@ -569,7 +571,7 @@ public static class Builder public Builder() { - 
dimSelector = null; + dimInfo = null; cursor = null; resultsBufHolder = null; resultsBuf = null; @@ -579,9 +581,9 @@ public Builder() arrayProvider = null; } - public Builder withDimSelector(DimensionSelector dimSelector) + public Builder withDimInfo(QueryDimensionInfo dimInfo) { - this.dimSelector = dimSelector; + this.dimInfo = dimInfo; return this; } @@ -630,7 +632,7 @@ public Builder withArrayProvider(TopNMetricSpecBuilder arrayProvider) public PooledTopNParams build() { return new PooledTopNParams( - dimSelector, + dimInfo, cursor, resultsBufHolder, resultsBuf, diff --git a/processing/src/main/java/io/druid/query/topn/TimeExtractionTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/TimeExtractionTopNAlgorithm.java index 3d4980baf5c4..b88b6e6f2ee4 100644 --- a/processing/src/main/java/io/druid/query/topn/TimeExtractionTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/TimeExtractionTopNAlgorithm.java @@ -21,6 +21,7 @@ import com.google.common.collect.Maps; import io.druid.query.aggregation.Aggregator; +import io.druid.query.QueryDimensionInfo; import io.druid.segment.Capabilities; import io.druid.segment.Cursor; import io.druid.segment.DimensionSelector; @@ -40,10 +41,10 @@ public TimeExtractionTopNAlgorithm(Capabilities capabilities, TopNQuery query) @Override - public TopNParams makeInitParams(DimensionSelector dimSelector, Cursor cursor) + public TopNParams makeInitParams(QueryDimensionInfo dimInfo, Cursor cursor) { return new TopNParams( - dimSelector, + dimInfo, cursor, Integer.MAX_VALUE ); @@ -73,7 +74,7 @@ protected void scanAndAggregate( ) { final Cursor cursor = params.getCursor(); - final DimensionSelector dimSelector = params.getDimSelector(); + final DimensionSelector dimSelector = (DimensionSelector) params.getDimSelector(); while (!cursor.isDone()) { final String key = dimSelector.lookupName(dimSelector.getRow().get(0)); diff --git a/processing/src/main/java/io/druid/query/topn/TopNAlgorithm.java 
b/processing/src/main/java/io/druid/query/topn/TopNAlgorithm.java index bc65597c1323..6dbb123cfa2b 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/TopNAlgorithm.java @@ -20,8 +20,8 @@ package io.druid.query.topn; import io.druid.query.aggregation.Aggregator; +import io.druid.query.QueryDimensionInfo; import io.druid.segment.Cursor; -import io.druid.segment.DimensionSelector; /** */ @@ -31,7 +31,7 @@ public interface TopNAlgorithm public static final int INIT_POSITION_VALUE = -1; public static final int SKIP_POSITION_VALUE = -2; - public TopNParams makeInitParams(DimensionSelector dimSelector, Cursor cursor); + public TopNParams makeInitParams(QueryDimensionInfo dimInfo, Cursor cursor); public void run( Parameters params, diff --git a/processing/src/main/java/io/druid/query/topn/TopNMapFn.java b/processing/src/main/java/io/druid/query/topn/TopNMapFn.java index d31e84fa7910..915ec74c854c 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNMapFn.java +++ b/processing/src/main/java/io/druid/query/topn/TopNMapFn.java @@ -21,8 +21,10 @@ import com.google.common.base.Function; import io.druid.query.Result; +import io.druid.query.QueryDimensionInfo; import io.druid.segment.Cursor; -import io.druid.segment.DimensionSelector; +import io.druid.segment.DimensionHandlerUtil; +import io.druid.segment.DimensionQueryHelper; public class TopNMapFn implements Function> { @@ -42,16 +44,20 @@ public TopNMapFn( @SuppressWarnings("unchecked") public Result apply(Cursor cursor) { - final DimensionSelector dimSelector = cursor.makeDimensionSelector( - query.getDimensionSpec() + final DimensionQueryHelper queryHelper = DimensionHandlerUtil.makeQueryHelper( + query.getDimensionSpec().getDimension(), + cursor, + null ); + final Object dimSelector = queryHelper.getColumnValueSelector(query.getDimensionSpec(), cursor); + final QueryDimensionInfo dimInfo = new QueryDimensionInfo(query.getDimensionSpec(), 
queryHelper, dimSelector, 0); if (dimSelector == null) { return null; } TopNParams params = null; try { - params = topNAlgorithm.makeInitParams(dimSelector, cursor); + params = topNAlgorithm.makeInitParams(dimInfo, cursor); TopNResultBuilder resultBuilder = BaseTopNAlgorithm.makeResultBuilder(params, query); diff --git a/processing/src/main/java/io/druid/query/topn/TopNParams.java b/processing/src/main/java/io/druid/query/topn/TopNParams.java index d9e75a82c081..563b74ee855d 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNParams.java +++ b/processing/src/main/java/io/druid/query/topn/TopNParams.java @@ -19,27 +19,27 @@ package io.druid.query.topn; +import io.druid.query.QueryDimensionInfo; import io.druid.segment.Cursor; -import io.druid.segment.DimensionSelector; /** */ public class TopNParams { - private final DimensionSelector dimSelector; private final Cursor cursor; private final int cardinality; private final int numValuesPerPass; + private final QueryDimensionInfo dimInfo; protected TopNParams( - DimensionSelector dimSelector, + QueryDimensionInfo dimInfo, Cursor cursor, int numValuesPerPass ) { - this.dimSelector = dimSelector; + this.dimInfo = dimInfo; this.cursor = cursor; - this.cardinality = dimSelector.getValueCardinality(); + this.cardinality = dimInfo.getCardinality(); this.numValuesPerPass = numValuesPerPass; if (cardinality < 0) { @@ -47,9 +47,14 @@ protected TopNParams( } } - public DimensionSelector getDimSelector() + public Object getDimSelector() { - return dimSelector; + return dimInfo.selector; + } + + public QueryDimensionInfo getDimInfo() + { + return dimInfo; } public Cursor getCursor() diff --git a/processing/src/main/java/io/druid/query/topn/TopNQueryEngine.java b/processing/src/main/java/io/druid/query/topn/TopNQueryEngine.java index 4330e0a4d81b..78c1b822b63b 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNQueryEngine.java +++ b/processing/src/main/java/io/druid/query/topn/TopNQueryEngine.java @@ -33,9 
+33,13 @@ import io.druid.query.filter.Filter; import io.druid.segment.Capabilities; import io.druid.segment.Cursor; +import io.druid.segment.DimensionHandler; +import io.druid.segment.DimensionHandlerUtil; +import io.druid.segment.DimensionQueryHelper; import io.druid.segment.SegmentMissingException; import io.druid.segment.StorageAdapter; import io.druid.segment.column.Column; +import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.filter.Filters; import org.joda.time.Interval; @@ -93,6 +97,7 @@ private Function> getMapFn(TopNQuery query, fina { final Capabilities capabilities = adapter.getCapabilities(); final String dimension = query.getDimensionSpec().getDimension(); + final ColumnCapabilities columnCapabilities = adapter.getColumnCapabilities(dimension); final int cardinality = adapter.getDimensionCardinality(dimension); diff --git a/processing/src/main/java/io/druid/segment/DimensionHandler.java b/processing/src/main/java/io/druid/segment/DimensionHandler.java index 411d76aaaaa5..786817e0dbac 100644 --- a/processing/src/main/java/io/druid/segment/DimensionHandler.java +++ b/processing/src/main/java/io/druid/segment/DimensionHandler.java @@ -26,6 +26,7 @@ import java.io.Closeable; import java.io.File; +import java.io.IOException; /** * Processing related interface @@ -93,7 +94,7 @@ public DimensionMergerV9 makeMerger( IOPeon ioPeon, ColumnCapabilities capabilities, ProgressIndicator progress - ); + ) throws IOException; /** @@ -116,7 +117,10 @@ public DimensionMergerLegacy makeLegacyMerger( IOPeon ioPeon, ColumnCapabilities capabilities, ProgressIndicator progress - ); + ) throws IOException; + + + public DimensionQueryHelper makeQueryHelper(); /** diff --git a/processing/src/main/java/io/druid/segment/DimensionHandlerUtil.java b/processing/src/main/java/io/druid/segment/DimensionHandlerUtil.java index 87376e149ffe..bd5bc15442f5 100644 --- a/processing/src/main/java/io/druid/segment/DimensionHandlerUtil.java +++ 
b/processing/src/main/java/io/druid/segment/DimensionHandlerUtil.java @@ -21,9 +21,14 @@ import io.druid.java.util.common.IAE; import io.druid.data.input.impl.DimensionSchema.MultiValueHandling; +import com.google.common.base.Function; +import com.google.common.collect.Lists; +import io.druid.segment.column.Column; import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ValueType; +import java.util.List; + public final class DimensionHandlerUtil { private DimensionHandlerUtil() {} @@ -35,6 +40,14 @@ public static DimensionHandler getHandlerFromCapabilities( ) { DimensionHandler handler = null; + if (capabilities == null) { + return null; + } + + if (dimensionName.equals(Column.TIME_COLUMN_NAME)) { + return new StringDimensionHandler(Column.TIME_COLUMN_NAME, MultiValueHandling.ARRAY); + } + if (capabilities.getType() == ValueType.STRING) { if (!capabilities.isDictionaryEncoded() || !capabilities.hasBitmapIndexes()) { throw new IAE("String column must have dictionary encoding and bitmap index."); @@ -43,9 +56,36 @@ public static DimensionHandler getHandlerFromCapabilities( multiValueHandling = multiValueHandling == null ? 
MultiValueHandling.ofDefault() : multiValueHandling; handler = new StringDimensionHandler(dimensionName, multiValueHandling); } + if (capabilities.getType() == ValueType.LONG) { + //handler = new LongDimensionHandler(dimensionName); + } + + if (capabilities.getType() == ValueType.FLOAT) { + //handler = new FloatDimensionHandler(dimensionName); + } + if (handler == null) { - throw new IAE("Could not create handler from invalid column type: " + capabilities.getType()); + //return new StringDimensionHandler(dimensionName); + //throw new IAE("Could not create handler from invalid column type: " + capabilities.getType()); } return handler; } + + public static DimensionQueryHelper makeQueryHelper(String dimName, ColumnSelectorFactory columnSelectorFactory, List availableDimensions) + { + final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(dimName); + DimensionHandler handler = DimensionHandlerUtil.getHandlerFromCapabilities(dimName, capabilities, null); + // treat null columns as strings + if (handler == null) { + handler = new StringDimensionHandler(dimName, null); + } + // treat metrics as null for now + if (availableDimensions != null) { + if (!Lists.newArrayList(availableDimensions).contains(dimName)) { + handler = new StringDimensionHandler(dimName, null); + } + } + final DimensionQueryHelper queryHelper = handler.makeQueryHelper(); + return queryHelper; + } } diff --git a/processing/src/main/java/io/druid/segment/DimensionMergerLegacy.java b/processing/src/main/java/io/druid/segment/DimensionMergerLegacy.java index 360eb8672b2f..df543c3e0d19 100644 --- a/processing/src/main/java/io/druid/segment/DimensionMergerLegacy.java +++ b/processing/src/main/java/io/druid/segment/DimensionMergerLegacy.java @@ -23,6 +23,7 @@ import com.google.common.io.OutputSupplier; import io.druid.common.guava.FileOutputSupplier; +import java.io.File; import java.io.FileOutputStream; import java.io.IOException; @@ -62,4 +63,7 @@ public void 
writeIndexesToFiles( ByteSink invertedOut, OutputSupplier spatialOut ) throws IOException; + + + public File makeDimFile() throws IOException; } diff --git a/processing/src/main/java/io/druid/segment/DimensionMergerV9.java b/processing/src/main/java/io/druid/segment/DimensionMergerV9.java index c344c84c5404..73a885c36790 100644 --- a/processing/src/main/java/io/druid/segment/DimensionMergerV9.java +++ b/processing/src/main/java/io/druid/segment/DimensionMergerV9.java @@ -21,6 +21,8 @@ import io.druid.segment.column.ColumnDescriptor; +import java.io.IOException; + /** * Processing related interface * @@ -34,5 +36,5 @@ public interface DimensionMergerV9 extends DimensionMerger, EncodedTypeArray, ActualType extends Comparable> +{ + /** + * Get a typed column value selector (DimensionSelector, LongColumnSelector, etc.) from a ColumnSelectorFactory. + * @param dimensionSpec The dimension of the selector + * @param columnSelectorFactory Column value selector provider + * @return Column value selector for the dimension specified by dimensionSpec. + */ + public Object getColumnValueSelector(DimensionSpec dimensionSpec, ColumnSelectorFactory columnSelectorFactory); + + + /** + * Get the size of a row object. + * + * The type of the row object will depend on the dimension type, e.g.: + * + * String type -> IndexedInts row object + * Long type -> IndexedLongs row object + * + * @param rowValues The row object to return the size of + * @return size of the row object + */ + public int getRowSize(Object rowValues); + + + /** + * Get the cardinality, if possible, from a dimension value selector object. + * + * The class of the row object will depend on the dimension type, e.g: + * + * String type -> DimensionSelector + * Long type -> LongColumnSelector + * + * @param valueSelector The dimension value selector object + * @return Cardinality of the dimension value selector object, -1 if cardinality is not available. 
+ */ + public int getCardinality(Object valueSelector); + + + /** Functions for QueryableIndexStorageAdapter, FilteredAggregatorFactory **/ + /** + * Create a single value ValueMatcher, used for filtering by QueryableIndexStorageAdapter and FilteredAggregatorFactory. + * + * @param cursor ColumnSelectorFactory for creating dimension value selectors + * @param value Value to match against + * @return ValueMatcher that matches on 'value' + */ + public ValueMatcher getValueMatcher(ColumnSelectorFactory cursor, Comparable value); + + + /** + * Create a predicate-based ValueMatcher, used for filtering by QueryableIndexStorageAdapter and FilteredAggregatorFactory. + * + * @param cursor ColumnSelectorFactory for creating dimension value selectors + * @param predicateFactory A DruidPredicateFactory that provides the filter predicates to be matched + * @return A ValueMatcher that applies the predicate for this DimensionQueryHelper's value type from the predicateFactory + */ + public ValueMatcher getValueMatcher(ColumnSelectorFactory cursor, final DruidPredicateFactory predicateFactory); + + + /** + * Used by CardinalityAggregator. + * + * Retrieve the current row from dimSelector and add the row values to the hasher. + * + * @param dimSelector Dimension value selector + * @param hasher Hasher used for cardinality aggregator calculations + */ + public void hashRow(Object dimSelector, Hasher hasher); + + + /** + * Used by CardinalityAggregator. + * + * Retrieve the current row from dimSelector and add the row values to the hasher. + * @param dimSelector Dimension value selector + * @param collector HLL collector used for cardinality aggregator calculations + */ + public void hashValues(Object dimSelector, HyperLogLogCollector collector); + + + /** + * Used by GroupByEngine. + * + * Return the size, in bytes, of this dimension's values in the grouping key. + * + * For example, a String implementation would return 4, the size of an int. 
+ * + * @return size, in bytes, of this dimension's values in the grouping key. + */ + public int getGroupingKeySize(); + + + /** + * Used by GroupByEngine. + * + * A grouping key contains a concatenation of byte[] representations of dimension values. + * + * When comparing two grouping keys, the individual dimension values will be compared with comparators + * provided by the query helper. + * + * @return A comparator suitable for comparing byte representations of this dimension's type of values. + */ + public Comparator getGroupingKeyByteComparator(); + + + /** + * Used by GroupByEngine. + * + * Perform a relative read on a grouping key ByteBuffer to retrieve a single dimension value, and + * add the retrieved value to a GroupBy result map. + * + * An implementation may choose to not add anything to the result map + * (e.g., as the String implementation does for empty rows) + * + * @param theEvent Result map for the GroupBy query being served + * @param outputName The output name of this dimension for the GroupBy query being served, as specified in the DimensionSpec + * @param dimSelector Dimension value selector, used for value lookups if needed + * @param keyBuffer Grouping key, already positioned at this dimension's offset + */ + public void readDimValueFromGroupingKey( + Map theEvent, + String outputName, + Object dimSelector, + ByteBuffer keyBuffer + ); + + + /** + * Used by GroupByEngine. + * + * Read the current row from a dimension value selector and add the row values to the grouping key. + * + * This is called by GroupByEngine's updateValues() function, which uses recursion to traverse the dimensions in the grouping set. + * + * Before adding a dimension value to the grouping key, this function should duplicate() the provided key buffer and + * add the value to the duplicate key. + * + * After adding a dimension value to the duplicate grouping key, an implementation of this function should call + * updateValuesFn on the new key to perform the recursion. 
+ * + * For multi-value rows, this function should duplicate the original grouping key before adding each value, and + * call updateValuesFn on each new key. + * + * See StringDimensionQueryHelper for a reference implementation. + * + * @param dimSelector Dimension value selector + * @param key ByteBuffer for the grouping key + * @param updateValuesFn Function provided by GroupByEngine for updateValues() recursion + * @return Return the result of calling updateValuesFn on the updated grouping key + */ + public List addDimValuesToGroupingKey( + Object dimSelector, + ByteBuffer key, + Function> updateValuesFn + ); + + + /** + * Retrieve the current row from a dimension value selector. + * + * @param dimSelector Dimension value selector + * @return Current row + */ + public Object getRowFromDimSelector(Object dimSelector); + + + /** + * Used by GroupByEngineV2. + * + * Read the first value within a row values object (IndexedInts, IndexedLongs, etc.) and add that value + * to the keyBuffer at keyBufferPosition, and return the size of the row values object. + * + * @param valuesObj row values object + * @param keyBuffer grouping key + * @param keyBufferPosition offset within grouping key + * @return size of the row values object + */ + public int initializeGroupingKeyV2Dimension( + final Object valuesObj, + final ByteBuffer keyBuffer, + final int keyBufferPosition + ); + + + /** + * Used by GroupByEngineV2. + * + * Read the value at rowValueIdx from a row values object and add that value to the keyBuffer at keyBufferPosition. + * + * @param values row values object + * @param rowValueIdx index of the value to read + * @param keyBuffer grouping key + * @param keyBufferPosition offset within grouping key + */ + public void addValueToGroupingKeyV2( + Object values, + int rowValueIdx, + ByteBuffer keyBuffer, + final int keyBufferPosition + ); + + + /** + * Used by GroupByEngineV2. 
+ * + * Read a value from a grouping key and add it to the group by query result map, using the output name specified + * in a DimensionSpec. + * + * An implementation may choose to not add anything to the result map + * (e.g., as the String implementation does for empty rows) + * + * dimInfo provides access to: + * - the keyBufferPosition offset from which to read the value + * - the dimension value selector + * - the DimensionSpec for this dimension from the query + * + * @param dimInfo dimension info containing the key offset, value selector, and dimension spec + * @param resultMap result map for the group by query being served + * @param key grouping key + */ + public void readValueFromGroupingKeyV2( + QueryDimensionInfo dimInfo, + Map resultMap, + ByteBuffer key + ); + + + /** + * Used by DimExtractionTopNAlgorithm. + * + * Create an Aggregator[][] using BaseTopNAlgorithm.AggregatorArrayProvider and the given parameters. + * + * As the Aggregator[][] is used as an integer-based lookup, this method is only applicable for dimension types + * that use integer row values. + * + * A dimension type that does not have integer values should return null. + * + * @param params Parameters for the TopN query being served + * @param query The TopN query being served + * @param capabilities Object indicating if dimension values are sorted + * @return an Aggregator[][] for integer-valued dimensions, null otherwise + */ + public Aggregator[][] getDimExtractionRowSelector(TopNParams params, TopNQuery query, Capabilities capabilities); + + + /** + * Used by DimExtractionTopNAlgorithm. + * + * Read the current row from a dimension value selector, and for each row value: + * 1. Retrieve the Aggregator[] for the row value from rowSelector (fast integer lookup) or from + * aggregatesStore (slower map). + * + * 2. 
If the rowSelector and/or aggregatesStore did not have an entry for a particular row value, + * this function should retrieve the current Aggregator[] using BaseTopNAlgorithm.makeAggregators() and the + * provided cursor and query, storing them in rowSelector and aggregatesStore + * + * 3. Call aggregate() on each of the aggregators. + * + * If a dimension type doesn't have integer values, it should ignore rowSelector and use the aggregatesStore map only. + * + * @param selector Dimension value selector + * @param rowSelector Integer lookup containing aggregators + * @param aggregatesStore Map containing aggregators + * @param cursor Cursor for the segment being queried + * @param query The TopN query being served. + */ + public void dimExtractionScanAndAggregate( + Object selector, + Aggregator[][] rowSelector, + Map aggregatesStore, + Cursor cursor, + TopNQuery query + ); + + + /** + * Used by the select query. + * + * Read the current row from dimSelector and add the row values to the result map. + * + * Multi-valued rows should be added to the result as a List, single value rows should be added as a single object. + * + * @param outputName Output name for this dimension in the select query being served + * @param dimSelector Dimension value selector + * @param resultMap Output map of the select query being served + */ + public void addRowValuesToSelectResult( + String outputName, + Object dimSelector, + Map resultMap + ); + + + /** + * Used by the search query. + * + * Read the current row from dimSelector and update the search result set. + * + * For each row value: + * 1. Check if searchQuerySpec accept()s the value + * 2. If so, add the value to the result set and increment the counter for that value + * 3. If the size of the result set reaches the limit after adding a value, return early. 
+ * + * @param outputName Output name for this dimension in the search query being served + * @param dimSelector Dimension value selector + * @param searchQuerySpec Spec for the search query + * @param set The result set of the search query + * @param limit The limit of the search query + */ + public void updateSearchResultSet( + String outputName, + Object dimSelector, + SearchQuerySpec searchQuerySpec, + TreeMap set, + int limit + ); +} diff --git a/processing/src/main/java/io/druid/segment/IndexMerger.java b/processing/src/main/java/io/druid/segment/IndexMerger.java index b42c190c5621..d624aa53601b 100644 --- a/processing/src/main/java/io/druid/segment/IndexMerger.java +++ b/processing/src/main/java/io/druid/segment/IndexMerger.java @@ -691,7 +691,7 @@ public void close() throws IOException mergers.add(merger); merger.writeMergedValueMetadata(indexes); - FileOutputSupplier dimOut = new FileOutputSupplier(IndexIO.makeDimFile(v8OutDir, mergedDimensions.get(i)), true); + FileOutputSupplier dimOut = new FileOutputSupplier(merger.makeDimFile(), true); merger.writeValueMetadataToFile(dimOut); dimOuts.add(dimOut); } diff --git a/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java index 38dbe383c617..53ae145c699c 100644 --- a/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java +++ b/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java @@ -296,7 +296,7 @@ public Sequence makeCursors(Filter filter, Interval interval, QueryGranu return Sequences.filter( new CursorSequenceBuilder( - index, + this, actualInterval, gran, offset, @@ -319,20 +319,10 @@ private static ColumnCapabilities getColumnCapabilites(ColumnSelector index, Str return columnObj.getCapabilities(); } - private interface CursorAdvancer - { - public void advance(); - - public void advanceTo(int offset); - - public boolean isDone(); - - public void reset(); - } - 
private static class CursorSequenceBuilder { - private final ColumnSelector index; + private final StorageAdapter storageAdapter; + private final QueryableIndex index; private final Interval interval; private final QueryGranularity gran; private final Offset offset; @@ -343,7 +333,7 @@ private static class CursorSequenceBuilder private final ColumnSelectorBitmapIndexSelector bitmapIndexSelector; public CursorSequenceBuilder( - ColumnSelector index, + QueryableIndexStorageAdapter storageAdapter, Interval interval, QueryGranularity gran, Offset offset, @@ -354,7 +344,8 @@ public CursorSequenceBuilder( ColumnSelectorBitmapIndexSelector bitmapIndexSelector ) { - this.index = index; + this.storageAdapter = storageAdapter; + this.index = storageAdapter.index; this.interval = interval; this.gran = gran; this.offset = offset; @@ -913,7 +904,7 @@ public void reset() return new QueryableIndexBaseCursor() { CursorOffsetHolderValueMatcherFactory valueMatcherFactory = new CursorOffsetHolderValueMatcherFactory( - index, + storageAdapter, this ); RowOffsetMatcherFactory rowOffsetMatcherFactory = new CursorOffsetHolderRowOffsetMatcherFactory( @@ -1025,108 +1016,57 @@ public void set(Offset currOffset) } } - private static boolean isComparableNullOrEmpty(final Comparable value) - { - if (value instanceof String) { - return Strings.isNullOrEmpty((String) value); - } - return value == null; - } - private static class CursorOffsetHolderValueMatcherFactory implements ValueMatcherFactory { - private final ColumnSelector index; + private final StorageAdapter storageAdapter; private final ColumnSelectorFactory cursor; + private final List availableMetrics; public CursorOffsetHolderValueMatcherFactory( - ColumnSelector index, + StorageAdapter storageAdapter, ColumnSelectorFactory cursor ) { - this.index = index; + this.storageAdapter = storageAdapter; this.cursor = cursor; + this.availableMetrics = Lists.newArrayList(storageAdapter.getAvailableMetrics()); } @Override public ValueMatcher 
makeValueMatcher(String dimension, final Comparable value) { - if (getTypeForDimension(dimension) == ValueType.LONG) { - return Filters.getLongValueMatcher( - cursor.makeLongColumnSelector(dimension), - value - ); + if (dimension.equals(Column.TIME_COLUMN_NAME) || availableMetrics.contains(dimension)) { + if (getTypeForDimension(dimension) == ValueType.LONG) { + return Filters.getLongValueMatcher( + cursor.makeLongColumnSelector(dimension), + value + ); + } } - final DimensionSelector selector = cursor.makeDimensionSelector( - new DefaultDimensionSpec(dimension, dimension) + final DimensionQueryHelper queryHelper = DimensionHandlerUtil.makeQueryHelper( + dimension, + cursor, + Lists.newArrayList(storageAdapter.getAvailableDimensions()) ); - - // if matching against null, rows with size 0 should also match - final boolean matchNull = isComparableNullOrEmpty(value); - - final int id = selector.lookupId((String) value); - if (id < 0) { - return new BooleanValueMatcher(false); - } else { - return new ValueMatcher() - { - @Override - public boolean matches() - { - IndexedInts row = selector.getRow(); - if (row.size() == 0) { - return matchNull; - } - for (int i = 0; i < row.size(); i++) { - if (row.get(i) == id) { - return true; - } - } - return false; - } - }; - } + return queryHelper.getValueMatcher(cursor, value); } @Override public ValueMatcher makeValueMatcher(String dimension, final DruidPredicateFactory predicateFactory) { - ValueType type = getTypeForDimension(dimension); - switch (type) { - case LONG: + if (dimension.equals(Column.TIME_COLUMN_NAME) || availableMetrics.contains(dimension)) { + if (getTypeForDimension(dimension) == ValueType.LONG) { return makeLongValueMatcher(dimension, predicateFactory.makeLongPredicate()); - case STRING: - return makeStringValueMatcher(dimension, predicateFactory.makeStringPredicate()); - default: - return new BooleanValueMatcher(predicateFactory.makeStringPredicate().apply(null)); + } } - } - private ValueMatcher 
makeStringValueMatcher(String dimension, final Predicate predicate) - { - final DimensionSelector selector = cursor.makeDimensionSelector( - new DefaultDimensionSpec(dimension, dimension) + final DimensionQueryHelper queryHelper = DimensionHandlerUtil.makeQueryHelper( + dimension, + cursor, + Lists.newArrayList(storageAdapter.getAvailableDimensions()) ); - - return new ValueMatcher() - { - final boolean matchNull = predicate.apply(null); - - @Override - public boolean matches() - { - IndexedInts row = selector.getRow(); - if (row.size() == 0) { - return matchNull; - } - for (int i = 0; i < row.size(); i++) { - if (predicate.apply(selector.lookupName(row.get(i)))) { - return true; - } - } - return false; - } - }; + return queryHelper.getValueMatcher(cursor, predicateFactory); } private ValueMatcher makeLongValueMatcher(String dimension, final DruidLongPredicate predicate) @@ -1139,7 +1079,7 @@ private ValueMatcher makeLongValueMatcher(String dimension, final DruidLongPredi private ValueType getTypeForDimension(String dimension) { - ColumnCapabilities capabilities = getColumnCapabilites(index, dimension); + ColumnCapabilities capabilities = cursor.getColumnCapabilities(dimension); return capabilities == null ? 
ValueType.STRING : capabilities.getType(); } } diff --git a/processing/src/main/java/io/druid/segment/StringDimensionHandler.java b/processing/src/main/java/io/druid/segment/StringDimensionHandler.java index 6be0802408fd..adc5e467e80f 100644 --- a/processing/src/main/java/io/druid/segment/StringDimensionHandler.java +++ b/processing/src/main/java/io/druid/segment/StringDimensionHandler.java @@ -19,6 +19,7 @@ package io.druid.segment; +import com.google.common.base.Function; import com.google.common.primitives.Ints; import io.druid.data.input.impl.DimensionSchema.MultiValueHandling; import io.druid.segment.column.Column; @@ -215,6 +216,27 @@ public DimensionMergerLegacy makeLegacyMerger( return new StringDimensionMergerLegacy(dimensionName, indexSpec, outDir, ioPeon, capabilities, progress); } + @Override + public DimensionQueryHelper makeQueryHelper() + { + return new StringDimensionQueryHelper(dimensionName); + } + + public static final Function STRING_TRANSFORMER = new Function() + { + @Override + public String apply(final Object o) + { + if (o == null) { + return null; + } + if (o instanceof String) { + return (String) o; + } + return o.toString(); + } + }; + public static final Comparator ENCODED_COMPARATOR = new Comparator() { @Override diff --git a/processing/src/main/java/io/druid/segment/StringDimensionMergerLegacy.java b/processing/src/main/java/io/druid/segment/StringDimensionMergerLegacy.java index e55e999da80f..d4c4616a366c 100644 --- a/processing/src/main/java/io/druid/segment/StringDimensionMergerLegacy.java +++ b/processing/src/main/java/io/druid/segment/StringDimensionMergerLegacy.java @@ -213,4 +213,12 @@ public OutputStream getOutput() throws IOException spatialIoPeon.cleanup(); } } + + @Override + public File makeDimFile() throws IOException + { + return IndexIO.makeDimFile(outDir, dimensionName); + } } + + diff --git a/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java 
b/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java new file mode 100644 index 000000000000..aed0a539d329 --- /dev/null +++ b/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java @@ -0,0 +1,443 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.segment; + +import com.google.common.base.Function; +import com.google.common.base.Predicate; +import com.google.common.base.Strings; +import com.google.common.collect.Lists; +import com.google.common.hash.Hasher; +import com.google.common.primitives.Ints; +import io.druid.query.aggregation.Aggregator; +import io.druid.query.QueryDimensionInfo; +import io.druid.query.aggregation.cardinality.CardinalityAggregator; +import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector; +import io.druid.query.dimension.DefaultDimensionSpec; +import io.druid.query.dimension.DimensionSpec; +import io.druid.query.filter.DruidPredicateFactory; +import io.druid.query.filter.ValueMatcher; +import io.druid.query.search.search.SearchHit; +import io.druid.query.search.search.SearchQuerySpec; +import io.druid.query.topn.BaseTopNAlgorithm; +import io.druid.query.topn.TopNParams; +import io.druid.query.topn.TopNQuery; +import io.druid.segment.data.EmptyIndexedInts; +import io.druid.segment.data.IndexedInts; +import org.apache.commons.lang.mutable.MutableInt; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.TreeMap; + +public class StringDimensionQueryHelper implements DimensionQueryHelper +{ + private static final int GROUP_BY_MISSING_VALUE = -1; + private final String dimensionName; + + private static Comparator GROUPING_KEY_COMPARATOR = new Comparator() + { + @Override + public int compare(byte[] o1, byte[] o2) + { + int intLhs = Ints.fromByteArray(o1); + int intRhs = Ints.fromByteArray(o2); + return Ints.compare(intLhs, intRhs); + } + }; + + private static boolean isComparableNullOrEmpty(final Comparable value) + { + if (value instanceof String) { + return Strings.isNullOrEmpty((String) value); + } + return value == null; + } + + public StringDimensionQueryHelper(String dimensionName) { + 
this.dimensionName = dimensionName; + } + + @Override + public Object getColumnValueSelector(DimensionSpec dimensionSpec, ColumnSelectorFactory columnSelectorFactory) + { + return columnSelectorFactory.makeDimensionSelector(dimensionSpec); + } + + @Override + public int getRowSize(Object rowValues) + { + return ((IndexedInts) rowValues).size(); + } + + @Override + public int getCardinality(Object valueSelector) + { + return ((DimensionSelector) valueSelector).getValueCardinality(); + } + + public ValueMatcher getValueMatcher(ColumnSelectorFactory cursor, final Comparable value) + { + final DimensionSelector selector = cursor.makeDimensionSelector( + new DefaultDimensionSpec(dimensionName, dimensionName) + ); + + // if matching against null, rows with size 0 should also match + final boolean matchNull = isComparableNullOrEmpty(value); + + final int cardinality = selector.getValueCardinality(); + + if (cardinality >= 0) { + // Dictionary-encoded dimension. Compare by id instead of by value to save time. + final int valueId = selector.lookupId((String) value); + + return new ValueMatcher() + { + @Override + public boolean matches() + { + final IndexedInts row = selector.getRow(); + final int size = row.size(); + if (size == 0) { + // null should match empty rows in multi-value columns + return matchNull; + } else { + for (int i = 0; i < size; ++i) { + if (row.get(i) == valueId) { + return true; + } + } + return false; + } + } + }; + } else { + // Not dictionary-encoded. Skip the optimization. 
+ return new ValueMatcher() + { + @Override + public boolean matches() + { + final IndexedInts row = selector.getRow(); + final int size = row.size(); + if (size == 0) { + // null should match empty rows in multi-value columns + return matchNull; + } else { + for (int i = 0; i < size; ++i) { + if (Objects.equals(selector.lookupName(row.get(i)), value)) { + return true; + } + } + return false; + } + } + }; + } + } + + public ValueMatcher getValueMatcher(ColumnSelectorFactory cursor, final DruidPredicateFactory predicateFactory) + { + final DimensionSelector selector = cursor.makeDimensionSelector( + new DefaultDimensionSpec(dimensionName, dimensionName) + ); + + final Predicate predicate = predicateFactory.makeStringPredicate(); + final int cardinality = selector.getValueCardinality(); + final boolean matchNull = predicate.apply(null); + + if (cardinality >= 0) { + // Dictionary-encoded dimension. Check every value; build a bitset of matching ids. + final BitSet valueIds = new BitSet(cardinality); + for (int i = 0; i < cardinality; i++) { + if (predicate.apply(selector.lookupName(i))) { + valueIds.set(i); + } + } + + return new ValueMatcher() + { + @Override + public boolean matches() + { + final IndexedInts row = selector.getRow(); + final int size = row.size(); + if (size == 0) { + // null should match empty rows in multi-value columns + return matchNull; + } else { + for (int i = 0; i < size; ++i) { + if (valueIds.get(row.get(i))) { + return true; + } + } + return false; + } + } + }; + } else { + // Not dictionary-encoded. Skip the optimization. 
+ return new ValueMatcher() + { + @Override + public boolean matches() + { + final IndexedInts row = selector.getRow(); + final int size = row.size(); + if (size == 0) { + // null should match empty rows in multi-value columns + return matchNull; + } else { + for (int i = 0; i < size; ++i) { + if (predicate.apply(selector.lookupName(row.get(i)))) { + return true; + } + } + return false; + } + } + }; + } + } + + public void hashRow(Object dimSelector, Hasher hasher) + { + final DimensionSelector selector = (DimensionSelector) dimSelector; + final IndexedInts row = selector.getRow(); + final int size = row.size(); + // nothing to add to hasher if size == 0, only handle size == 1 and size != 0 cases. + if (size == 1) { + final String value = selector.lookupName(row.get(0)); + hasher.putUnencodedChars(value != null ? value : CardinalityAggregator.NULL_STRING); + } else if (size != 0) { + final String[] values = new String[size]; + for (int i = 0; i < size; ++i) { + final String value = selector.lookupName(row.get(i)); + values[i] = value != null ? value : CardinalityAggregator.NULL_STRING; + } + // Values need to be sorted to ensure consistent multi-value ordering across different segments + Arrays.sort(values); + for (int i = 0; i < size; ++i) { + if (i != 0) { + hasher.putChar(CardinalityAggregator.SEPARATOR); + } + hasher.putUnencodedChars(values[i]); + } + } + } + + public void hashValues(Object dimSelector, HyperLogLogCollector collector) + { + final DimensionSelector selector = (DimensionSelector) dimSelector; + for (final Integer index : selector.getRow()) { + final String value = selector.lookupName(index); + collector.add(CardinalityAggregator.hashFn.hashUnencodedChars(value == null ? 
CardinalityAggregator.NULL_STRING : value).asBytes()); + } + } + + @Override + public int getGroupingKeySize() + { + return Ints.BYTES; + } + + @Override + public Comparator getGroupingKeyByteComparator() + { + return GROUPING_KEY_COMPARATOR; + } + + @Override + public void readDimValueFromGroupingKey( + Map theEvent, String outputName, Object dimSelector, ByteBuffer keyBuffer + ) + { + final DimensionSelector selector = (DimensionSelector) dimSelector; + final int dimVal = keyBuffer.getInt(); + if (dimVal != GROUP_BY_MISSING_VALUE) { + theEvent.put(outputName, selector.lookupName(dimVal)); + } + } + + @Override + public List addDimValuesToGroupingKey( + Object selector, ByteBuffer key, Function> updateValuesFn + ) + { + List unaggregatedBuffers = null; + final DimensionSelector dimSelector = (DimensionSelector) selector; + final IndexedInts row = dimSelector.getRow(); + if (row == null || row.size() == 0) { + ByteBuffer newKey = key.duplicate(); + newKey.putInt(GROUP_BY_MISSING_VALUE); + unaggregatedBuffers = updateValuesFn.apply(newKey); + } else { + for (Integer dimValue : row) { + ByteBuffer newKey = key.duplicate(); + newKey.putInt(dimValue); + unaggregatedBuffers = updateValuesFn.apply(newKey); + } + } + return unaggregatedBuffers; + } + + @Override + public Object getRowFromDimSelector(Object dimSelector) { + final DimensionSelector selector = (DimensionSelector) dimSelector; + IndexedInts values = selector == null ? 
EmptyIndexedInts.EMPTY_INDEXED_INTS : selector.getRow(); + return values; + } + + @Override + public int initializeGroupingKeyV2Dimension( + final Object valuesObj, + final ByteBuffer keyBuffer, + final int keyBufferPosition + ) + { + IndexedInts values = (IndexedInts) valuesObj; + final int rowSize = values.size(); + if (rowSize == 0) { + keyBuffer.putInt(keyBufferPosition, GROUP_BY_MISSING_VALUE); + } else { + keyBuffer.putInt(keyBufferPosition, values.get(0)); + } + return rowSize; + } + + @Override + public void addValueToGroupingKeyV2( + final Object values, + final int rowValueIdx, + final ByteBuffer keyBuffer, + final int keyBufferPosition + ) + { + IndexedInts intValues = (IndexedInts) values; + keyBuffer.putInt( + keyBufferPosition, + intValues.get(rowValueIdx) + ); + } + + @Override + public void readValueFromGroupingKeyV2(QueryDimensionInfo dimInfo, Map resultMap, ByteBuffer key) + { + final int id = key.getInt(dimInfo.keyBufferPosition); + + if (id >= 0) { + resultMap.put( + dimInfo.spec.getOutputName(), + ((DimensionSelector) dimInfo.selector).lookupName(id) + ); + } + } + + @Override + public Aggregator[][] getDimExtractionRowSelector(TopNParams params, TopNQuery query, Capabilities capabilities) + { + final BaseTopNAlgorithm.AggregatorArrayProvider provider = new BaseTopNAlgorithm.AggregatorArrayProvider( + (DimensionSelector) params.getDimSelector(), + query, + params.getCardinality(), + capabilities + ); + + // Unlike regular topN we cannot rely on ordering to optimize. 
+ // Optimization possibly requires a reverse lookup from value to ID, which is + // not possible when applying an extraction function + return provider.build(); + } + + @Override + public void dimExtractionScanAndAggregate(Object selector, Aggregator[][] rowSelector, Map aggregatesStore, Cursor cursor, TopNQuery query) + { + final DimensionSelector dimSelector = (DimensionSelector) selector; + final IndexedInts dimValues = dimSelector.getRow(); + + for (int i = 0; i < dimValues.size(); ++i) { + final int dimIndex = dimValues.get(i); + Aggregator[] theAggregators = rowSelector[dimIndex]; + if (theAggregators == null) { + final String key = dimSelector.lookupName(dimIndex); + theAggregators = aggregatesStore.get(key); + if (theAggregators == null) { + theAggregators = BaseTopNAlgorithm.makeAggregators(cursor, query.getAggregatorSpecs()); + aggregatesStore.put(key, theAggregators); + } + rowSelector[dimIndex] = theAggregators; + } + + for (Aggregator aggregator : theAggregators) { + aggregator.aggregate(); + } + } + } + + + @Override + public void addRowValuesToSelectResult(String outputName, Object dimSelector, Map theEvent) + { + final DimensionSelector selector = (DimensionSelector) dimSelector; + if (selector == null) { + theEvent.put(outputName, null); + } else { + final IndexedInts vals = selector.getRow(); + + if (vals.size() == 1) { + final String dimVal = selector.lookupName(vals.get(0)); + theEvent.put(outputName, dimVal); + } else { + List dimVals = Lists.newArrayList(); + for (int i = 0; i < vals.size(); ++i) { + dimVals.add(selector.lookupName(vals.get(i))); + } + theEvent.put(outputName, dimVals); + } + } + } + + @Override + public void updateSearchResultSet(String outputName, Object dimSelector, SearchQuerySpec searchQuerySpec, TreeMap set, int limit) + { + final DimensionSelector selector = (DimensionSelector) dimSelector; + + if (selector != null) { + final IndexedInts vals = selector.getRow(); + for (int i = 0; i < vals.size(); ++i) { + final String 
dimVal = selector.lookupName(vals.get(i)); + if (searchQuerySpec.accept(dimVal)) { + MutableInt counter = new MutableInt(1); + MutableInt prev = set.put(new SearchHit(outputName, dimVal), counter); + if (prev != null) { + counter.add(prev.intValue()); + } + if (set.size() >= limit) { + return; + } + } + } + } + } +} diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java index 0933f554bb13..532c41ab6d87 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java @@ -57,6 +57,7 @@ import io.druid.segment.Metadata; import io.druid.segment.NumericColumnSelector; import io.druid.segment.ObjectColumnSelector; +import io.druid.segment.StringDimensionHandler; import io.druid.segment.column.Column; import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ColumnCapabilitiesImpl; @@ -207,7 +208,8 @@ public ColumnCapabilities getColumnCapabilities(String columnName) // However, this method may still be called by FilteredAggregatorFactory's ValueMatcherFactory // to check column types. // Just return null, the caller will assume default types in that case. - return null; + //return null; + return columnCapabilities == null ? 
null : columnCapabilities.get(columnName); } @Override @@ -412,6 +414,9 @@ public IncrementalIndex( capabilities, dimSchema.getMultiValueHandling() ); + if (handler == null) { + handler = new StringDimensionHandler(dimName, null); + } addNewDimension(dimName, capabilities, handler); } columnCapabilities.put(dimName, capabilities); @@ -573,6 +578,9 @@ TimeAndDims toTimeAndDims(InputRow row) throws IndexSizeExceededException columnCapabilities.put(dimension, capabilities); } DimensionHandler handler = DimensionHandlerUtil.getHandlerFromCapabilities(dimension, capabilities, null); + if (handler == null) { + handler = new StringDimensionHandler(dimension, null); + } desc = addNewDimension(dimension, capabilities, handler); } DimensionHandler handler = desc.getHandler(); @@ -753,6 +761,9 @@ public void loadDimensionIterable(Iterable oldDimensionOrder, Map selectorList; + List dimInfoList; ByteBuffer buf; int pos; @@ -75,16 +78,19 @@ public String[] apply(Integer input) .cycle() .limit(MAX); - + final DimensionSpec dimSpec1 = new DefaultDimensionSpec("dim1", "dim1"); final CardinalityAggregatorTest.TestDimensionSelector dim1 = new CardinalityAggregatorTest.TestDimensionSelector(values, null); + final QueryDimensionInfo dimInfo1 = new QueryDimensionInfo(dimSpec1, new StringDimensionQueryHelper("dim1"), dim1, 0); selectorList = Lists.newArrayList( (DimensionSelector) dim1 ); + dimInfoList = Lists.newArrayList(dimInfo1); + agg = new CardinalityBufferAggregator( - selectorList, + dimInfoList, byRow ); diff --git a/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java b/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java index a6c7f00033fa..be2bee97adce 100644 --- a/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java +++ b/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java @@ -31,6 +31,7 @@ import 
io.druid.query.aggregation.Aggregator; import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.BufferAggregator; +import io.druid.query.QueryDimensionInfo; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; import io.druid.query.dimension.ExtractionDimensionSpec; @@ -39,6 +40,7 @@ import io.druid.query.extraction.JavaScriptExtractionFn; import io.druid.query.extraction.RegexDimExtractionFn; import io.druid.segment.DimensionSelector; +import io.druid.segment.StringDimensionQueryHelper; import io.druid.segment.data.IndexedInts; import it.unimi.dsi.fastutil.ints.IntIterator; import it.unimi.dsi.fastutil.ints.IntIterators; @@ -244,25 +246,36 @@ private static void bufferAggregate( } } + List dimInfoList; List selectorList; CardinalityAggregatorFactory rowAggregatorFactory; CardinalityAggregatorFactory valueAggregatorFactory; final TestDimensionSelector dim1; final TestDimensionSelector dim2; + List dimInfoListWithExtraction; List selectorListWithExtraction; final TestDimensionSelector dim1WithExtraction; final TestDimensionSelector dim2WithExtraction; + List dimInfoListConstantVal; List selectorListConstantVal; final TestDimensionSelector dim1ConstantVal; final TestDimensionSelector dim2ConstantVal; + final DimensionSpec dimSpec1 = new DefaultDimensionSpec("dim1", "dim1"); + final DimensionSpec dimSpec2 = new DefaultDimensionSpec("dim2", "dim2"); + public CardinalityAggregatorTest() { dim1 = new TestDimensionSelector(values1, null); dim2 = new TestDimensionSelector(values2, null); + dimInfoList = Lists.newArrayList( + new QueryDimensionInfo(dimSpec1, new StringDimensionQueryHelper("dim1"), dim1, 0), + new QueryDimensionInfo(dimSpec2, new StringDimensionQueryHelper("dim2"), dim2, 0) + ); + selectorList = Lists.newArrayList( (DimensionSelector) dim1, dim2 @@ -271,8 +284,8 @@ public CardinalityAggregatorTest() rowAggregatorFactory = new CardinalityAggregatorFactory( "billy", 
Lists.newArrayList( - new DefaultDimensionSpec("dim1", "dim1"), - new DefaultDimensionSpec("dim2", "dim2") + dimSpec1, + dimSpec2 ), true ); @@ -280,8 +293,8 @@ public CardinalityAggregatorTest() valueAggregatorFactory = new CardinalityAggregatorFactory( "billy", Lists.newArrayList( - new DefaultDimensionSpec("dim1", "dim1"), - new DefaultDimensionSpec("dim2", "dim2") + dimSpec1, + dimSpec2 ), false ); @@ -295,6 +308,10 @@ public CardinalityAggregatorTest() (DimensionSelector) dim1WithExtraction, dim2WithExtraction ); + dimInfoListWithExtraction = Lists.newArrayList( + new QueryDimensionInfo(dimSpec1, new StringDimensionQueryHelper("dim1"), dim1WithExtraction, 0), + new QueryDimensionInfo(dimSpec2, new StringDimensionQueryHelper("dim2"), dim2WithExtraction, 0) + ); String helloJsFn = "function(str) { return 'hello' }"; ExtractionFn helloFn = new JavaScriptExtractionFn(helloJsFn, false, JavaScriptConfig.getDefault()); @@ -304,13 +321,19 @@ public CardinalityAggregatorTest() (DimensionSelector) dim1ConstantVal, dim2ConstantVal ); + dimInfoListConstantVal = Lists.newArrayList( + new QueryDimensionInfo(dimSpec1, new StringDimensionQueryHelper("dim1"), dim1ConstantVal, 0), + new QueryDimensionInfo(dimSpec2, new StringDimensionQueryHelper("dim2"), dim2ConstantVal, 0) + ); + } @Test public void testAggregateRows() throws Exception { CardinalityAggregator agg = new CardinalityAggregator( - selectorList, + "billy", + dimInfoList, true ); @@ -325,7 +348,8 @@ public void testAggregateRows() throws Exception public void testAggregateValues() throws Exception { CardinalityAggregator agg = new CardinalityAggregator( - selectorList, + "billy", + dimInfoList, false ); @@ -339,7 +363,7 @@ public void testAggregateValues() throws Exception public void testBufferAggregateRows() throws Exception { CardinalityBufferAggregator agg = new CardinalityBufferAggregator( - selectorList, + dimInfoList, true ); @@ -360,7 +384,7 @@ public void testBufferAggregateRows() throws Exception public 
void testBufferAggregateValues() throws Exception { CardinalityBufferAggregator agg = new CardinalityBufferAggregator( - selectorList, + dimInfoList, false ); @@ -382,9 +406,15 @@ public void testCombineRows() { List selector1 = Lists.newArrayList((DimensionSelector) dim1); List selector2 = Lists.newArrayList((DimensionSelector) dim2); + List dimInfo1 = Lists.newArrayList( + new QueryDimensionInfo(dimSpec1, new StringDimensionQueryHelper("dim1"), dim1, 0) + ); + List dimInfo2 = Lists.newArrayList( + new QueryDimensionInfo(dimSpec2, new StringDimensionQueryHelper("dim2"), dim2, 0) + ); - CardinalityAggregator agg1 = new CardinalityAggregator(selector1, true); - CardinalityAggregator agg2 = new CardinalityAggregator(selector2, true); + CardinalityAggregator agg1 = new CardinalityAggregator("billy", dimInfo1, true); + CardinalityAggregator agg2 = new CardinalityAggregator("billy", dimInfo2, true); for (int i = 0; i < values1.size(); ++i) { aggregate(selector1, agg1); @@ -414,8 +444,15 @@ public void testCombineValues() List selector1 = Lists.newArrayList((DimensionSelector) dim1); List selector2 = Lists.newArrayList((DimensionSelector) dim2); - CardinalityAggregator agg1 = new CardinalityAggregator(selector1, false); - CardinalityAggregator agg2 = new CardinalityAggregator(selector2, false); + List dimInfo1 = Lists.newArrayList( + new QueryDimensionInfo(dimSpec1, new StringDimensionQueryHelper("dim1"), dim1, 0) + ); + List dimInfo2 = Lists.newArrayList( + new QueryDimensionInfo(dimSpec2, new StringDimensionQueryHelper("dim2"), dim2, 0) + ); + + CardinalityAggregator agg1 = new CardinalityAggregator("billy", dimInfo1, false); + CardinalityAggregator agg2 = new CardinalityAggregator("billy", dimInfo2, false); for (int i = 0; i < values1.size(); ++i) { aggregate(selector1, agg1); @@ -443,7 +480,8 @@ public void testCombineValues() public void testAggregateRowsWithExtraction() throws Exception { CardinalityAggregator agg = new CardinalityAggregator( - 
selectorListWithExtraction, + "billy", + dimInfoListWithExtraction, true ); for (int i = 0; i < values1.size(); ++i) { @@ -452,7 +490,8 @@ public void testAggregateRowsWithExtraction() throws Exception Assert.assertEquals(9.0, (Double) rowAggregatorFactory.finalizeComputation(agg.get()), 0.05); CardinalityAggregator agg2 = new CardinalityAggregator( - selectorListConstantVal, + "billy", + dimInfoListConstantVal, true ); for (int i = 0; i < values1.size(); ++i) { @@ -465,7 +504,8 @@ public void testAggregateRowsWithExtraction() throws Exception public void testAggregateValuesWithExtraction() throws Exception { CardinalityAggregator agg = new CardinalityAggregator( - selectorListWithExtraction, + "billy", + dimInfoListWithExtraction, false ); for (int i = 0; i < values1.size(); ++i) { @@ -474,7 +514,8 @@ public void testAggregateValuesWithExtraction() throws Exception Assert.assertEquals(7.0, (Double) valueAggregatorFactory.finalizeComputation(agg.get()), 0.05); CardinalityAggregator agg2 = new CardinalityAggregator( - selectorListConstantVal, + "billy", + dimInfoListConstantVal, false ); for (int i = 0; i < values1.size(); ++i) { diff --git a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java index 40d335a2a900..79491bf57595 100644 --- a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java @@ -718,6 +718,7 @@ public void testMultipleDimensionsOneOfWhichIsMultiValue1() ); Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); + List res = Lists.newArrayList(results); TestHelper.assertExpectedObjects(expectedResults, results, ""); } @@ -4986,7 +4987,8 @@ public void testSubqueryWithOuterCardinalityAggregator() // COMPLEX is not currently supported as a dimension type, so IAE is thrown. 
Even if it were, the actual string // values in the "quality" column could not be interpreted as hyperUniques. if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { - expectedException.expect(IAE.class); + //expectedException.expect(IAE.class); + expectedException.expect(ClassCastException.class); GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); } else { List expectedResults = Arrays.asList( @@ -5089,8 +5091,29 @@ public void testSubqueryWithOuterJavascriptAggregators() // Additionally, the V1 strategy always uses "combining" aggregator factories (meant for merging) on the subquery, // which does not work for this particular javascript agg. if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { - expectedException.expect(IAE.class); - GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); + List expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "quality", "automotive", "js_agg", Double.NaN), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "quality", "business", "js_agg", Double.NaN), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "quality", "entertainment", "js_agg", Double.NaN), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "quality", "health", "js_agg", Double.NaN), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "quality", "mezzanine", "js_agg", Double.NaN), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "quality", "news", "js_agg", Double.NaN), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "quality", "premium", "js_agg", Double.NaN), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "quality", "technology", "js_agg", Double.NaN), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "quality", "travel", "js_agg", Double.NaN), + + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "quality", "automotive", "js_agg", 
Double.NaN), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "quality", "business", "js_agg", Double.NaN), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "quality", "entertainment", "js_agg", Double.NaN), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "quality", "health", "js_agg", Double.NaN), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "quality", "mezzanine", "js_agg", Double.NaN), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "quality", "news", "js_agg", Double.NaN), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "quality", "premium", "js_agg", Double.NaN), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "quality", "technology", "js_agg", Double.NaN), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "quality", "travel", "js_agg", Double.NaN) + ); + Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); + TestHelper.assertExpectedObjects(expectedResults, results, ""); } else { List expectedResults = Arrays.asList( GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "quality", "automotive", "js_agg", 139D), From 17cad2a6a6d41aada563121d14a30e156e6b1c52 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Wed, 9 Nov 2016 14:43:29 -0800 Subject: [PATCH 02/12] PR comment changes --- .../io/druid/query/QueryDimensionInfo.java | 45 ++- .../FilteredAggregatorFactory.java | 16 +- .../cardinality/CardinalityAggregator.java | 3 - .../CardinalityAggregatorFactory.java | 9 +- .../query/filter/ValueMatcherFactory.java | 2 +- .../query/groupby/GroupByQueryEngine.java | 73 ++-- .../epinephelinae/GroupByQueryEngineV2.java | 21 +- .../druid/query/search/SearchQueryRunner.java | 335 ++++++++++-------- .../druid/query/select/SelectQueryEngine.java | 15 +- .../java/io/druid/query/topn/TopNMapFn.java | 7 +- .../java/io/druid/query/topn/TopNParams.java | 2 +- .../io/druid/query/topn/TopNQueryEngine.java | 6 - 
.../io/druid/segment/ColumnValueSelector.java | 24 ++ .../io/druid/segment/DimensionHandler.java | 20 +- ...erUtil.java => DimensionHandlerUtils.java} | 41 +-- .../io/druid/segment/DimensionIndexer.java | 36 +- .../io/druid/segment/DimensionMerger.java | 10 +- .../druid/segment/DimensionMergerLegacy.java | 8 +- .../io/druid/segment/DimensionMergerV9.java | 2 +- .../druid/segment/DimensionQueryHelper.java | 109 +++--- .../io/druid/segment/DimensionSelector.java | 2 +- .../io/druid/segment/FloatColumnSelector.java | 2 +- .../java/io/druid/segment/IndexMerger.java | 2 +- .../io/druid/segment/LongColumnSelector.java | 2 +- .../druid/segment/ObjectColumnSelector.java | 2 +- .../segment/QueryableIndexStorageAdapter.java | 7 +- .../druid/segment/SimpleQueryableIndex.java | 2 +- .../druid/segment/StringDimensionHandler.java | 2 +- .../segment/StringDimensionQueryHelper.java | 152 ++++---- .../segment/incremental/IncrementalIndex.java | 21 +- .../query/groupby/GroupByQueryRunnerTest.java | 1 - 31 files changed, 524 insertions(+), 455 deletions(-) create mode 100644 processing/src/main/java/io/druid/segment/ColumnValueSelector.java rename processing/src/main/java/io/druid/segment/{DimensionHandlerUtil.java => DimensionHandlerUtils.java} (62%) diff --git a/processing/src/main/java/io/druid/query/QueryDimensionInfo.java b/processing/src/main/java/io/druid/query/QueryDimensionInfo.java index b7be2f61f9a6..8e1d3e7b72d5 100644 --- a/processing/src/main/java/io/druid/query/QueryDimensionInfo.java +++ b/processing/src/main/java/io/druid/query/QueryDimensionInfo.java @@ -20,21 +20,56 @@ package io.druid.query; import io.druid.query.dimension.DimensionSpec; +import io.druid.segment.ColumnValueSelector; import io.druid.segment.DimensionQueryHelper; +/** + * A grouping of various related objects used during query processing for a single dimension, used for convenience. + * + * Each QueryDimensionInfo is associated with a single dimension. 
+ */ public class QueryDimensionInfo { + /** + * The DimensionSpec representing this QueryDimensionInfo's dimension, taken from the query being processed. + */ public final DimensionSpec spec; + + /** + * Helper object that handles type-specific operations for this dimension within query processing engines. + */ public final DimensionQueryHelper queryHelper; + + /** + * Internal name of the dimension. + */ public final String name; + + /** + * Name of the dimension to be returned in query results. + */ public final String outputName; - public final Object selector; + + /** + * Column value selector for this dimension, e.g. a DimensionSelector for String dimensions. + */ + public final ColumnValueSelector selector; + + /** + * Cardinality of the dimension's value set, taken from the queryHelper. + */ + public final int cardinality; + + /** + * Used by the GroupBy engines, indicates the offset of this dimension's value within the grouping key. + */ public final int keyBufferPosition; + public QueryDimensionInfo( DimensionSpec spec, DimensionQueryHelper queryHelper, - Object selector, + ColumnValueSelector selector, int keyBufferPosition ) { @@ -43,11 +78,7 @@ public QueryDimensionInfo( this.name = spec.getDimension(); this.outputName = spec.getOutputName(); this.selector = selector; + this.cardinality = queryHelper.getCardinality(selector); this.keyBufferPosition = keyBufferPosition; } - - public int getCardinality() - { - return queryHelper.getCardinality(selector); - } } diff --git a/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java b/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java index 3006844aeea5..c16bbfd6f84d 100644 --- a/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java +++ b/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java @@ -21,33 +21,23 @@ import com.fasterxml.jackson.annotation.JsonProperty; import 
com.google.common.base.Preconditions; -import com.google.common.base.Predicate; -import com.google.common.base.Strings; -import com.google.common.collect.Lists; -import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.filter.DimFilter; import io.druid.query.filter.DruidLongPredicate; import io.druid.query.filter.DruidPredicateFactory; import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcherFactory; import io.druid.segment.ColumnSelectorFactory; -import io.druid.segment.DimensionHandler; -import io.druid.segment.DimensionHandlerUtil; +import io.druid.segment.DimensionHandlerUtils; import io.druid.segment.DimensionQueryHelper; -import io.druid.segment.DimensionSelector; -import io.druid.segment.StringDimensionHandler; import io.druid.segment.column.Column; import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ValueType; -import io.druid.segment.data.IndexedInts; import io.druid.segment.filter.BooleanValueMatcher; import io.druid.segment.filter.Filters; import java.nio.ByteBuffer; -import java.util.BitSet; import java.util.Comparator; import java.util.List; -import java.util.Objects; public class FilteredAggregatorFactory extends AggregatorFactory { @@ -233,7 +223,7 @@ public ValueMatcher makeValueMatcher(final String dimension, final Comparable va ); } - final DimensionQueryHelper queryHelper = DimensionHandlerUtil.makeQueryHelper(dimension, columnSelectorFactory, null); + final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeQueryHelper(dimension, columnSelectorFactory, null); return queryHelper.getValueMatcher(columnSelectorFactory, value); } @@ -244,7 +234,7 @@ public ValueMatcher makeValueMatcher(final String dimension, final DruidPredicat case LONG: return makeLongValueMatcher(dimension, predicateFactory.makeLongPredicate()); case STRING: - final DimensionQueryHelper queryHelper = DimensionHandlerUtil.makeQueryHelper(dimension, columnSelectorFactory, null); + final 
DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeQueryHelper(dimension, columnSelectorFactory, null); return queryHelper.getValueMatcher(columnSelectorFactory, predicateFactory); default: return new BooleanValueMatcher(predicateFactory.makeStringPredicate().apply(null)); diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java index 8307db027b34..ef9d2440582d 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java @@ -30,14 +30,11 @@ public class CardinalityAggregator implements Aggregator { - public static final String NULL_STRING = "\u0000"; - private final String name; private final List dimInfoList; private final boolean byRow; public static final HashFunction hashFn = Hashing.murmur3_128(); - public static final char SEPARATOR = '\u0001'; protected static void hashRow(List dimInfoList, HyperLogLogCollector collector) { diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java index 2dd843f82547..dc994eb896d7 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java @@ -37,7 +37,8 @@ import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; import io.druid.segment.ColumnSelectorFactory; -import io.druid.segment.DimensionHandlerUtil; +import io.druid.segment.ColumnValueSelector; +import io.druid.segment.DimensionHandlerUtils; import io.druid.segment.DimensionQueryHelper; import org.apache.commons.codec.binary.Base64; @@ 
-157,10 +158,10 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory columnFactory) private List makeDimensionInfoList(final ColumnSelectorFactory columnSelectorFactory) { - List dimInfoList = Lists.newArrayList(); + List dimInfoList = new ArrayList(fields.size()); for (DimensionSpec dimSpec : fields) { - DimensionQueryHelper queryHelper = DimensionHandlerUtil.makeQueryHelper(dimSpec.getDimension(), columnSelectorFactory, null); - Object dimSelector = queryHelper.getColumnValueSelector(dimSpec, columnSelectorFactory); + DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeQueryHelper(dimSpec.getDimension(), columnSelectorFactory, null); + ColumnValueSelector dimSelector = queryHelper.getColumnValueSelector(dimSpec, columnSelectorFactory); QueryDimensionInfo dimInfo = new QueryDimensionInfo(dimSpec, queryHelper, dimSelector, 0); dimInfoList.add(dimInfo); } diff --git a/processing/src/main/java/io/druid/query/filter/ValueMatcherFactory.java b/processing/src/main/java/io/druid/query/filter/ValueMatcherFactory.java index 7cc73267ee1c..d95dabc3e641 100644 --- a/processing/src/main/java/io/druid/query/filter/ValueMatcherFactory.java +++ b/processing/src/main/java/io/druid/query/filter/ValueMatcherFactory.java @@ -40,7 +40,7 @@ public interface ValueMatcherFactory * An implementation of this method should be able to handle dimensions of various types. * * @param dimension The dimension to filter. - * @param value The value to match against. + * @param value The value to match against, represented as a String. * * @return An object that matches row values on the provided value. 
*/ diff --git a/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java b/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java index 69961128464c..e48adf9ec6c2 100644 --- a/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java +++ b/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java @@ -45,8 +45,9 @@ import io.druid.query.QueryDimensionInfo; import io.druid.query.dimension.DimensionSpec; import io.druid.query.filter.Filter; +import io.druid.segment.ColumnValueSelector; import io.druid.segment.Cursor; -import io.druid.segment.DimensionHandlerUtil; +import io.druid.segment.DimensionHandlerUtils; import io.druid.segment.DimensionQueryHelper; import io.druid.segment.StorageAdapter; import io.druid.segment.filter.Filters; @@ -70,26 +71,14 @@ public class GroupByQueryEngine private final Supplier config; private final StupidPool intermediateResultsBufferPool; - /* - * Relative reads change the current position of a ByteBuffer. - * The key comparator uses absolute reads to avoid changing the state of the ByteBuffer. - * ByteBuffer does not provide an absolute bulk get() method, so this method provides that functionality. 
- */ - private static void getBytesFromBuffer(ByteBuffer src, byte[] dst, int srcOffset, int readLen) - { - for (int i = 0; i < readLen; i++) { - dst[i] = src.get(srcOffset + i); - } - } - - private static final Comparator makeKeyComparator(final List dimInfo) + private static Comparator makeKeyComparator(final List dimInfo) { final int maxDimIndex = dimInfo.size(); - final Comparator[] comparators = new Comparator[maxDimIndex]; + final DimensionQueryHelper[] queryHelpers = new DimensionQueryHelper[maxDimIndex]; final int[] keySizes = new int[maxDimIndex]; for (int i = 0; i < maxDimIndex; i++) { - comparators[i] = dimInfo.get(i).queryHelper.getGroupingKeyByteComparator(); + queryHelpers[i] = dimInfo.get(i).queryHelper; keySizes[i] = dimInfo.get(i).queryHelper.getGroupingKeySize(); } @@ -103,14 +92,8 @@ public int compare(ByteBuffer o1, ByteBuffer o2) int dimIndex = 0; while (pos < limit && dimIndex < maxDimIndex) { - int valLen = keySizes[dimIndex]; - byte[] bytes1 = new byte[valLen]; - byte[] bytes2 = new byte[valLen]; - - getBytesFromBuffer(o1, bytes1, pos, valLen); - getBytesFromBuffer(o2, bytes2, pos, valLen); - pos += valLen; - ret = comparators[dimIndex].compare(bytes1, bytes2); + ret = queryHelpers[dimIndex].compareGroupingKeys(o1, pos, o2, pos); + pos += keySizes[dimIndex]; if (ret != 0) { return ret; } @@ -231,23 +214,23 @@ public Map getPositions() private List updateValues( ByteBuffer key, - List dims + final List dims, + final int curIdx ) { - if (dims.size() > 0) { + if (curIdx < dims.size()) { List retVal = null; List unaggregatedBuffers = null; - final QueryDimensionInfo dimInfo = dims.get(0); - final Object selector = dimInfo.selector; + final QueryDimensionInfo dimInfo = dims.get(curIdx); + final ColumnValueSelector selector = dimInfo.selector; final DimensionQueryHelper queryHelper = dimInfo.queryHelper; - final List dimInfoSublist = dims.subList(1, dims.size()); final Function> updateValuesFn = new Function>() { @Override public List 
apply(ByteBuffer input) { - return updateValues(input, dimInfoSublist); + return updateValues(input, dims, curIdx + 1); } }; @@ -359,7 +342,8 @@ private static class RowIterator implements CloseableIterator private Iterator delegate; private final List dimInfoList; - private final int keySize; + // total size of the grouping key in bytes + private final int totalKeySize; public RowIterator(GroupByQuery query, final Cursor cursor, ByteBuffer metricsBuffer, GroupByQueryConfig config, StorageAdapter adapter) { @@ -375,18 +359,17 @@ public RowIterator(GroupByQuery query, final Cursor cursor, ByteBuffer metricsBu dimensionSpecs = query.getDimensions(); dimInfoList = Lists.newArrayListWithExpectedSize(dimensionSpecs.size()); - for (int i = 0; i < dimensionSpecs.size(); ++i) { - final DimensionSpec dimSpec = dimensionSpecs.get(i); - final DimensionQueryHelper queryHelper = DimensionHandlerUtil.makeQueryHelper( + for (DimensionSpec dimSpec : dimensionSpecs) { + final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeQueryHelper( dimSpec.getDimension(), cursor, Lists.newArrayList(adapter.getAvailableDimensions()) ); - final Object selector = queryHelper.getColumnValueSelector(dimSpec, cursor); + final ColumnValueSelector selector = queryHelper.getColumnValueSelector(dimSpec, cursor); QueryDimensionInfo info = new QueryDimensionInfo(dimSpec, queryHelper, selector, 0); dimInfoList.add(info); } - keySize = getTotalKeySize(); + totalKeySize = getTotalKeySize(); aggregatorSpecs = query.getAggregatorSpecs(); aggregators = new BufferAggregator[aggregatorSpecs.size()]; @@ -430,7 +413,7 @@ public Row next() final RowUpdater rowUpdater = new RowUpdater(metricsBuffer, aggregators, positionMaintainer, dimInfoList); if (unprocessedKeys != null) { for (ByteBuffer key : unprocessedKeys) { - final List unprocUnproc = rowUpdater.updateValues(key, ImmutableList.of()); + final List unprocUnproc = rowUpdater.updateValues(key, ImmutableList.of(), 0); if (unprocUnproc != null) { throw 
new ISE("Not enough memory to process the request."); } @@ -438,9 +421,9 @@ public Row next() cursor.advance(); } while (!cursor.isDone() && rowUpdater.getNumRows() < maxIntermediateRows) { - ByteBuffer key = ByteBuffer.allocate(keySize); + ByteBuffer key = ByteBuffer.allocate(totalKeySize); - unprocessedKeys = rowUpdater.updateValues(key, dimInfoList); + unprocessedKeys = rowUpdater.updateValues(key, dimInfoList, 0); if (unprocessedKeys != null) { break; } @@ -469,10 +452,14 @@ public Row apply(@Nullable Map.Entry input) Map theEvent = Maps.newLinkedHashMap(); ByteBuffer keyBuffer = input.getKey().duplicate(); - for (int i = 0; i < dimInfoList.size(); ++i) { - final QueryDimensionInfo dimInfo = dimInfoList.get(i); - final Object dimSelector = dimInfo.selector; - dimInfo.queryHelper.readDimValueFromGroupingKey(theEvent, dimInfo.outputName, dimSelector, keyBuffer); + for (QueryDimensionInfo dimInfo : dimInfoList) { + final ColumnValueSelector dimSelector = dimInfo.selector; + dimInfo.queryHelper.processDimValueFromGroupingKey( + dimInfo.outputName, + dimSelector, + keyBuffer, + theEvent + ); } int position = input.getValue(); diff --git a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index 5884806e065c..a2c0811c56d3 100644 --- a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -40,8 +40,9 @@ import io.druid.query.groupby.GroupByQuery; import io.druid.query.groupby.GroupByQueryConfig; import io.druid.query.groupby.strategy.GroupByStrategyV2; +import io.druid.segment.ColumnValueSelector; import io.druid.segment.Cursor; -import io.druid.segment.DimensionHandlerUtil; +import io.druid.segment.DimensionHandlerUtils; import io.druid.segment.DimensionQueryHelper; import io.druid.segment.StorageAdapter; import 
io.druid.segment.filter.Filters; @@ -154,12 +155,12 @@ private static QueryDimensionInfo[] getDimensionInfo(GroupByQuery query, Storage for (int i = 0; i < dimCount; i++) { final DimensionSpec dimSpec = query.getDimensions().get(i); - final DimensionQueryHelper queryHelper = DimensionHandlerUtil.makeQueryHelper( + final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeQueryHelper( dimSpec.getDimension(), cursor, Lists.newArrayList(adapter.getAvailableDimensions()) ); - final Object selector = queryHelper.getColumnValueSelector(dimSpec, cursor); + final ColumnValueSelector selector = queryHelper.getColumnValueSelector(dimSpec, cursor); final QueryDimensionInfo dimInfo = new QueryDimensionInfo(dimSpec, queryHelper, selector, curPos); dims[i] = dimInfo; curPos += queryHelper.getGroupingKeySize(); @@ -246,7 +247,8 @@ public Row next() for (int i = 0; i < dims.length; i++) { final DimensionQueryHelper queryHelper = dims[i].queryHelper; valuess[i] = queryHelper.getRowFromDimSelector(dims[i].selector); - int rowSize = queryHelper.initializeGroupingKeyV2Dimension(valuess[i], keyBuffer, dims[i].keyBufferPosition); + int rowSize = queryHelper.getRowSize(valuess[i]); + queryHelper.initializeGroupingKeyV2Dimension(valuess[i], keyBuffer, dims[i].keyBufferPosition); stack[i] = rowSize == 0 ? 0 : 1; } } @@ -270,7 +272,8 @@ public Row next() dims[stackp].queryHelper.addValueToGroupingKeyV2(valuess[stackp], stack[stackp], keyBuffer, dims[stackp].keyBufferPosition); stack[stackp]++; for (int i = stackp + 1; i < stack.length; i++) { - int rowSize = dims[i].queryHelper.initializeGroupingKeyV2Dimension(valuess[i], keyBuffer, dims[i].keyBufferPosition); + int rowSize = dims[i].queryHelper.getRowSize(valuess[i]); + dims[i].queryHelper.initializeGroupingKeyV2Dimension(valuess[i], keyBuffer, dims[i].keyBufferPosition); stack[i] = rowSize == 0 ? 
0 : 1; } stackp = stack.length - 1; @@ -296,8 +299,8 @@ public Row apply(final Grouper.Entry entry) Map theMap = Maps.newLinkedHashMap(); // Add dimensions. - for (int i = 0; i < dims.length; i++) { - dims[i].queryHelper.readValueFromGroupingKeyV2(dims[i], theMap, entry.getKey()); + for (QueryDimensionInfo dimInfo : dims) { + dimInfo.queryHelper.processValueFromGroupingKeyV2(dimInfo, entry.getKey(), theMap); } // Add aggregations. @@ -349,8 +352,8 @@ private static class GroupByEngineKeySerde implements Grouper.KeySerdenewArrayList(adapter.getAvailableDimensions()) ); - final Object dimSelector = queryHelper.getColumnValueSelector(dimSpec, cursor); + final ColumnValueSelector dimSelector = queryHelper.getColumnValueSelector(dimSpec, cursor); final QueryDimensionInfo dimInfo = new QueryDimensionInfo(dimSpec, queryHelper, dimSelector, 0); return dimInfo; } @@ -106,7 +105,6 @@ public Sequence> run( final List dimensions = query.getDimensions(); final SearchQuerySpec searchQuerySpec = query.getQuery(); final int limit = query.getLimit(); - final boolean descending = query.isDescending(); final List intervals = query.getQuerySegmentSpec().getIntervals(); if (intervals.size() != 1) { throw new IAE("Should only have one interval, got[%s]", intervals); @@ -118,115 +116,23 @@ public Sequence> run( final StorageAdapter storageAdapter = segment.asStorageAdapter(); - // Split dimension list into bitmap-supporting list and non-bitmap supporting list - Iterable dimsToSearch; - if (dimensions == null || dimensions.isEmpty()) { - dimsToSearch = Iterables.transform(storageAdapter.getAvailableDimensions(), Druids.DIMENSION_IDENTITY); - } else { - dimsToSearch = dimensions; - } + final List bitmapDims = Lists.newArrayList(); + final List nonBitmapDims = Lists.newArrayList(); + partitionDimensionList(index, storageAdapter, dimensions, bitmapDims, nonBitmapDims); - final List bitmapDims; - final List nonbitmapDims; - if (index != null) { - bitmapDims = Lists.newArrayList(); - 
nonbitmapDims = Lists.newArrayList(); - for (DimensionSpec spec : dimsToSearch) { - if (spec.getDimension().equals(Column.TIME_COLUMN_NAME)) { - bitmapDims.add(spec); - continue; - } - ColumnCapabilities capabilities = storageAdapter.getColumnCapabilities(spec.getDimension()); - if (capabilities == null) { - continue; - } - - if (capabilities.hasBitmapIndexes()) { - bitmapDims.add(spec); - } else { - nonbitmapDims.add(spec); - } - } - } else { - // no QueryableIndex available, so nothing has bitmaps - bitmapDims = null; - nonbitmapDims = Lists.newArrayList(dimsToSearch); - } + final Object2IntRBTreeMap retVal = new Object2IntRBTreeMap(query.getSort().getComparator()); + retVal.defaultReturnValue(0); // Get results from bitmap supporting dims first - if (bitmapDims != null) { - final TreeMap retVal = Maps.newTreeMap(query.getSort().getComparator()); - final BitmapFactory bitmapFactory = index.getBitmapFactoryForDimensions(); - - final ImmutableBitmap baseFilter = - filter == null ? null : filter.getBitmapIndex(new ColumnSelectorBitmapIndexSelector(bitmapFactory, index)); - - ImmutableBitmap timeFilteredBitmap; - if (!interval.contains(segment.getDataInterval())) { - MutableBitmap timeBitmap = bitmapFactory.makeEmptyMutableBitmap(); - final Column timeColumn = index.getColumn(Column.TIME_COLUMN_NAME); - try (final GenericColumn timeValues = timeColumn.getGenericColumn()) { - - int startIndex = Math.max(0, getStartIndexOfTime(timeValues, interval.getStartMillis(), true)); - int endIndex = Math.min( - timeValues.length() - 1, - getStartIndexOfTime(timeValues, interval.getEndMillis(), false) - ); - - for (int i = startIndex; i <= endIndex; i++) { - timeBitmap.add(i); - } - - final ImmutableBitmap finalTimeBitmap = bitmapFactory.makeImmutableBitmap(timeBitmap); - timeFilteredBitmap = - (baseFilter == null) ? 
finalTimeBitmap : finalTimeBitmap.intersection(baseFilter); - } - } else { - timeFilteredBitmap = baseFilter; - } - - for (DimensionSpec dimension : bitmapDims) { - final Column column = index.getColumn(dimension.getDimension()); - if (column == null) { - continue; - } - - final BitmapIndex bitmapIndex = column.getBitmapIndex(); - ExtractionFn extractionFn = dimension.getExtractionFn(); - if (extractionFn == null) { - extractionFn = IdentityExtractionFn.getInstance(); - } - if (bitmapIndex != null) { - for (int i = 0; i < bitmapIndex.getCardinality(); ++i) { - String dimVal = Strings.nullToEmpty(extractionFn.apply(bitmapIndex.getValue(i))); - if (!searchQuerySpec.accept(dimVal)) { - continue; - } - ImmutableBitmap bitmap = bitmapIndex.getBitmap(i); - if (timeFilteredBitmap != null) { - bitmap = bitmapFactory.intersection(Arrays.asList(timeFilteredBitmap, bitmap)); - } - if (bitmap.size() > 0) { - MutableInt counter = new MutableInt(bitmap.size()); - MutableInt prev = retVal.put(new SearchHit(dimension.getOutputName(), dimVal), counter); - if (prev != null) { - counter.add(prev.intValue()); - } - if (retVal.size() >= limit) { - return makeReturnResult(limit, retVal); - } - } - } - } - } - - if (nonbitmapDims.size() == 0 || retVal.size() >= limit) { + if (!bitmapDims.isEmpty()) { + processBitmapDims(index, filter, interval, bitmapDims, searchQuerySpec, limit, retVal); + // If there are no non-bitmap dims to search, or we've already hit the result limit, just return now + if (nonBitmapDims.size() == 0 || retVal.size() >= limit) { return makeReturnResult(limit, retVal); } } final StorageAdapter adapter = segment.asStorageAdapter(); - if (adapter == null) { log.makeAlert("WTF!? Unable to process search query on segment.") .addData("segment", segment.getIdentifier()) @@ -235,43 +141,7 @@ public Sequence> run( "Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped." 
); } - - final Sequence cursors = adapter.makeCursors(filter, interval, query.getGranularity(), descending); - - final TreeMap retVal = cursors.accumulate( - Maps.newTreeMap(query.getSort().getComparator()), - new Accumulator, Cursor>() - { - @Override - public TreeMap accumulate(TreeMap set, Cursor cursor) - { - if (set.size() >= limit) { - return set; - } - - Map dimInfoMap = Maps.newLinkedHashMap(); - - for (DimensionSpec dim : nonbitmapDims) { - dimInfoMap.put(dim.getOutputName(), getDimInfoFromSpec(dim, adapter, cursor)); - } - while (!cursor.isDone()) { - for (Map.Entry entry : dimInfoMap.entrySet()) { - final QueryDimensionInfo dimInfo = entry.getValue(); - dimInfo.queryHelper.updateSearchResultSet(dimInfo.outputName, dimInfo.selector, searchQuerySpec, set, limit); - - if (set.size() >= limit) { - return set; - } - } - - cursor.advance(); - } - - return set; - } - } - ); - + processNonBitmapDims(query, adapter, filter, interval, limit, nonBitmapDims, searchQuerySpec, retVal); return makeReturnResult(limit, retVal); } @@ -306,13 +176,15 @@ protected int getStartIndexOfTime(GenericColumn timeValues, long time, boolean i } private Sequence> makeReturnResult( - int limit, TreeMap retVal) + int limit, + Object2IntRBTreeMap retVal + ) { Iterable source = Iterables.transform( - retVal.entrySet(), new Function, SearchHit>() + retVal.entrySet(), new Function, SearchHit>() { @Override - public SearchHit apply(Map.Entry input) + public SearchHit apply(Map.Entry input) { SearchHit hit = input.getKey(); return new SearchHit(hit.getDimension(), hit.getValue(), input.getValue().intValue()); @@ -330,4 +202,171 @@ public SearchHit apply(Map.Entry input) ) ); } + + // Split dimension list into bitmap-supporting list and non-bitmap supporting list + private void partitionDimensionList( + QueryableIndex index, + StorageAdapter storageAdapter, + List dimensions, + List bitmapDims, + List nonBitmapDims + ) + { + List dimsToSearch; + if (dimensions == null || dimensions.isEmpty()) 
{ + dimsToSearch = Lists.newArrayList(Iterables.transform( + storageAdapter.getAvailableDimensions(), + Druids.DIMENSION_IDENTITY + )); + } else { + dimsToSearch = dimensions; + } + + if (index != null) { + for (DimensionSpec spec : dimsToSearch) { + if (spec.getDimension().equals(Column.TIME_COLUMN_NAME)) { + bitmapDims.add(spec); + continue; + } + ColumnCapabilities capabilities = storageAdapter.getColumnCapabilities(spec.getDimension()); + if (capabilities == null) { + continue; + } + + if (capabilities.hasBitmapIndexes()) { + bitmapDims.add(spec); + } else { + nonBitmapDims.add(spec); + } + } + } else { + // no QueryableIndex available, so nothing has bitmaps + nonBitmapDims.addAll(dimsToSearch); + } + } + + private void processNonBitmapDims( + SearchQuery query, + final StorageAdapter adapter, + Filter filter, + Interval interval, + final int limit, + final List nonBitmapDims, + final SearchQuerySpec searchQuerySpec, + final Object2IntRBTreeMap retVal + ) + { + final Sequence cursors = adapter.makeCursors(filter, interval, query.getGranularity(), query.isDescending()); + + cursors.accumulate( + retVal, + new Accumulator, Cursor>() + { + @Override + public Object2IntRBTreeMap accumulate(Object2IntRBTreeMap set, Cursor cursor) + { + if (set.size() >= limit) { + return set; + } + + List dimInfoList = Lists.newArrayList(); + for (DimensionSpec dim : nonBitmapDims) { + dimInfoList.add(getDimInfoFromSpec(dim, adapter, cursor)); + } + + while (!cursor.isDone()) { + for (QueryDimensionInfo dimInfo : dimInfoList) { + dimInfo.queryHelper.updateSearchResultSet( + dimInfo.outputName, + dimInfo.selector, + searchQuerySpec, + limit, + set + ); + + if (set.size() >= limit) { + return set; + } + } + + cursor.advance(); + } + + return set; + } + } + ); + } + + private void processBitmapDims( + QueryableIndex index, + Filter filter, + Interval interval, + List bitmapDims, + SearchQuerySpec searchQuerySpec, + int limit, + final Object2IntRBTreeMap retVal + ) + { + final 
BitmapFactory bitmapFactory = index.getBitmapFactoryForDimensions(); + + final ImmutableBitmap baseFilter = + filter == null ? null : filter.getBitmapIndex(new ColumnSelectorBitmapIndexSelector(bitmapFactory, index)); + + ImmutableBitmap timeFilteredBitmap; + if (!interval.contains(segment.getDataInterval())) { + MutableBitmap timeBitmap = bitmapFactory.makeEmptyMutableBitmap(); + final Column timeColumn = index.getColumn(Column.TIME_COLUMN_NAME); + try (final GenericColumn timeValues = timeColumn.getGenericColumn()) { + + int startIndex = Math.max(0, getStartIndexOfTime(timeValues, interval.getStartMillis(), true)); + int endIndex = Math.min( + timeValues.length() - 1, + getStartIndexOfTime(timeValues, interval.getEndMillis(), false) + ); + + for (int i = startIndex; i <= endIndex; i++) { + timeBitmap.add(i); + } + + final ImmutableBitmap finalTimeBitmap = bitmapFactory.makeImmutableBitmap(timeBitmap); + timeFilteredBitmap = + (baseFilter == null) ? finalTimeBitmap : finalTimeBitmap.intersection(baseFilter); + } + } else { + timeFilteredBitmap = baseFilter; + } + + for (DimensionSpec dimension : bitmapDims) { + final Column column = index.getColumn(dimension.getDimension()); + if (column == null) { + continue; + } + + final BitmapIndex bitmapIndex = column.getBitmapIndex(); + ExtractionFn extractionFn = dimension.getExtractionFn(); + if (extractionFn == null) { + extractionFn = IdentityExtractionFn.getInstance(); + } + if (bitmapIndex != null) { + for (int i = 0; i < bitmapIndex.getCardinality(); ++i) { + String dimVal = Strings.nullToEmpty(extractionFn.apply(bitmapIndex.getValue(i))); + if (!searchQuerySpec.accept(dimVal)) { + continue; + } + ImmutableBitmap bitmap = bitmapIndex.getBitmap(i); + if (timeFilteredBitmap != null) { + bitmap = bitmapFactory.intersection(Arrays.asList(timeFilteredBitmap, bitmap)); + } + if (bitmap.size() > 0) { + retVal.addTo(new SearchHit(dimension.getOutputName(), dimVal), bitmap.size()); + if (retVal.size() >= limit) { + return; + } 
+ } + } + } + } + } + } diff --git a/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java b/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java index 50e7761943ec..7821f226e5c3 100644 --- a/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java +++ b/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java @@ -32,8 +32,9 @@ import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; import io.druid.query.filter.Filter; +import io.druid.segment.ColumnValueSelector; import io.druid.segment.Cursor; -import io.druid.segment.DimensionHandlerUtil; +import io.druid.segment.DimensionHandlerUtils; import io.druid.segment.DimensionQueryHelper; import io.druid.segment.LongColumnSelector; import io.druid.segment.ObjectColumnSelector; @@ -105,16 +106,16 @@ public Result apply(Cursor cursor) final LongColumnSelector timestampColumnSelector = cursor.makeLongColumnSelector(Column.TIME_COLUMN_NAME); - final Map dimInfoMap = Maps.newLinkedHashMap(); + final List dimInfoList = Lists.newArrayList(); for (DimensionSpec dimSpec : dims) { - final DimensionQueryHelper queryHelper = DimensionHandlerUtil.makeQueryHelper( + final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeQueryHelper( dimSpec.getDimension(), cursor, Lists.newArrayList(adapter.getAvailableDimensions()) ); - final Object dimSelector = queryHelper.getColumnValueSelector(dimSpec, cursor); + final ColumnValueSelector dimSelector = queryHelper.getColumnValueSelector(dimSpec, cursor); final QueryDimensionInfo dimInfo = new QueryDimensionInfo(dimSpec, queryHelper, dimSelector, 0); - dimInfoMap.put(dimSpec.getOutputName(), dimInfo); + dimInfoList.add(dimInfo); builder.addDimension(dimSpec.getOutputName()); } @@ -134,9 +135,7 @@ public Result apply(Cursor cursor) final Map theEvent = Maps.newLinkedHashMap(); theEvent.put(EventHolder.timestampKey, new DateTime(timestampColumnSelector.get())); - for (Map.Entry entry 
: dimInfoMap.entrySet()) { - final String dim = entry.getKey(); - final QueryDimensionInfo dimInfo = entry.getValue(); + for (QueryDimensionInfo dimInfo : dimInfoList) { dimInfo.queryHelper.addRowValuesToSelectResult(dimInfo.outputName, dimInfo.selector, theEvent); } diff --git a/processing/src/main/java/io/druid/query/topn/TopNMapFn.java b/processing/src/main/java/io/druid/query/topn/TopNMapFn.java index 915ec74c854c..09d699a6b5d3 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNMapFn.java +++ b/processing/src/main/java/io/druid/query/topn/TopNMapFn.java @@ -22,8 +22,9 @@ import com.google.common.base.Function; import io.druid.query.Result; import io.druid.query.QueryDimensionInfo; +import io.druid.segment.ColumnValueSelector; import io.druid.segment.Cursor; -import io.druid.segment.DimensionHandlerUtil; +import io.druid.segment.DimensionHandlerUtils; import io.druid.segment.DimensionQueryHelper; public class TopNMapFn implements Function> @@ -44,12 +45,12 @@ public TopNMapFn( @SuppressWarnings("unchecked") public Result apply(Cursor cursor) { - final DimensionQueryHelper queryHelper = DimensionHandlerUtil.makeQueryHelper( + final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeQueryHelper( query.getDimensionSpec().getDimension(), cursor, null ); - final Object dimSelector = queryHelper.getColumnValueSelector(query.getDimensionSpec(), cursor); + final ColumnValueSelector dimSelector = queryHelper.getColumnValueSelector(query.getDimensionSpec(), cursor); final QueryDimensionInfo dimInfo = new QueryDimensionInfo(query.getDimensionSpec(), queryHelper, dimSelector, 0); if (dimSelector == null) { return null; diff --git a/processing/src/main/java/io/druid/query/topn/TopNParams.java b/processing/src/main/java/io/druid/query/topn/TopNParams.java index 563b74ee855d..24f17087b7c5 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNParams.java +++ b/processing/src/main/java/io/druid/query/topn/TopNParams.java @@ -39,7 +39,7 @@ protected 
TopNParams( { this.dimInfo = dimInfo; this.cursor = cursor; - this.cardinality = dimInfo.getCardinality(); + this.cardinality = dimInfo.cardinality; this.numValuesPerPass = numValuesPerPass; if (cardinality < 0) { diff --git a/processing/src/main/java/io/druid/query/topn/TopNQueryEngine.java b/processing/src/main/java/io/druid/query/topn/TopNQueryEngine.java index 78c1b822b63b..be5e9a3709bb 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNQueryEngine.java +++ b/processing/src/main/java/io/druid/query/topn/TopNQueryEngine.java @@ -33,13 +33,9 @@ import io.druid.query.filter.Filter; import io.druid.segment.Capabilities; import io.druid.segment.Cursor; -import io.druid.segment.DimensionHandler; -import io.druid.segment.DimensionHandlerUtil; -import io.druid.segment.DimensionQueryHelper; import io.druid.segment.SegmentMissingException; import io.druid.segment.StorageAdapter; import io.druid.segment.column.Column; -import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.filter.Filters; import org.joda.time.Interval; @@ -97,8 +93,6 @@ private Function> getMapFn(TopNQuery query, fina { final Capabilities capabilities = adapter.getCapabilities(); final String dimension = query.getDimensionSpec().getDimension(); - final ColumnCapabilities columnCapabilities = adapter.getColumnCapabilities(dimension); - final int cardinality = adapter.getDimensionCardinality(dimension); int numBytesPerRecord = 0; diff --git a/processing/src/main/java/io/druid/segment/ColumnValueSelector.java b/processing/src/main/java/io/druid/segment/ColumnValueSelector.java new file mode 100644 index 000000000000..4df130fff945 --- /dev/null +++ b/processing/src/main/java/io/druid/segment/ColumnValueSelector.java @@ -0,0 +1,24 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.segment; + +public interface ColumnValueSelector +{ +} diff --git a/processing/src/main/java/io/druid/segment/DimensionHandler.java b/processing/src/main/java/io/druid/segment/DimensionHandler.java index 786817e0dbac..403d724d7d49 100644 --- a/processing/src/main/java/io/druid/segment/DimensionHandler.java +++ b/processing/src/main/java/io/druid/segment/DimensionHandler.java @@ -62,7 +62,7 @@ public interface DimensionHandler, E * * @return Dimension name */ - public String getDimensionName(); + String getDimensionName(); /** @@ -71,7 +71,7 @@ public interface DimensionHandler, E * * @return A new DimensionIndexer object. */ - public DimensionIndexer makeIndexer(); + DimensionIndexer makeIndexer(); /** @@ -88,7 +88,7 @@ public interface DimensionHandler, E * @return A new DimensionMergerV9 object. */ - public DimensionMergerV9 makeMerger( + DimensionMergerV9 makeMerger( IndexSpec indexSpec, File outDir, IOPeon ioPeon, @@ -111,7 +111,7 @@ public DimensionMergerV9 makeMerger( * @return A new DimensionMergerLegacy object. 
*/ - public DimensionMergerLegacy makeLegacyMerger( + DimensionMergerLegacy makeLegacyMerger( IndexSpec indexSpec, File outDir, IOPeon ioPeon, @@ -120,7 +120,7 @@ public DimensionMergerLegacy makeLegacyMerger( ) throws IOException; - public DimensionQueryHelper makeQueryHelper(); + public DimensionQueryHelper makeQueryHelper(); /** @@ -132,7 +132,7 @@ public DimensionMergerLegacy makeLegacyMerger( * @param dimVals Array of row values * @return Size of dimVals */ - public int getLengthFromEncodedArray(EncodedTypeArray dimVals); + int getLengthFromEncodedArray(EncodedTypeArray dimVals); /** @@ -147,7 +147,7 @@ public DimensionMergerLegacy makeLegacyMerger( * * @return integer indicating comparison result of arrays */ - public int compareSortedEncodedArrays(EncodedTypeArray lhs, EncodedTypeArray rhs); + int compareSortedEncodedArrays(EncodedTypeArray lhs, EncodedTypeArray rhs); /** @@ -168,7 +168,7 @@ public DimensionMergerLegacy makeLegacyMerger( * * @return integer indicating comparison result of arrays */ - public void validateSortedEncodedArrays( + void validateSortedEncodedArrays( EncodedTypeArray lhs, EncodedTypeArray rhs, Indexed lhsEncodings, @@ -186,7 +186,7 @@ public void validateSortedEncodedArrays( * @param column Column for this dimension from a QueryableIndex * @return The type-specific column subobject for this dimension. 
*/ - public Closeable getSubColumn(Column column); + Closeable getSubColumn(Column column); /** @@ -200,5 +200,5 @@ public void validateSortedEncodedArrays( * @param currRow The index of the row to retrieve * @return The row from "column" specified by "currRow", as an array of values */ - public Object getRowValueArrayFromColumn(Closeable column, int currRow); + Object getRowValueArrayFromColumn(Closeable column, int currRow); } diff --git a/processing/src/main/java/io/druid/segment/DimensionHandlerUtil.java b/processing/src/main/java/io/druid/segment/DimensionHandlerUtils.java similarity index 62% rename from processing/src/main/java/io/druid/segment/DimensionHandlerUtil.java rename to processing/src/main/java/io/druid/segment/DimensionHandlerUtils.java index bd5bc15442f5..be0f8a0bdd93 100644 --- a/processing/src/main/java/io/druid/segment/DimensionHandlerUtil.java +++ b/processing/src/main/java/io/druid/segment/DimensionHandlerUtils.java @@ -21,17 +21,15 @@ import io.druid.java.util.common.IAE; import io.druid.data.input.impl.DimensionSchema.MultiValueHandling; -import com.google.common.base.Function; -import com.google.common.collect.Lists; import io.druid.segment.column.Column; import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ValueType; import java.util.List; -public final class DimensionHandlerUtil +public final class DimensionHandlerUtils { - private DimensionHandlerUtil() {} + private DimensionHandlerUtils() {} public static DimensionHandler getHandlerFromCapabilities( String dimensionName, @@ -39,53 +37,38 @@ public static DimensionHandler getHandlerFromCapabilities( MultiValueHandling multiValueHandling ) { - DimensionHandler handler = null; if (capabilities == null) { - return null; + return new StringDimensionHandler(dimensionName, multiValueHandling); } if (dimensionName.equals(Column.TIME_COLUMN_NAME)) { return new StringDimensionHandler(Column.TIME_COLUMN_NAME, MultiValueHandling.ARRAY); } + multiValueHandling = 
multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling; + if (capabilities.getType() == ValueType.STRING) { if (!capabilities.isDictionaryEncoded() || !capabilities.hasBitmapIndexes()) { throw new IAE("String column must have dictionary encoding and bitmap index."); } - // use default behavior - multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling; - handler = new StringDimensionHandler(dimensionName, multiValueHandling); - } - if (capabilities.getType() == ValueType.LONG) { - //handler = new LongDimensionHandler(dimensionName); - } - - if (capabilities.getType() == ValueType.FLOAT) { - //handler = new FloatDimensionHandler(dimensionName); + return new StringDimensionHandler(dimensionName, multiValueHandling); } - if (handler == null) { - //return new StringDimensionHandler(dimensionName); - //throw new IAE("Could not create handler from invalid column type: " + capabilities.getType()); - } - return handler; + // Return a StringDimensionHandler by default (null columns will be treated as String typed) + return new StringDimensionHandler(dimensionName, multiValueHandling); } public static DimensionQueryHelper makeQueryHelper(String dimName, ColumnSelectorFactory columnSelectorFactory, List availableDimensions) { final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(dimName); - DimensionHandler handler = DimensionHandlerUtil.getHandlerFromCapabilities(dimName, capabilities, null); - // treat null columns as strings - if (handler == null) { - handler = new StringDimensionHandler(dimName, null); - } + DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimName, capabilities, null); + // treat metrics as null for now if (availableDimensions != null) { - if (!Lists.newArrayList(availableDimensions).contains(dimName)) { + if (availableDimensions.contains(dimName)) { handler = new StringDimensionHandler(dimName, null); } } - final 
DimensionQueryHelper queryHelper = handler.makeQueryHelper(); - return queryHelper; + return handler.makeQueryHelper(); } } diff --git a/processing/src/main/java/io/druid/segment/DimensionIndexer.java b/processing/src/main/java/io/druid/segment/DimensionIndexer.java index 82d8021bfefa..5d1e554a0319 100644 --- a/processing/src/main/java/io/druid/segment/DimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/DimensionIndexer.java @@ -119,7 +119,7 @@ public interface DimensionIndexer, E * * @return An array containing an encoded representation of the input row value. */ - public EncodedTypeArray processRowValsToUnsortedEncodedArray(Object dimValues); + EncodedTypeArray processRowValsToUnsortedEncodedArray(Object dimValues); /** @@ -132,7 +132,7 @@ public interface DimensionIndexer, E * @param unsortedIntermediateValue value to convert * @return converted value */ - public EncodedType getSortedEncodedValueFromUnsorted(EncodedType unsortedIntermediateValue); + EncodedType getSortedEncodedValueFromUnsorted(EncodedType unsortedIntermediateValue); /** @@ -145,7 +145,7 @@ public interface DimensionIndexer, E * @param sortedIntermediateValue value to convert * @return converted value */ - public EncodedType getUnsortedEncodedValueFromSorted(EncodedType sortedIntermediateValue); + EncodedType getUnsortedEncodedValueFromSorted(EncodedType sortedIntermediateValue); /** @@ -159,7 +159,7 @@ public interface DimensionIndexer, E * * @return Sorted index of actual values */ - public Indexed getSortedIndexedValues(); + Indexed getSortedIndexedValues(); /** @@ -177,7 +177,7 @@ public interface DimensionIndexer, E * * @return min value */ - public ActualType getMinValue(); + ActualType getMinValue(); /** @@ -185,7 +185,7 @@ public interface DimensionIndexer, E * * @return max value */ - public ActualType getMaxValue(); + ActualType getMaxValue(); /** @@ -193,7 +193,7 @@ public interface DimensionIndexer, E * * @return value cardinality */ - public int getCardinality(); + 
int getCardinality(); /** @@ -210,7 +210,7 @@ public interface DimensionIndexer, E * @param desc Descriptor object for this dimension within an IncrementalIndex * @return A new object that reads rows from currEntry */ - public Object makeColumnValueSelector( + Object makeColumnValueSelector( DimensionSpec spec, IncrementalIndexStorageAdapter.EntryHolder currEntry, IncrementalIndex.DimensionDesc desc @@ -239,7 +239,7 @@ public Object makeColumnValueSelector( * @param rhs dimension value array from a TimeAndDims key * @return comparison of the two arrays */ - public int compareUnsortedEncodedArrays(EncodedTypeArray lhs, EncodedTypeArray rhs); + int compareUnsortedEncodedArrays(EncodedTypeArray lhs, EncodedTypeArray rhs); /** @@ -249,7 +249,7 @@ public Object makeColumnValueSelector( * @param rhs dimension value array from a TimeAndDims key * @return true if the two arrays are equal */ - public boolean checkUnsortedEncodedArraysEqual(EncodedTypeArray lhs, EncodedTypeArray rhs); + boolean checkUnsortedEncodedArraysEqual(EncodedTypeArray lhs, EncodedTypeArray rhs); /** @@ -257,10 +257,10 @@ public Object makeColumnValueSelector( * @param key dimension value array from a TimeAndDims key * @return hashcode of the array */ - public int getUnsortedEncodedArrayHashCode(EncodedTypeArray key); + int getUnsortedEncodedArrayHashCode(EncodedTypeArray key); - public static final boolean LIST = true; - public static final boolean ARRAY = false; + boolean LIST = true; + boolean ARRAY = false; /** * Given a row value array from a TimeAndDims key, as described in the documentation for compareUnsortedEncodedArrays(), @@ -273,7 +273,7 @@ public Object makeColumnValueSelector( * @param asList if true, return an array; if false, return a list * @return single value, array, or list containing the actual values corresponding to the encoded values in the input array */ - public Object convertUnsortedEncodedArrayToActualArrayOrList(EncodedTypeArray key, boolean asList); + Object 
convertUnsortedEncodedArrayToActualArrayOrList(EncodedTypeArray key, boolean asList); /** @@ -283,7 +283,7 @@ public Object makeColumnValueSelector( * @param key dimension value array from a TimeAndDims key * @return array containing the sorted encoded values corresponding to the unsorted encoded values in the input array */ - public EncodedTypeArray convertUnsortedEncodedArrayToSortedEncodedArray(EncodedTypeArray key); + EncodedTypeArray convertUnsortedEncodedArrayToSortedEncodedArray(EncodedTypeArray key); /** @@ -307,7 +307,7 @@ public Object makeColumnValueSelector( * @param bitmapIndexes array of bitmaps, indexed by integer dimension value * @param factory bitmap factory */ - public void fillBitmapsFromUnsortedEncodedArray(EncodedTypeArray key, int rowNum, MutableBitmap[] bitmapIndexes, BitmapFactory factory); + void fillBitmapsFromUnsortedEncodedArray(EncodedTypeArray key, int rowNum, MutableBitmap[] bitmapIndexes, BitmapFactory factory); /** @@ -326,7 +326,7 @@ public Object makeColumnValueSelector( * @param dimIndex the array index of this indexer's dimension within the TimeAndDims key * @return A ValueMatcher that matches a dimension value array from a TimeAndDims key against "matchValue" */ - public ValueMatcher makeIndexingValueMatcher(Comparable matchValue, IncrementalIndexStorageAdapter.EntryHolder holder, int dimIndex); + ValueMatcher makeIndexingValueMatcher(Comparable matchValue, IncrementalIndexStorageAdapter.EntryHolder holder, int dimIndex); /** @@ -350,5 +350,5 @@ public Object makeColumnValueSelector( * @param dimIndex the array index of this indexer's dimension within the TimeAndDims key * @return A ValueMatcher that applies a predicate from the predicateFactory to the dimension values in the TimeAndDim keys */ - public ValueMatcher makeIndexingValueMatcher(DruidPredicateFactory predicateFactory, IncrementalIndexStorageAdapter.EntryHolder holder, int dimIndex); + ValueMatcher makeIndexingValueMatcher(DruidPredicateFactory predicateFactory, 
IncrementalIndexStorageAdapter.EntryHolder holder, int dimIndex); } diff --git a/processing/src/main/java/io/druid/segment/DimensionMerger.java b/processing/src/main/java/io/druid/segment/DimensionMerger.java index 4dbf406abe70..d22cc5c0a05a 100644 --- a/processing/src/main/java/io/druid/segment/DimensionMerger.java +++ b/processing/src/main/java/io/druid/segment/DimensionMerger.java @@ -68,7 +68,7 @@ public interface DimensionMerger * @param adapters List of adapters to be merged. * @throws IOException */ - public void writeMergedValueMetadata(List adapters) throws IOException; + void writeMergedValueMetadata(List adapters) throws IOException; /** @@ -86,7 +86,7 @@ public interface DimensionMerger * @param segmentRow A row from a segment to be converted to its representation within the merged sequence of rows. * @param segmentIndexNumber Integer indicating which segment the row originated from. */ - public EncodedTypedArray convertSegmentRowValuesToMergedRowValues(EncodedTypedArray segmentRow, int segmentIndexNumber); + EncodedTypedArray convertSegmentRowValuesToMergedRowValues(EncodedTypedArray segmentRow, int segmentIndexNumber); /** @@ -101,7 +101,7 @@ public interface DimensionMerger * @param rowValues The row values to be added. * @throws IOException */ - public void processMergedRow(EncodedTypedArray rowValues) throws IOException; + void processMergedRow(EncodedTypedArray rowValues) throws IOException; /** @@ -125,7 +125,7 @@ public interface DimensionMerger * @param closer Add Closeables for resource cleanup to this Closer if needed * @throws IOException */ - public void writeIndexes(List segmentRowNumConversions, Closer closer) throws IOException; + void writeIndexes(List segmentRowNumConversions, Closer closer) throws IOException; /** @@ -135,5 +135,5 @@ public interface DimensionMerger * * @return true if this dimension can be excluded from the merged segment. 
*/ - public boolean canSkip(); + boolean canSkip(); } diff --git a/processing/src/main/java/io/druid/segment/DimensionMergerLegacy.java b/processing/src/main/java/io/druid/segment/DimensionMergerLegacy.java index df543c3e0d19..0da997642fef 100644 --- a/processing/src/main/java/io/druid/segment/DimensionMergerLegacy.java +++ b/processing/src/main/java/io/druid/segment/DimensionMergerLegacy.java @@ -42,7 +42,7 @@ public interface DimensionMergerLegacy extends DimensionMerger * @param valueEncodingFile Destination file * @throws IOException */ - public void writeValueMetadataToFile(FileOutputSupplier valueEncodingFile) throws IOException; + void writeValueMetadataToFile(FileOutputSupplier valueEncodingFile) throws IOException; /** @@ -50,7 +50,7 @@ public interface DimensionMergerLegacy extends DimensionMerger * @param rowValueOut Destination file * @throws IOException */ - public void writeRowValuesToFile(FileOutputSupplier rowValueOut) throws IOException; + void writeRowValuesToFile(FileOutputSupplier rowValueOut) throws IOException; /** @@ -59,11 +59,11 @@ public interface DimensionMergerLegacy extends DimensionMerger * @param spatialOut Destination file for spatial indexes * @throws IOException */ - public void writeIndexesToFiles( + void writeIndexesToFiles( ByteSink invertedOut, OutputSupplier spatialOut ) throws IOException; - public File makeDimFile() throws IOException; + File makeDimFile() throws IOException; } diff --git a/processing/src/main/java/io/druid/segment/DimensionMergerV9.java b/processing/src/main/java/io/druid/segment/DimensionMergerV9.java index 73a885c36790..c95a757ee517 100644 --- a/processing/src/main/java/io/druid/segment/DimensionMergerV9.java +++ b/processing/src/main/java/io/druid/segment/DimensionMergerV9.java @@ -36,5 +36,5 @@ public interface DimensionMergerV9 extends DimensionMerger, EncodedTypeArray, ActualType extends Comparable> +/** + * Query related interface. 
+ * + * Contains a collection of query processing methods for functionality that is dependent on + * the type of a dimension. + * + * Each DimensionQueryHelper is associated with a single dimension. + * + * @param The type of this dimension's values + * @param The type of the row values object for this dimension + * @param The type of the row value selector (e.g. DimensionSelector) for this dimension + */ +public interface DimensionQueryHelper, RowValuesType, ValueSelectorType extends ColumnValueSelector> { /** * Get a typed column value selector (DimensionSelector, LongColumnSelector, etc.) from a ColumnSelectorFactory. @@ -47,7 +58,7 @@ public interface DimensionQueryHelper than the latter + */ + int compareGroupingKeys(ByteBuffer b1, int pos1, ByteBuffer b2, int pos2); /** @@ -142,7 +172,7 @@ public interface DimensionQueryHelper getGroupingKeyByteComparator(); + Comparator getGroupingKeyByteComparator(); /** @@ -159,11 +189,11 @@ public interface DimensionQueryHelper theEvent, + void processDimValueFromGroupingKey( String outputName, - Object dimSelector, - ByteBuffer keyBuffer + ValueSelectorType dimSelector, + ByteBuffer keyBuffer, + Map theEvent ); @@ -190,8 +220,8 @@ public void readDimValueFromGroupingKey( * @param updateValuesFn Function provided by GroupByEngine for updateValues() recursion * @return Return the result of calling updateValuesFn on the updated grouping key */ - public List addDimValuesToGroupingKey( - Object dimSelector, + List addDimValuesToGroupingKey( + ValueSelectorType dimSelector, ByteBuffer key, Function> updateValuesFn ); @@ -203,22 +233,21 @@ public List addDimValuesToGroupingKey( * @param dimSelector Dimension value selector * @return Current row */ - public Object getRowFromDimSelector(Object dimSelector); + RowValuesType getRowFromDimSelector(ValueSelectorType dimSelector); /** * Used by GroupByEngineV2. * - * Read the first value within a row values object (IndexedInts, IndexedLongs, etc.) 
and add that value - * to the keyBuffer at keyBufferPosition, and return the size of the row values object. + * Read the first value within a row values object (IndexedInts, IndexedLongs, etc.) and write that value + * to the keyBuffer at keyBufferPosition. If rowSize is 0, write GROUP_BY_MISSING_VALUE instead. * * @param valuesObj row values object * @param keyBuffer grouping key * @param keyBufferPosition offset within grouping key - * @return size of the row values object */ - public int initializeGroupingKeyV2Dimension( - final Object valuesObj, + void initializeGroupingKeyV2Dimension( + final RowValuesType valuesObj, final ByteBuffer keyBuffer, final int keyBufferPosition ); @@ -227,15 +256,15 @@ public int initializeGroupingKeyV2Dimension( /** * Used by GroupByEngineV2. * - * Read the value at rowValueIdx from a row values object and add that value to the keyBuffer at keyBufferPosition. + * Read the value at rowValueIdx from a row values object and write that value to the keyBuffer at keyBufferPosition. 
* * @param values row values object * @param rowValueIdx index of the value to read * @param keyBuffer grouping key * @param keyBufferPosition offset within grouping key */ - public void addValueToGroupingKeyV2( - Object values, + void addValueToGroupingKeyV2( + RowValuesType values, int rowValueIdx, ByteBuffer keyBuffer, final int keyBufferPosition @@ -260,10 +289,10 @@ public void addValueToGroupingKeyV2( * @param resultMap result map for the group by query being served * @param key grouping key */ - public void readValueFromGroupingKeyV2( + void processValueFromGroupingKeyV2( QueryDimensionInfo dimInfo, - Map resultMap, - ByteBuffer key + ByteBuffer key, + Map resultMap ); @@ -282,7 +311,7 @@ public void readValueFromGroupingKeyV2( * @param capabilities Object indicating if dimension values are sorted * @return an Aggregator[][] for integer-valued dimensions, null otherwise */ - public Aggregator[][] getDimExtractionRowSelector(TopNParams params, TopNQuery query, Capabilities capabilities); + Aggregator[][] getDimExtractionRowSelector(TopNParams params, TopNQuery query, Capabilities capabilities); /** @@ -306,8 +335,8 @@ public void readValueFromGroupingKeyV2( * @param cursor Cursor for the segment being queried * @param query The TopN query being served. 
*/ - public void dimExtractionScanAndAggregate( - Object selector, + void dimExtractionScanAndAggregate( + ValueSelectorType selector, Aggregator[][] rowSelector, Map aggregatesStore, Cursor cursor, @@ -326,9 +355,9 @@ public void dimExtractionScanAndAggregate( * @param dimSelector Dimension value selector * @param resultMap Output map of the select query being served */ - public void addRowValuesToSelectResult( + void addRowValuesToSelectResult( String outputName, - Object dimSelector, + ValueSelectorType dimSelector, Map resultMap ); @@ -349,11 +378,11 @@ public void addRowValuesToSelectResult( * @param set The result set of the search query * @param limit The limit of the search query */ - public void updateSearchResultSet( + void updateSearchResultSet( String outputName, - Object dimSelector, + ValueSelectorType dimSelector, SearchQuerySpec searchQuerySpec, - TreeMap set, - int limit + int limit, + Object2IntRBTreeMap set ); } diff --git a/processing/src/main/java/io/druid/segment/DimensionSelector.java b/processing/src/main/java/io/druid/segment/DimensionSelector.java index b7ac9ed6ba7b..5bd2c2cec1c7 100644 --- a/processing/src/main/java/io/druid/segment/DimensionSelector.java +++ b/processing/src/main/java/io/druid/segment/DimensionSelector.java @@ -21,7 +21,7 @@ /** */ -public interface DimensionSelector +public interface DimensionSelector extends ColumnValueSelector { public static int CARDINALITY_UNKNOWN = -1; diff --git a/processing/src/main/java/io/druid/segment/FloatColumnSelector.java b/processing/src/main/java/io/druid/segment/FloatColumnSelector.java index 0f0464e5929a..608c6c1577d3 100644 --- a/processing/src/main/java/io/druid/segment/FloatColumnSelector.java +++ b/processing/src/main/java/io/druid/segment/FloatColumnSelector.java @@ -24,7 +24,7 @@ * FloatColumnSelector has a handle onto some other stateful object (e.g. 
an Offset) which is changing between calls * to get() (though, that doesn't have to be the case if you always want the same value...). */ -public interface FloatColumnSelector +public interface FloatColumnSelector extends ColumnValueSelector { public float get(); } diff --git a/processing/src/main/java/io/druid/segment/IndexMerger.java b/processing/src/main/java/io/druid/segment/IndexMerger.java index d624aa53601b..c815adfb7de5 100644 --- a/processing/src/main/java/io/druid/segment/IndexMerger.java +++ b/processing/src/main/java/io/druid/segment/IndexMerger.java @@ -912,7 +912,7 @@ protected DimensionHandler[] makeDimensionHandlers(final List mergedDime for (int i = 0; i < mergedDimensions.size(); i++) { ColumnCapabilities capabilities = dimCapabilities.get(i); String dimName = mergedDimensions.get(i); - handlers[i] = DimensionHandlerUtil.getHandlerFromCapabilities(dimName, capabilities, null); + handlers[i] = DimensionHandlerUtils.getHandlerFromCapabilities(dimName, capabilities, null); } return handlers; } diff --git a/processing/src/main/java/io/druid/segment/LongColumnSelector.java b/processing/src/main/java/io/druid/segment/LongColumnSelector.java index aad8bfac329c..869ab49178e9 100644 --- a/processing/src/main/java/io/druid/segment/LongColumnSelector.java +++ b/processing/src/main/java/io/druid/segment/LongColumnSelector.java @@ -21,7 +21,7 @@ /** */ -public interface LongColumnSelector +public interface LongColumnSelector extends ColumnValueSelector { public long get(); } diff --git a/processing/src/main/java/io/druid/segment/ObjectColumnSelector.java b/processing/src/main/java/io/druid/segment/ObjectColumnSelector.java index 06365c01e4f0..71258f4cd2f7 100644 --- a/processing/src/main/java/io/druid/segment/ObjectColumnSelector.java +++ b/processing/src/main/java/io/druid/segment/ObjectColumnSelector.java @@ -19,7 +19,7 @@ package io.druid.segment; -public interface ObjectColumnSelector +public interface ObjectColumnSelector extends ColumnValueSelector { 
public Class classOfObject(); public T get(); diff --git a/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java index 53ae145c699c..f7689a9c90a3 100644 --- a/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java +++ b/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java @@ -20,9 +20,7 @@ package io.druid.segment; import com.google.common.base.Function; -import com.google.common.base.Predicate; import com.google.common.base.Predicates; -import com.google.common.base.Strings; import com.google.common.base.Supplier; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; @@ -37,7 +35,6 @@ import io.druid.math.expr.Expr; import io.druid.math.expr.Parser; import io.druid.query.QueryInterruptedException; -import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BooleanFilter; @@ -1044,7 +1041,7 @@ public ValueMatcher makeValueMatcher(String dimension, final Comparable value) } } - final DimensionQueryHelper queryHelper = DimensionHandlerUtil.makeQueryHelper( + final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeQueryHelper( dimension, cursor, Lists.newArrayList(storageAdapter.getAvailableDimensions()) @@ -1061,7 +1058,7 @@ public ValueMatcher makeValueMatcher(String dimension, final DruidPredicateFacto } } - final DimensionQueryHelper queryHelper = DimensionHandlerUtil.makeQueryHelper( + final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeQueryHelper( dimension, cursor, Lists.newArrayList(storageAdapter.getAvailableDimensions()) diff --git a/processing/src/main/java/io/druid/segment/SimpleQueryableIndex.java b/processing/src/main/java/io/druid/segment/SimpleQueryableIndex.java index f91fa7aa0291..699f0530f5c6 100644 --- 
a/processing/src/main/java/io/druid/segment/SimpleQueryableIndex.java +++ b/processing/src/main/java/io/druid/segment/SimpleQueryableIndex.java @@ -124,7 +124,7 @@ private void initDimensionHandlers() { for (String dim : availableDimensions) { ColumnCapabilities capabilities = getColumn(dim).getCapabilities(); - DimensionHandler handler = DimensionHandlerUtil.getHandlerFromCapabilities(dim, capabilities, null); + DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dim, capabilities, null); dimensionHandlers.put(dim, handler); } } diff --git a/processing/src/main/java/io/druid/segment/StringDimensionHandler.java b/processing/src/main/java/io/druid/segment/StringDimensionHandler.java index adc5e467e80f..165f40e91cb2 100644 --- a/processing/src/main/java/io/druid/segment/StringDimensionHandler.java +++ b/processing/src/main/java/io/druid/segment/StringDimensionHandler.java @@ -217,7 +217,7 @@ public DimensionMergerLegacy makeLegacyMerger( } @Override - public DimensionQueryHelper makeQueryHelper() + public DimensionQueryHelper makeQueryHelper() { return new StringDimensionQueryHelper(dimensionName); } diff --git a/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java b/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java index aed0a539d329..a2cd958745ef 100644 --- a/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java +++ b/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java @@ -22,7 +22,6 @@ import com.google.common.base.Function; import com.google.common.base.Predicate; import com.google.common.base.Strings; -import com.google.common.collect.Lists; import com.google.common.hash.Hasher; import com.google.common.primitives.Ints; import io.druid.query.aggregation.Aggregator; @@ -40,20 +39,23 @@ import io.druid.query.topn.TopNQuery; import io.druid.segment.data.EmptyIndexedInts; import io.druid.segment.data.IndexedInts; -import 
org.apache.commons.lang.mutable.MutableInt; +import it.unimi.dsi.fastutil.ints.IntIterator; +import it.unimi.dsi.fastutil.objects.Object2IntRBTreeMap; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Arrays; import java.util.BitSet; import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.TreeMap; -public class StringDimensionQueryHelper implements DimensionQueryHelper +public class StringDimensionQueryHelper implements DimensionQueryHelper { private static final int GROUP_BY_MISSING_VALUE = -1; + public static final String CARDINALITY_AGG_NULL_STRING = "\u0000"; + public static final char CARDINALITY_AGG_SEPARATOR = '\u0001'; private final String dimensionName; private static Comparator GROUPING_KEY_COMPARATOR = new Comparator() @@ -67,50 +69,43 @@ public int compare(byte[] o1, byte[] o2) } }; - private static boolean isComparableNullOrEmpty(final Comparable value) - { - if (value instanceof String) { - return Strings.isNullOrEmpty((String) value); - } - return value == null; - } - public StringDimensionQueryHelper(String dimensionName) { this.dimensionName = dimensionName; } @Override - public Object getColumnValueSelector(DimensionSpec dimensionSpec, ColumnSelectorFactory columnSelectorFactory) + public DimensionSelector getColumnValueSelector(DimensionSpec dimensionSpec, ColumnSelectorFactory columnSelectorFactory) { return columnSelectorFactory.makeDimensionSelector(dimensionSpec); } @Override - public int getRowSize(Object rowValues) + public int getRowSize(IndexedInts rowValues) { - return ((IndexedInts) rowValues).size(); + return rowValues.size(); } @Override - public int getCardinality(Object valueSelector) + public int getCardinality(DimensionSelector valueSelector) { - return ((DimensionSelector) valueSelector).getValueCardinality(); + return valueSelector.getValueCardinality(); } - public ValueMatcher getValueMatcher(ColumnSelectorFactory cursor, final Comparable 
value) + @Override + public ValueMatcher getValueMatcher(ColumnSelectorFactory cursor, final String value) { final DimensionSelector selector = cursor.makeDimensionSelector( new DefaultDimensionSpec(dimensionName, dimensionName) ); // if matching against null, rows with size 0 should also match - final boolean matchNull = isComparableNullOrEmpty(value); + final boolean matchNull = Strings.isNullOrEmpty(value); final int cardinality = selector.getValueCardinality(); if (cardinality >= 0) { // Dictionary-encoded dimension. Compare by id instead of by value to save time. - final int valueId = selector.lookupId((String) value); + final int valueId = selector.lookupId(value); return new ValueMatcher() { @@ -157,6 +152,7 @@ public boolean matches() } } + @Override public ValueMatcher getValueMatcher(ColumnSelectorFactory cursor, final DruidPredicateFactory predicateFactory) { final DimensionSelector selector = cursor.makeDimensionSelector( @@ -221,38 +217,38 @@ public boolean matches() } } - public void hashRow(Object dimSelector, Hasher hasher) + @Override + public void hashRow(DimensionSelector dimSelector, Hasher hasher) { - final DimensionSelector selector = (DimensionSelector) dimSelector; - final IndexedInts row = selector.getRow(); + final IndexedInts row = dimSelector.getRow(); final int size = row.size(); // nothing to add to hasher if size == 0, only handle size == 1 and size != 0 cases. if (size == 1) { - final String value = selector.lookupName(row.get(0)); - hasher.putUnencodedChars(value != null ? value : CardinalityAggregator.NULL_STRING); + final String value = dimSelector.lookupName(row.get(0)); + hasher.putUnencodedChars(convertValueForCardinalityAggregator(value)); } else if (size != 0) { final String[] values = new String[size]; for (int i = 0; i < size; ++i) { - final String value = selector.lookupName(row.get(i)); - values[i] = value != null ? 
value : CardinalityAggregator.NULL_STRING; + final String value = dimSelector.lookupName(row.get(i)); + values[i] = convertValueForCardinalityAggregator(value); } // Values need to be sorted to ensure consistent multi-value ordering across different segments Arrays.sort(values); for (int i = 0; i < size; ++i) { if (i != 0) { - hasher.putChar(CardinalityAggregator.SEPARATOR); + hasher.putChar(CARDINALITY_AGG_SEPARATOR); } hasher.putUnencodedChars(values[i]); } } } - public void hashValues(Object dimSelector, HyperLogLogCollector collector) + @Override + public void hashValues(DimensionSelector dimSelector, HyperLogLogCollector collector) { - final DimensionSelector selector = (DimensionSelector) dimSelector; - for (final Integer index : selector.getRow()) { - final String value = selector.lookupName(index); - collector.add(CardinalityAggregator.hashFn.hashUnencodedChars(value == null ? CardinalityAggregator.NULL_STRING : value).asBytes()); + for (final Integer index : dimSelector.getRow()) { + final String value = dimSelector.lookupName(index); + collector.add(CardinalityAggregator.hashFn.hashUnencodedChars(convertValueForCardinalityAggregator(value)).asBytes()); } } @@ -262,6 +258,14 @@ public int getGroupingKeySize() return Ints.BYTES; } + @Override + public int compareGroupingKeys(ByteBuffer b1, int pos1, ByteBuffer b2, int pos2) + { + final int v1 = b1.getInt(pos1); + final int v2 = b2.getInt(pos2); + return Ints.compare(v1, v2); + } + @Override public Comparator getGroupingKeyByteComparator() { @@ -269,32 +273,31 @@ public Comparator getGroupingKeyByteComparator() } @Override - public void readDimValueFromGroupingKey( - Map theEvent, String outputName, Object dimSelector, ByteBuffer keyBuffer + public void processDimValueFromGroupingKey( + String outputName, DimensionSelector dimSelector, ByteBuffer keyBuffer, Map theEvent ) { - final DimensionSelector selector = (DimensionSelector) dimSelector; final int dimVal = keyBuffer.getInt(); if (dimVal != 
GROUP_BY_MISSING_VALUE) { - theEvent.put(outputName, selector.lookupName(dimVal)); + theEvent.put(outputName, dimSelector.lookupName(dimVal)); } } @Override public List addDimValuesToGroupingKey( - Object selector, ByteBuffer key, Function> updateValuesFn + DimensionSelector selector, ByteBuffer key, Function> updateValuesFn ) { List unaggregatedBuffers = null; - final DimensionSelector dimSelector = (DimensionSelector) selector; - final IndexedInts row = dimSelector.getRow(); + final IndexedInts row = selector.getRow(); if (row == null || row.size() == 0) { ByteBuffer newKey = key.duplicate(); newKey.putInt(GROUP_BY_MISSING_VALUE); unaggregatedBuffers = updateValuesFn.apply(newKey); } else { - for (Integer dimValue : row) { + for (IntIterator rowIt = row.iterator(); rowIt.hasNext(); ) { ByteBuffer newKey = key.duplicate(); + int dimValue = rowIt.nextInt(); newKey.putInt(dimValue); unaggregatedBuffers = updateValuesFn.apply(newKey); } @@ -303,50 +306,46 @@ public List addDimValuesToGroupingKey( } @Override - public Object getRowFromDimSelector(Object dimSelector) { - final DimensionSelector selector = (DimensionSelector) dimSelector; - IndexedInts values = selector == null ? EmptyIndexedInts.EMPTY_INDEXED_INTS : selector.getRow(); - return values; + public IndexedInts getRowFromDimSelector(DimensionSelector selector) { + return selector == null ? 
EmptyIndexedInts.EMPTY_INDEXED_INTS : selector.getRow(); } @Override - public int initializeGroupingKeyV2Dimension( - final Object valuesObj, + public void initializeGroupingKeyV2Dimension( + final IndexedInts values, final ByteBuffer keyBuffer, final int keyBufferPosition ) { - IndexedInts values = (IndexedInts) valuesObj; - final int rowSize = values.size(); + int rowSize = values.size(); if (rowSize == 0) { keyBuffer.putInt(keyBufferPosition, GROUP_BY_MISSING_VALUE); } else { keyBuffer.putInt(keyBufferPosition, values.get(0)); } - return rowSize; } @Override public void addValueToGroupingKeyV2( - final Object values, + final IndexedInts values, final int rowValueIdx, final ByteBuffer keyBuffer, final int keyBufferPosition ) { - IndexedInts intValues = (IndexedInts) values; keyBuffer.putInt( keyBufferPosition, - intValues.get(rowValueIdx) + values.get(rowValueIdx) ); } @Override - public void readValueFromGroupingKeyV2(QueryDimensionInfo dimInfo, Map resultMap, ByteBuffer key) + public void processValueFromGroupingKeyV2(QueryDimensionInfo dimInfo, ByteBuffer key, Map resultMap) { final int id = key.getInt(dimInfo.keyBufferPosition); - if (id >= 0) { + // GROUP_BY_MISSING_VALUE is used to indicate empty rows, which are omitted from the result map. 
+ if (id != GROUP_BY_MISSING_VALUE) { resultMap.put( dimInfo.spec.getOutputName(), ((DimensionSelector) dimInfo.selector).lookupName(id) @@ -357,6 +356,11 @@ public void readValueFromGroupingKeyV2(QueryDimensionInfo dimInfo, Map aggregatesStore, Cursor cursor, TopNQuery query) + public void dimExtractionScanAndAggregate(DimensionSelector selector, Aggregator[][] rowSelector, Map aggregatesStore, Cursor cursor, TopNQuery query) { - final DimensionSelector dimSelector = (DimensionSelector) selector; - final IndexedInts dimValues = dimSelector.getRow(); + final IndexedInts dimValues = selector.getRow(); for (int i = 0; i < dimValues.size(); ++i) { final int dimIndex = dimValues.get(i); Aggregator[] theAggregators = rowSelector[dimIndex]; if (theAggregators == null) { - final String key = dimSelector.lookupName(dimIndex); + final String key = selector.lookupName(dimIndex); theAggregators = aggregatesStore.get(key); if (theAggregators == null) { theAggregators = BaseTopNAlgorithm.makeAggregators(cursor, query.getAggregatorSpecs()); @@ -397,9 +397,8 @@ public void dimExtractionScanAndAggregate(Object selector, Aggregator[][] rowSel @Override - public void addRowValuesToSelectResult(String outputName, Object dimSelector, Map theEvent) + public void addRowValuesToSelectResult(String outputName, DimensionSelector selector, Map theEvent) { - final DimensionSelector selector = (DimensionSelector) dimSelector; if (selector == null) { theEvent.put(outputName, null); } else { @@ -409,7 +408,7 @@ public void addRowValuesToSelectResult(String outputName, Object dimSelector, Ma final String dimVal = selector.lookupName(vals.get(0)); theEvent.put(outputName, dimVal); } else { - List dimVals = Lists.newArrayList(); + List dimVals = new ArrayList<>(vals.size()); for (int i = 0; i < vals.size(); ++i) { dimVals.add(selector.lookupName(vals.get(i))); } @@ -419,20 +418,20 @@ public void addRowValuesToSelectResult(String outputName, Object dimSelector, Ma } @Override - public void 
updateSearchResultSet(String outputName, Object dimSelector, SearchQuerySpec searchQuerySpec, TreeMap set, int limit) + public void updateSearchResultSet( + String outputName, + DimensionSelector selector, + SearchQuerySpec searchQuerySpec, + int limit, + final Object2IntRBTreeMap set + ) { - final DimensionSelector selector = (DimensionSelector) dimSelector; - if (selector != null) { final IndexedInts vals = selector.getRow(); for (int i = 0; i < vals.size(); ++i) { final String dimVal = selector.lookupName(vals.get(i)); if (searchQuerySpec.accept(dimVal)) { - MutableInt counter = new MutableInt(1); - MutableInt prev = set.put(new SearchHit(outputName, dimVal), counter); - if (prev != null) { - counter.add(prev.intValue()); - } + set.addTo(new SearchHit(outputName, dimVal), 1); if (set.size() >= limit) { return; } @@ -440,4 +439,11 @@ public void updateSearchResultSet(String outputName, Object dimSelector, SearchQ } } } + + // CardinalityAggregator has a special representation for nulls + private String convertValueForCardinalityAggregator(String value) + { + return value == null ? 
CARDINALITY_AGG_NULL_STRING : value; + } + } diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java index 532c41ab6d87..0cff1bba5c42 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java @@ -49,7 +49,7 @@ import io.druid.query.extraction.ExtractionFn; import io.druid.segment.ColumnSelectorFactory; import io.druid.segment.DimensionHandler; -import io.druid.segment.DimensionHandlerUtil; +import io.druid.segment.DimensionHandlerUtils; import io.druid.segment.DimensionIndexer; import io.druid.segment.DimensionSelector; import io.druid.segment.FloatColumnSelector; @@ -57,7 +57,6 @@ import io.druid.segment.Metadata; import io.druid.segment.NumericColumnSelector; import io.druid.segment.ObjectColumnSelector; -import io.druid.segment.StringDimensionHandler; import io.druid.segment.column.Column; import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ColumnCapabilitiesImpl; @@ -207,8 +206,7 @@ public ColumnCapabilities getColumnCapabilities(String columnName) // This ColumnSelectorFactory implementation has no knowledge of column capabilities. // However, this method may still be called by FilteredAggregatorFactory's ValueMatcherFactory // to check column types. - // Just return null, the caller will assume default types in that case. - //return null; + // If column capabilities are not available, return null, the caller will assume default types in that case. return columnCapabilities == null ? 
null : columnCapabilities.get(columnName); } @@ -409,14 +407,11 @@ public IncrementalIndex( if (dimSchema.getTypeName().equals(DimensionSchema.SPATIAL_TYPE_NAME)) { capabilities.setHasSpatialIndexes(true); } else { - DimensionHandler handler = DimensionHandlerUtil.getHandlerFromCapabilities( + DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities( dimName, capabilities, dimSchema.getMultiValueHandling() ); - if (handler == null) { - handler = new StringDimensionHandler(dimName, null); - } addNewDimension(dimName, capabilities, handler); } columnCapabilities.put(dimName, capabilities); @@ -577,10 +572,7 @@ TimeAndDims toTimeAndDims(InputRow row) throws IndexSizeExceededException capabilities.setHasBitmapIndexes(true); columnCapabilities.put(dimension, capabilities); } - DimensionHandler handler = DimensionHandlerUtil.getHandlerFromCapabilities(dimension, capabilities, null); - if (handler == null) { - handler = new StringDimensionHandler(dimension, null); - } + DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimension, capabilities, null); desc = addNewDimension(dimension, capabilities, handler); } DimensionHandler handler = desc.getHandler(); @@ -760,10 +752,7 @@ public void loadDimensionIterable(Iterable oldDimensionOrder, Map Date: Fri, 11 Nov 2016 14:41:47 -0800 Subject: [PATCH 03/12] Address PR comments --- .../aggregation/cardinality/CardinalityAggregatorFactory.java | 2 +- .../main/java/io/druid/segment/StringDimensionQueryHelper.java | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java index dc994eb896d7..caf77d66067d 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java +++ 
b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java @@ -158,7 +158,7 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory columnFactory) private List makeDimensionInfoList(final ColumnSelectorFactory columnSelectorFactory) { - List dimInfoList = new ArrayList(fields.size()); + List dimInfoList = new ArrayList<>(fields.size()); for (DimensionSpec dimSpec : fields) { DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeQueryHelper(dimSpec.getDimension(), columnSelectorFactory, null); ColumnValueSelector dimSelector = queryHelper.getColumnValueSelector(dimSpec, columnSelectorFactory); diff --git a/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java b/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java index a2cd958745ef..f309b7c09a0c 100644 --- a/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java +++ b/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java @@ -246,7 +246,8 @@ public void hashRow(DimensionSelector dimSelector, Hasher hasher) @Override public void hashValues(DimensionSelector dimSelector, HyperLogLogCollector collector) { - for (final Integer index : dimSelector.getRow()) { + for (IntIterator rowIt = dimSelector.getRow().iterator(); rowIt.hasNext(); ) { + int index = rowIt.nextInt(); final String value = dimSelector.lookupName(index); collector.add(CardinalityAggregator.hashFn.hashUnencodedChars(convertValueForCardinalityAggregator(value)).asBytes()); } From ebebbd16cb2f6ba4b4f8f82a78b8d05f54812d6b Mon Sep 17 00:00:00 2001 From: jon-wei Date: Fri, 11 Nov 2016 15:17:08 -0800 Subject: [PATCH 04/12] Use getters for QueryDimensionInfo --- .../io/druid/query/QueryDimensionInfo.java | 49 ++++++++++++--- .../cardinality/CardinalityAggregator.java | 4 +- .../query/groupby/GroupByQueryEngine.java | 16 ++--- .../epinephelinae/GroupByQueryEngineV2.java | 60 +++++++++---------- 
.../druid/query/search/SearchQueryRunner.java | 6 +- .../druid/query/select/SelectQueryEngine.java | 2 +- .../topn/DimExtractionTopNAlgorithm.java | 6 +- .../druid/query/topn/PooledTopNAlgorithm.java | 2 +- .../java/io/druid/query/topn/TopNParams.java | 4 +- .../segment/StringDimensionQueryHelper.java | 6 +- 10 files changed, 95 insertions(+), 60 deletions(-) diff --git a/processing/src/main/java/io/druid/query/QueryDimensionInfo.java b/processing/src/main/java/io/druid/query/QueryDimensionInfo.java index 8e1d3e7b72d5..1e5bd9fc7669 100644 --- a/processing/src/main/java/io/druid/query/QueryDimensionInfo.java +++ b/processing/src/main/java/io/druid/query/QueryDimensionInfo.java @@ -33,37 +33,37 @@ public class QueryDimensionInfo /** * The DimensionSpec representing this QueryDimensionInfo's dimension, taken from the query being processed. */ - public final DimensionSpec spec; + private final DimensionSpec spec; /** * Helper object that handles type-specific operations for this dimension within query processing engines. */ - public final DimensionQueryHelper queryHelper; + private final DimensionQueryHelper queryHelper; /** * Internal name of the dimension. */ - public final String name; + private final String name; /** * Name of the dimension to be returned in query results. */ - public final String outputName; + private final String outputName; /** * Column value selector for this dimension, e.g. a DimensionSelector for String dimensions. */ - public final ColumnValueSelector selector; + private final ColumnValueSelector selector; /** * Cardinality of the dimension's value set, taken from the queryHelper. */ - public final int cardinality; + private final int cardinality; /** * Used by the GroupBy engines, indicates the offset of this dimension's value within the grouping key. 
*/ - public final int keyBufferPosition; + private final int keyBufferPosition; public QueryDimensionInfo( @@ -81,4 +81,39 @@ public QueryDimensionInfo( this.cardinality = queryHelper.getCardinality(selector); this.keyBufferPosition = keyBufferPosition; } + + public DimensionSpec getSpec() + { + return spec; + } + + public DimensionQueryHelper getQueryHelper() + { + return queryHelper; + } + + public String getName() + { + return name; + } + + public String getOutputName() + { + return outputName; + } + + public ColumnValueSelector getSelector() + { + return selector; + } + + public int getCardinality() + { + return cardinality; + } + + public int getKeyBufferPosition() + { + return keyBufferPosition; + } } diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java index ef9d2440582d..a370d569d955 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java @@ -45,7 +45,7 @@ protected static void hashRow(List dimInfoList, HyperLogLogC } QueryDimensionInfo dimInfo = dimInfoList.get(k); - dimInfo.queryHelper.hashRow(dimInfo.selector, hasher); + dimInfo.getQueryHelper().hashRow(dimInfo.getSelector(), hasher); } collector.add(hasher.hash().asBytes()); } @@ -53,7 +53,7 @@ protected static void hashRow(List dimInfoList, HyperLogLogC protected static void hashValues(List dimInfoList, HyperLogLogCollector collector) { for (final QueryDimensionInfo dimInfo : dimInfoList) { - dimInfo.queryHelper.hashValues(dimInfo.selector, collector); + dimInfo.getQueryHelper().hashValues(dimInfo.getSelector(), collector); } } diff --git a/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java b/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java index e48adf9ec6c2..47d29e92dee2 
100644 --- a/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java +++ b/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java @@ -78,8 +78,8 @@ private static Comparator makeKeyComparator(final List() @@ -223,8 +223,8 @@ private List updateValues( List unaggregatedBuffers = null; final QueryDimensionInfo dimInfo = dims.get(curIdx); - final ColumnValueSelector selector = dimInfo.selector; - final DimensionQueryHelper queryHelper = dimInfo.queryHelper; + final ColumnValueSelector selector = dimInfo.getSelector(); + final DimensionQueryHelper queryHelper = dimInfo.getQueryHelper(); final Function> updateValuesFn = new Function>() { @Override @@ -387,7 +387,7 @@ private int getTotalKeySize() { int keySize = 0; for (QueryDimensionInfo info : dimInfoList) { - keySize += info.queryHelper.getGroupingKeySize(); + keySize += info.getQueryHelper().getGroupingKeySize(); } return keySize; } @@ -453,9 +453,9 @@ public Row apply(@Nullable Map.Entry input) ByteBuffer keyBuffer = input.getKey().duplicate(); for (QueryDimensionInfo dimInfo : dimInfoList) { - final ColumnValueSelector dimSelector = dimInfo.selector; - dimInfo.queryHelper.processDimValueFromGroupingKey( - dimInfo.outputName, + final ColumnValueSelector dimSelector = dimInfo.getSelector(); + dimInfo.getQueryHelper().processDimValueFromGroupingKey( + dimInfo.getOutputName(), dimSelector, keyBuffer, theEvent diff --git a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index a2c0811c56d3..a1e514887b3f 100644 --- a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -59,6 +59,27 @@ public class GroupByQueryEngineV2 { + private static QueryDimensionInfo[] getDimensionInfo(GroupByQuery query, StorageAdapter adapter, Cursor cursor) 
+ { + int dimCount = query.getDimensions().size(); + int curPos = 0; + QueryDimensionInfo[] dims = new QueryDimensionInfo[dimCount]; + + for (int i = 0; i < dimCount; i++) { + final DimensionSpec dimSpec = query.getDimensions().get(i); + final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeQueryHelper( + dimSpec.getDimension(), + cursor, + Lists.newArrayList(adapter.getAvailableDimensions()) + ); + final ColumnValueSelector selector = queryHelper.getColumnValueSelector(dimSpec, cursor); + final QueryDimensionInfo dimInfo = new QueryDimensionInfo(dimSpec, queryHelper, selector, curPos); + dims[i] = dimInfo; + curPos += queryHelper.getGroupingKeySize(); + } + return dims; + } + private GroupByQueryEngineV2() { // No instantiation @@ -147,27 +168,6 @@ public void close() throws IOException ); } - private static QueryDimensionInfo[] getDimensionInfo(GroupByQuery query, StorageAdapter adapter, Cursor cursor) - { - int dimCount = query.getDimensions().size(); - int curPos = 0; - QueryDimensionInfo[] dims = new QueryDimensionInfo[dimCount]; - - for (int i = 0; i < dimCount; i++) { - final DimensionSpec dimSpec = query.getDimensions().get(i); - final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeQueryHelper( - dimSpec.getDimension(), - cursor, - Lists.newArrayList(adapter.getAvailableDimensions()) - ); - final ColumnValueSelector selector = queryHelper.getColumnValueSelector(dimSpec, cursor); - final QueryDimensionInfo dimInfo = new QueryDimensionInfo(dimSpec, queryHelper, selector, curPos); - dims[i] = dimInfo; - curPos += queryHelper.getGroupingKeySize(); - } - return dims; - } - private static class GroupByEngineIterator implements Iterator, Closeable { private final GroupByQuery query; @@ -245,10 +245,10 @@ public Row next() stackp = stack.length - 1; for (int i = 0; i < dims.length; i++) { - final DimensionQueryHelper queryHelper = dims[i].queryHelper; - valuess[i] = queryHelper.getRowFromDimSelector(dims[i].selector); + final 
DimensionQueryHelper queryHelper = dims[i].getQueryHelper(); + valuess[i] = queryHelper.getRowFromDimSelector(dims[i].getSelector()); int rowSize = queryHelper.getRowSize(valuess[i]); - queryHelper.initializeGroupingKeyV2Dimension(valuess[i], keyBuffer, dims[i].keyBufferPosition); + queryHelper.initializeGroupingKeyV2Dimension(valuess[i], keyBuffer, dims[i].getKeyBufferPosition()); stack[i] = rowSize == 0 ? 0 : 1; } } @@ -267,13 +267,13 @@ public Row next() doAggregate = false; } - if (stackp >= 0 && stack[stackp] < dims[stackp].queryHelper.getRowSize(valuess[stackp])) { + if (stackp >= 0 && stack[stackp] < dims[stackp].getQueryHelper().getRowSize(valuess[stackp])) { // Load next value for current slot - dims[stackp].queryHelper.addValueToGroupingKeyV2(valuess[stackp], stack[stackp], keyBuffer, dims[stackp].keyBufferPosition); + dims[stackp].getQueryHelper().addValueToGroupingKeyV2(valuess[stackp], stack[stackp], keyBuffer, dims[stackp].getKeyBufferPosition()); stack[stackp]++; for (int i = stackp + 1; i < stack.length; i++) { - int rowSize = dims[i].queryHelper.getRowSize(valuess[i]); - dims[i].queryHelper.initializeGroupingKeyV2Dimension(valuess[i], keyBuffer, dims[i].keyBufferPosition); + int rowSize = dims[i].getQueryHelper().getRowSize(valuess[i]); + dims[i].getQueryHelper().initializeGroupingKeyV2Dimension(valuess[i], keyBuffer, dims[i].getKeyBufferPosition()); stack[i] = rowSize == 0 ? 0 : 1; } stackp = stack.length - 1; @@ -300,7 +300,7 @@ public Row apply(final Grouper.Entry entry) // Add dimensions. for (QueryDimensionInfo dimInfo : dims) { - dimInfo.queryHelper.processValueFromGroupingKeyV2(dimInfo, entry.getKey(), theMap); + dimInfo.getQueryHelper().processValueFromGroupingKeyV2(dimInfo, entry.getKey(), theMap); } // Add aggregations. 
@@ -353,7 +353,7 @@ public GroupByEngineKeySerde(final QueryDimensionInfo dims[]) { int keySize = 0; for (QueryDimensionInfo dimInfo : dims) { - keySize += dimInfo.queryHelper.getGroupingKeySize(); + keySize += dimInfo.getQueryHelper().getGroupingKeySize(); } this.keySize = keySize; } diff --git a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java index dd59689b4e09..1c4bcca80e56 100644 --- a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java +++ b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java @@ -276,9 +276,9 @@ public Object2IntRBTreeMap accumulate(Object2IntRBTreeMap while (!cursor.isDone()) { for (QueryDimensionInfo dimInfo : dimInfoList) { - dimInfo.queryHelper.updateSearchResultSet( - dimInfo.outputName, - dimInfo.selector, + dimInfo.getQueryHelper().updateSearchResultSet( + dimInfo.getOutputName(), + dimInfo.getSelector(), searchQuerySpec, limit, set diff --git a/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java b/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java index 7821f226e5c3..e85bd62e78a7 100644 --- a/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java +++ b/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java @@ -136,7 +136,7 @@ public Result apply(Cursor cursor) theEvent.put(EventHolder.timestampKey, new DateTime(timestampColumnSelector.get())); for (QueryDimensionInfo dimInfo : dimInfoList) { - dimInfo.queryHelper.addRowValuesToSelectResult(dimInfo.outputName, dimInfo.selector, theEvent); + dimInfo.getQueryHelper().addRowValuesToSelectResult(dimInfo.getOutputName(), dimInfo.getSelector(), theEvent); } for (Map.Entry metSelector : metSelectors.entrySet()) { diff --git a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java index 
243f90852d29..6174512d27e0 100644 --- a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java @@ -61,7 +61,7 @@ public TopNParams makeInitParams( protected Aggregator[][] makeDimValSelector(TopNParams params, int numProcessed, int numToProcess) { QueryDimensionInfo dimInfo = params.getDimInfo(); - return dimInfo.queryHelper.getDimExtractionRowSelector(params, query, capabilities); + return dimInfo.getQueryHelper().getDimExtractionRowSelector(params, query, capabilities); } @Override @@ -88,8 +88,8 @@ public void scanAndAggregate( final QueryDimensionInfo dimInfo = params.getDimInfo(); while (!cursor.isDone()) { - dimInfo.queryHelper.dimExtractionScanAndAggregate( - dimInfo.selector, + dimInfo.getQueryHelper().dimExtractionScanAndAggregate( + dimInfo.getSelector(), rowSelector, aggregatesStore, cursor, diff --git a/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java index 57140078bb88..c6e7fb7e1c89 100644 --- a/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java @@ -65,7 +65,7 @@ public PooledTopNParams makeInitParams( ByteBuffer resultsBuf = resultsBufHolder.get(); resultsBuf.clear(); - final DimensionSelector dimSelector = (DimensionSelector) dimInfo.selector; + final DimensionSelector dimSelector = (DimensionSelector) dimInfo.getSelector(); final int cardinality = dimSelector.getValueCardinality(); if (cardinality < 0) { diff --git a/processing/src/main/java/io/druid/query/topn/TopNParams.java b/processing/src/main/java/io/druid/query/topn/TopNParams.java index 24f17087b7c5..d31fe7973d1b 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNParams.java +++ b/processing/src/main/java/io/druid/query/topn/TopNParams.java @@ -39,7 +39,7 @@ protected TopNParams( { 
this.dimInfo = dimInfo; this.cursor = cursor; - this.cardinality = dimInfo.cardinality; + this.cardinality = dimInfo.getCardinality(); this.numValuesPerPass = numValuesPerPass; if (cardinality < 0) { @@ -49,7 +49,7 @@ protected TopNParams( public Object getDimSelector() { - return dimInfo.selector; + return dimInfo.getSelector(); } public QueryDimensionInfo getDimInfo() diff --git a/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java b/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java index f309b7c09a0c..af3ef0426cdb 100644 --- a/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java +++ b/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java @@ -343,13 +343,13 @@ public void addValueToGroupingKeyV2( @Override public void processValueFromGroupingKeyV2(QueryDimensionInfo dimInfo, ByteBuffer key, Map resultMap) { - final int id = key.getInt(dimInfo.keyBufferPosition); + final int id = key.getInt(dimInfo.getKeyBufferPosition()); // GROUP_BY_MISSING_VALUE is used to indicate empty rows, which are omitted from the result map. 
if (id != GROUP_BY_MISSING_VALUE) { resultMap.put( - dimInfo.spec.getOutputName(), - ((DimensionSelector) dimInfo.selector).lookupName(id) + dimInfo.getOutputName(), + ((DimensionSelector) dimInfo.getSelector()).lookupName(id) ); } } From 2a43235ed713b151652cdb5d98a2ca12403ccfa1 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Fri, 11 Nov 2016 16:43:40 -0800 Subject: [PATCH 05/12] Split DimensionQueryHelper into base interface and query-specific interfaces --- .../io/druid/query/QueryDimensionInfo.java | 33 +- .../FilteredAggregatorFactory.java | 13 +- .../cardinality/CardinalityAggregator.java | 16 +- .../CardinalityAggregatorFactory.java | 129 +++++++- .../CardinalityBufferAggregator.java | 4 +- .../query/dimension/QueryTypeHelper.java | 24 ++ .../dimension/QueryTypeHelperFactory.java | 27 ++ .../query/groupby/GroupByQueryEngine.java | 143 +++------ .../epinephelinae/GroupByQueryEngineV2.java | 230 +++++++++++-- .../druid/query/search/SearchQueryRunner.java | 100 +++++- .../druid/query/select/SelectQueryEngine.java | 92 +++++- .../topn/DimExtractionTopNAlgorithm.java | 8 +- .../java/io/druid/query/topn/TopNMapFn.java | 149 ++++++++- .../druid/segment/DimensionHandlerUtils.java | 94 +++++- .../druid/segment/DimensionQueryHelper.java | 301 +----------------- .../segment/QueryableIndexStorageAdapter.java | 8 +- .../segment/StringDimensionQueryHelper.java | 257 +-------------- .../CardinalityAggregatorBenchmark.java | 4 +- .../CardinalityAggregatorTest.java | 85 ++++- 19 files changed, 934 insertions(+), 783 deletions(-) create mode 100644 processing/src/main/java/io/druid/query/dimension/QueryTypeHelper.java create mode 100644 processing/src/main/java/io/druid/query/dimension/QueryTypeHelperFactory.java diff --git a/processing/src/main/java/io/druid/query/QueryDimensionInfo.java b/processing/src/main/java/io/druid/query/QueryDimensionInfo.java index 1e5bd9fc7669..74e48fa92e5b 100644 --- a/processing/src/main/java/io/druid/query/QueryDimensionInfo.java +++ 
b/processing/src/main/java/io/druid/query/QueryDimensionInfo.java @@ -20,6 +20,7 @@ package io.druid.query; import io.druid.query.dimension.DimensionSpec; +import io.druid.query.dimension.QueryTypeHelper; import io.druid.segment.ColumnValueSelector; import io.druid.segment.DimensionQueryHelper; @@ -28,7 +29,7 @@ * * Each QueryDimensionInfo is associated with a single dimension. */ -public class QueryDimensionInfo +public class QueryDimensionInfo { /** * The DimensionSpec representing this QueryDimensionInfo's dimension, taken from the query being processed. @@ -36,10 +37,16 @@ public class QueryDimensionInfo private final DimensionSpec spec; /** - * Helper object that handles type-specific operations for this dimension within query processing engines. + * Helper object that handles general type-specific operations for this dimension within query processing engines. */ private final DimensionQueryHelper queryHelper; + /** + * Helper object that handles row value operations that pertain to a specific query type for this + * dimension within query processing engines. + */ + private final QueryTypeHelperClass queryTypeHelper; + /** * Internal name of the dimension. */ @@ -60,26 +67,20 @@ public class QueryDimensionInfo */ private final int cardinality; - /** - * Used by the GroupBy engines, indicates the offset of this dimension's value within the grouping key. 
- */ - private final int keyBufferPosition; - - public QueryDimensionInfo( DimensionSpec spec, DimensionQueryHelper queryHelper, - ColumnValueSelector selector, - int keyBufferPosition + QueryTypeHelperClass queryTypeHelper, + ColumnValueSelector selector ) { this.spec = spec; this.queryHelper = queryHelper; + this.queryTypeHelper = queryTypeHelper; this.name = spec.getDimension(); this.outputName = spec.getOutputName(); this.selector = selector; this.cardinality = queryHelper.getCardinality(selector); - this.keyBufferPosition = keyBufferPosition; } public DimensionSpec getSpec() @@ -92,6 +93,11 @@ public DimensionQueryHelper getQueryHelper() return queryHelper; } + public QueryTypeHelperClass getQueryTypeHelper() + { + return queryTypeHelper; + } + public String getName() { return name; @@ -111,9 +117,4 @@ public int getCardinality() { return cardinality; } - - public int getKeyBufferPosition() - { - return keyBufferPosition; - } } diff --git a/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java b/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java index c16bbfd6f84d..5bc17a1e9c3b 100644 --- a/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java +++ b/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java @@ -223,7 +223,12 @@ public ValueMatcher makeValueMatcher(final String dimension, final Comparable va ); } - final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeQueryHelper(dimension, columnSelectorFactory, null); + final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeBaseQueryHelper( + dimension, + columnSelectorFactory.getColumnCapabilities(dimension), + null + ); + return queryHelper.getValueMatcher(columnSelectorFactory, value); } @@ -234,7 +239,11 @@ public ValueMatcher makeValueMatcher(final String dimension, final DruidPredicat case LONG: return makeLongValueMatcher(dimension, 
predicateFactory.makeLongPredicate()); case STRING: - final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeQueryHelper(dimension, columnSelectorFactory, null); + final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeBaseQueryHelper( + dimension, + columnSelectorFactory.getColumnCapabilities(dimension), + null + ); return queryHelper.getValueMatcher(columnSelectorFactory, predicateFactory); default: return new BooleanValueMatcher(predicateFactory.makeStringPredicate().apply(null)); diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java index a370d569d955..597e7e8496ba 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java @@ -31,12 +31,12 @@ public class CardinalityAggregator implements Aggregator { private final String name; - private final List dimInfoList; + private final List> dimInfoList; private final boolean byRow; public static final HashFunction hashFn = Hashing.murmur3_128(); - protected static void hashRow(List dimInfoList, HyperLogLogCollector collector) + protected static void hashRow(List> dimInfoList, HyperLogLogCollector collector) { final Hasher hasher = hashFn.newHasher(); for (int k = 0; k < dimInfoList.size(); ++k) { @@ -44,16 +44,16 @@ protected static void hashRow(List dimInfoList, HyperLogLogC hasher.putByte((byte) 0); } - QueryDimensionInfo dimInfo = dimInfoList.get(k); - dimInfo.getQueryHelper().hashRow(dimInfo.getSelector(), hasher); + QueryDimensionInfo dimInfo = dimInfoList.get(k); + dimInfo.getQueryTypeHelper().hashRow(dimInfo.getSelector(), hasher); } collector.add(hasher.hash().asBytes()); } - protected static void hashValues(List dimInfoList, HyperLogLogCollector collector) + protected static void hashValues(List> 
dimInfoList, HyperLogLogCollector collector) { - for (final QueryDimensionInfo dimInfo : dimInfoList) { - dimInfo.getQueryHelper().hashValues(dimInfo.getSelector(), collector); + for (final QueryDimensionInfo dimInfo : dimInfoList) { + dimInfo.getQueryTypeHelper().hashValues(dimInfo.getSelector(), collector); } } @@ -61,7 +61,7 @@ protected static void hashValues(List dimInfoList, HyperLogL public CardinalityAggregator( String name, - List dimInfoList, + List> dimInfoList, boolean byRow ) { diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java index caf77d66067d..1d65cc30c9f2 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java @@ -25,6 +25,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; +import com.google.common.hash.Hasher; import io.druid.java.util.common.StringUtils; import io.druid.query.aggregation.Aggregator; import io.druid.query.aggregation.AggregatorFactory; @@ -36,14 +37,21 @@ import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; +import io.druid.query.dimension.QueryTypeHelper; +import io.druid.query.dimension.QueryTypeHelperFactory; import io.druid.segment.ColumnSelectorFactory; import io.druid.segment.ColumnValueSelector; import io.druid.segment.DimensionHandlerUtils; -import io.druid.segment.DimensionQueryHelper; +import io.druid.segment.DimensionSelector; +import io.druid.segment.column.ColumnCapabilities; +import io.druid.segment.column.ValueType; +import io.druid.segment.data.IndexedInts; +import 
it.unimi.dsi.fastutil.ints.IntIterator; import org.apache.commons.codec.binary.Base64; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.List; @@ -95,11 +103,102 @@ public static Object estimateCardinality(Object object) private static final byte CACHE_TYPE_ID = (byte) 0x8; private static final byte CACHE_KEY_SEPARATOR = (byte) 0xFF; + private static final CardinalityAggregatorTypeHelperFactory TYPE_HELPER_FACTORY = new CardinalityAggregatorTypeHelperFactory(); + private final String name; private final List fields; private final boolean byRow; + private static class CardinalityAggregatorTypeHelperFactory implements QueryTypeHelperFactory + { + @Override + public CardinalityAggregatorTypeHelper makeQueryTypeHelper( + String dimName, ColumnCapabilities capabilities + ) + { + ValueType type = capabilities.getType(); + switch(type) { + case STRING: + return new StringCardinalityAggregatorTypeHelper(); + default: + return null; + } + } + } + + public interface CardinalityAggregatorTypeHelper extends QueryTypeHelper + { + /** + * Used by CardinalityAggregator. + * + * Retrieve the current row from dimSelector and add the row values to the hasher. + * + * @param dimSelector Dimension value selector + * @param hasher Hasher used for cardinality aggregator calculations + */ + void hashRow(ValueSelectorType dimSelector, Hasher hasher); + + + /** + * Used by CardinalityAggregator. + * + * Retrieve the current row from dimSelector and add the hash of each row value to the HLL collector.
+ * @param dimSelector Dimension value selector + * @param collector HLL collector used for cardinality aggregator calculations + */ + void hashValues(ValueSelectorType dimSelector, HyperLogLogCollector collector); + } + + public static class StringCardinalityAggregatorTypeHelper implements CardinalityAggregatorTypeHelper + { + public static final String CARDINALITY_AGG_NULL_STRING = "\u0000"; + public static final char CARDINALITY_AGG_SEPARATOR = '\u0001'; + + @Override + public void hashRow(DimensionSelector dimSelector, Hasher hasher) + { + final IndexedInts row = dimSelector.getRow(); + final int size = row.size(); + // nothing to add to hasher if size == 0; only the size == 1 and size > 1 cases are handled. + if (size == 1) { + final String value = dimSelector.lookupName(row.get(0)); + hasher.putUnencodedChars(convertValueForCardinalityAggregator(value)); + } else if (size != 0) { + final String[] values = new String[size]; + for (int i = 0; i < size; ++i) { + final String value = dimSelector.lookupName(row.get(i)); + values[i] = convertValueForCardinalityAggregator(value); + } + // Values need to be sorted to ensure consistent multi-value ordering across different segments + Arrays.sort(values); + for (int i = 0; i < size; ++i) { + if (i != 0) { + hasher.putChar(CARDINALITY_AGG_SEPARATOR); + } + hasher.putUnencodedChars(values[i]); + } + } + } + + @Override + public void hashValues(DimensionSelector dimSelector, HyperLogLogCollector collector) + { + for (IntIterator rowIt = dimSelector.getRow().iterator(); rowIt.hasNext(); ) { + int index = rowIt.nextInt(); + final String value = dimSelector.lookupName(index); + collector.add(CardinalityAggregator.hashFn.hashUnencodedChars(convertValueForCardinalityAggregator(value)).asBytes()); + } + } + + // CardinalityAggregator has a special representation for nulls + private String convertValueForCardinalityAggregator(String value) + { + return value == null ?
CARDINALITY_AGG_NULL_STRING : value; + } + } + + @JsonCreator public CardinalityAggregatorFactory( @JsonProperty("name") String name, @@ -134,7 +233,13 @@ public CardinalityAggregatorFactory( @Override public Aggregator factorize(final ColumnSelectorFactory columnFactory) { - List dimInfoList = makeDimensionInfoList(columnFactory); + List> dimInfoList = + Arrays.asList(DimensionHandlerUtils.getDimensionInfo( + TYPE_HELPER_FACTORY, + fields, + null, + columnFactory + )); if (dimInfoList.isEmpty()) { return Aggregators.noopAggregator(); @@ -147,7 +252,13 @@ public Aggregator factorize(final ColumnSelectorFactory columnFactory) @Override public BufferAggregator factorizeBuffered(ColumnSelectorFactory columnFactory) { - List dimInfoList = makeDimensionInfoList(columnFactory); + List> dimInfoList = + Arrays.asList(DimensionHandlerUtils.getDimensionInfo( + TYPE_HELPER_FACTORY, + fields, + null, + columnFactory + )); if (dimInfoList.isEmpty()) { return Aggregators.noopBufferAggregator(); @@ -156,18 +267,6 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory columnFactory) return new CardinalityBufferAggregator(dimInfoList, byRow); } - private List makeDimensionInfoList(final ColumnSelectorFactory columnSelectorFactory) - { - List dimInfoList = new ArrayList<>(fields.size()); - for (DimensionSpec dimSpec : fields) { - DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeQueryHelper(dimSpec.getDimension(), columnSelectorFactory, null); - ColumnValueSelector dimSelector = queryHelper.getColumnValueSelector(dimSpec, columnSelectorFactory); - QueryDimensionInfo dimInfo = new QueryDimensionInfo(dimSpec, queryHelper, dimSelector, 0); - dimInfoList.add(dimInfo); - } - return dimInfoList; - } - @Override public Comparator getComparator() { diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java index 
52c2203f18d9..6fa28f1bc1a7 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java @@ -28,13 +28,13 @@ public class CardinalityBufferAggregator implements BufferAggregator { - private final List dimInfoList; + private final List> dimInfoList; private final boolean byRow; private static final byte[] EMPTY_BYTES = HyperLogLogCollector.makeEmptyVersionedByteArray(); public CardinalityBufferAggregator( - List dimInfoList, + List> dimInfoList, boolean byRow ) { diff --git a/processing/src/main/java/io/druid/query/dimension/QueryTypeHelper.java b/processing/src/main/java/io/druid/query/dimension/QueryTypeHelper.java new file mode 100644 index 000000000000..4c133bd40491 --- /dev/null +++ b/processing/src/main/java/io/druid/query/dimension/QueryTypeHelper.java @@ -0,0 +1,24 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.query.dimension; + +public interface QueryTypeHelper +{ +} diff --git a/processing/src/main/java/io/druid/query/dimension/QueryTypeHelperFactory.java b/processing/src/main/java/io/druid/query/dimension/QueryTypeHelperFactory.java new file mode 100644 index 000000000000..71ddfb103f42 --- /dev/null +++ b/processing/src/main/java/io/druid/query/dimension/QueryTypeHelperFactory.java @@ -0,0 +1,27 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.query.dimension; + +import io.druid.segment.column.ColumnCapabilities; + +public interface QueryTypeHelperFactory +{ + QueryTypeHelperClass makeQueryTypeHelper(String dimName, ColumnCapabilities capabilities); +} diff --git a/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java b/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java index 47d29e92dee2..eacf2b397f07 100644 --- a/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java +++ b/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java @@ -25,6 +25,7 @@ import com.google.common.collect.Iterators; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.google.common.primitives.Ints; import com.google.inject.Inject; import io.druid.collections.ResourceHolder; import io.druid.collections.StupidPool; @@ -42,14 +43,12 @@ import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.BufferAggregator; import io.druid.query.aggregation.PostAggregator; -import io.druid.query.QueryDimensionInfo; import io.druid.query.dimension.DimensionSpec; import io.druid.query.filter.Filter; -import io.druid.segment.ColumnValueSelector; import io.druid.segment.Cursor; -import io.druid.segment.DimensionHandlerUtils; -import io.druid.segment.DimensionQueryHelper; +import io.druid.segment.DimensionSelector; import io.druid.segment.StorageAdapter; +import io.druid.segment.data.IndexedInts; import io.druid.segment.filter.Filters; import org.joda.time.DateTime; import org.joda.time.Interval; @@ -58,7 +57,7 @@ import java.io.Closeable; import java.io.IOException; import java.nio.ByteBuffer; -import java.util.Comparator; +import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -68,43 +67,11 @@ */ public class GroupByQueryEngine { + private static final int MISSING_VALUE = -1; + private final Supplier config; private final StupidPool 
intermediateResultsBufferPool; - private static Comparator makeKeyComparator(final List dimInfo) - { - final int maxDimIndex = dimInfo.size(); - final DimensionQueryHelper[] queryHelpers = new DimensionQueryHelper[maxDimIndex]; - final int[] keySizes = new int[maxDimIndex]; - - for (int i = 0; i < maxDimIndex; i++) { - queryHelpers[i] = dimInfo.get(i).getQueryHelper(); - keySizes[i] = queryHelpers[i].getGroupingKeySize(); - } - - return new Comparator() - { - public int compare(ByteBuffer o1, ByteBuffer o2) - { - int pos = 0; - int limit = o1.limit(); - int ret = 0; - int dimIndex = 0; - - while (pos < limit && dimIndex < maxDimIndex) { - ret = queryHelpers[dimIndex].compareGroupingKeys(o1, pos, o2, pos); - pos += keySizes[dimIndex]; - if (ret != 0) { - return ret; - } - dimIndex++; - } - return ret; - } - }; - } - - @Inject public GroupByQueryEngine( Supplier config, @@ -154,7 +121,7 @@ public Sequence apply(final Cursor cursor) @Override public RowIterator make() { - return new RowIterator(query, cursor, bufferHolder.get(), config.get(), storageAdapter); + return new RowIterator(query, cursor, bufferHolder.get(), config.get()); } @Override @@ -185,21 +152,19 @@ private static class RowUpdater private final BufferAggregator[] aggregators; private final PositionMaintainer positionMaintainer; - private final Map positions; + private final Map positions = Maps.newTreeMap(); // GroupBy queries tend to do a lot of reads from this. We co-store a hash map to make those reads go faster. 
private final Map positionsHash = Maps.newHashMap(); public RowUpdater( ByteBuffer metricValues, BufferAggregator[] aggregators, - PositionMaintainer positionMaintainer, - List dimInfo + PositionMaintainer positionMaintainer ) { this.metricValues = metricValues; this.aggregators = aggregators; this.positionMaintainer = positionMaintainer; - this.positions = Maps.newTreeMap(makeKeyComparator(dimInfo)); } public int getNumRows() @@ -214,27 +179,26 @@ public Map getPositions() private List updateValues( ByteBuffer key, - final List dims, - final int curIdx + List dims ) { - if (curIdx < dims.size()) { + if (dims.size() > 0) { List retVal = null; List unaggregatedBuffers = null; - final QueryDimensionInfo dimInfo = dims.get(curIdx); - final ColumnValueSelector selector = dimInfo.getSelector(); - final DimensionQueryHelper queryHelper = dimInfo.getQueryHelper(); - final Function> updateValuesFn = new Function>() - { - @Override - public List apply(ByteBuffer input) - { - return updateValues(input, dims, curIdx + 1); + final DimensionSelector dimSelector = dims.get(0); + final IndexedInts row = dimSelector.getRow(); + if (row == null || row.size() == 0) { + ByteBuffer newKey = key.duplicate(); + newKey.putInt(MISSING_VALUE); + unaggregatedBuffers = updateValues(newKey, dims.subList(1, dims.size())); + } else { + for (Integer dimValue : row) { + ByteBuffer newKey = key.duplicate(); + newKey.putInt(dimValue); + unaggregatedBuffers = updateValues(newKey, dims.subList(1, dims.size())); } - }; - - unaggregatedBuffers = queryHelper.addDimValuesToGroupingKey(selector, key, updateValuesFn); + } if (unaggregatedBuffers != null) { if (retVal == null) { retVal = Lists.newArrayList(); @@ -333,6 +297,8 @@ private static class RowIterator implements CloseableIterator private final int maxIntermediateRows; private final List dimensionSpecs; + private final List dimensions; + private final ArrayList dimNames; private final List aggregatorSpecs; private final BufferAggregator[] 
aggregators; private final String[] metricNames; @@ -340,12 +306,8 @@ private static class RowIterator implements CloseableIterator private List unprocessedKeys; private Iterator delegate; - private final List dimInfoList; - - // total size of the grouping key in bytes - private final int totalKeySize; - public RowIterator(GroupByQuery query, final Cursor cursor, ByteBuffer metricsBuffer, GroupByQueryConfig config, StorageAdapter adapter) + public RowIterator(GroupByQuery query, final Cursor cursor, ByteBuffer metricsBuffer, GroupByQueryConfig config) { final GroupByQueryConfig querySpecificConfig = config.withOverrides(query); @@ -357,19 +319,17 @@ public RowIterator(GroupByQuery query, final Cursor cursor, ByteBuffer metricsBu unprocessedKeys = null; delegate = Iterators.emptyIterator(); dimensionSpecs = query.getDimensions(); - dimInfoList = Lists.newArrayListWithExpectedSize(dimensionSpecs.size()); - - for (DimensionSpec dimSpec : dimensionSpecs) { - final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeQueryHelper( - dimSpec.getDimension(), - cursor, - Lists.newArrayList(adapter.getAvailableDimensions()) - ); - final ColumnValueSelector selector = queryHelper.getColumnValueSelector(dimSpec, cursor); - QueryDimensionInfo info = new QueryDimensionInfo(dimSpec, queryHelper, selector, 0); - dimInfoList.add(info); + dimensions = Lists.newArrayListWithExpectedSize(dimensionSpecs.size()); + dimNames = Lists.newArrayListWithExpectedSize(dimensionSpecs.size()); + + for (int i = 0; i < dimensionSpecs.size(); ++i) { + final DimensionSpec dimSpec = dimensionSpecs.get(i); + final DimensionSelector selector = cursor.makeDimensionSelector(dimSpec); + if (selector != null) { + dimensions.add(selector); + dimNames.add(dimSpec.getOutputName()); + } } - totalKeySize = getTotalKeySize(); aggregatorSpecs = query.getAggregatorSpecs(); aggregators = new BufferAggregator[aggregatorSpecs.size()]; @@ -383,15 +343,6 @@ public RowIterator(GroupByQuery query, final Cursor 
cursor, ByteBuffer metricsBu } } - private int getTotalKeySize() - { - int keySize = 0; - for (QueryDimensionInfo info : dimInfoList) { - keySize += info.getQueryHelper().getGroupingKeySize(); - } - return keySize; - } - @Override public boolean hasNext() { @@ -410,10 +361,10 @@ public Row next() } final PositionMaintainer positionMaintainer = new PositionMaintainer(0, sizesRequired, metricsBuffer.remaining()); - final RowUpdater rowUpdater = new RowUpdater(metricsBuffer, aggregators, positionMaintainer, dimInfoList); + final RowUpdater rowUpdater = new RowUpdater(metricsBuffer, aggregators, positionMaintainer); if (unprocessedKeys != null) { for (ByteBuffer key : unprocessedKeys) { - final List unprocUnproc = rowUpdater.updateValues(key, ImmutableList.of(), 0); + final List unprocUnproc = rowUpdater.updateValues(key, ImmutableList.of()); if (unprocUnproc != null) { throw new ISE("Not enough memory to process the request."); } @@ -421,9 +372,9 @@ public Row next() cursor.advance(); } while (!cursor.isDone() && rowUpdater.getNumRows() < maxIntermediateRows) { - ByteBuffer key = ByteBuffer.allocate(totalKeySize); + ByteBuffer key = ByteBuffer.allocate(dimensions.size() * Ints.BYTES); - unprocessedKeys = rowUpdater.updateValues(key, dimInfoList, 0); + unprocessedKeys = rowUpdater.updateValues(key, dimensions); if (unprocessedKeys != null) { break; } @@ -452,14 +403,12 @@ public Row apply(@Nullable Map.Entry input) Map theEvent = Maps.newLinkedHashMap(); ByteBuffer keyBuffer = input.getKey().duplicate(); - for (QueryDimensionInfo dimInfo : dimInfoList) { - final ColumnValueSelector dimSelector = dimInfo.getSelector(); - dimInfo.getQueryHelper().processDimValueFromGroupingKey( - dimInfo.getOutputName(), - dimSelector, - keyBuffer, - theEvent - ); + for (int i = 0; i < dimensions.size(); ++i) { + final DimensionSelector dimSelector = dimensions.get(i); + final int dimVal = keyBuffer.getInt(); + if (MISSING_VALUE != dimVal) { + theEvent.put(dimNames.get(i), 
dimSelector.lookupName(dimVal)); + } } int position = input.getValue(); diff --git a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index a1e514887b3f..4a1ffbb96618 100644 --- a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -21,8 +21,8 @@ import com.google.common.base.Function; import com.google.common.base.Strings; -import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.google.common.primitives.Ints; import io.druid.collections.ResourceHolder; import io.druid.collections.StupidPool; import io.druid.data.input.MapBasedRow; @@ -36,15 +36,19 @@ import io.druid.java.util.common.guava.Sequences; import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.QueryDimensionInfo; -import io.druid.query.dimension.DimensionSpec; +import io.druid.query.dimension.QueryTypeHelper; +import io.druid.query.dimension.QueryTypeHelperFactory; import io.druid.query.groupby.GroupByQuery; import io.druid.query.groupby.GroupByQueryConfig; import io.druid.query.groupby.strategy.GroupByStrategyV2; -import io.druid.segment.ColumnValueSelector; import io.druid.segment.Cursor; import io.druid.segment.DimensionHandlerUtils; import io.druid.segment.DimensionQueryHelper; +import io.druid.segment.DimensionSelector; import io.druid.segment.StorageAdapter; +import io.druid.segment.column.ColumnCapabilities; +import io.druid.segment.column.ValueType; +import io.druid.segment.data.IndexedInts; import io.druid.segment.filter.Filters; import org.joda.time.DateTime; import org.joda.time.Interval; @@ -59,25 +63,41 @@ public class GroupByQueryEngineV2 { - private static QueryDimensionInfo[] getDimensionInfo(GroupByQuery query, StorageAdapter adapter, Cursor cursor) + private static final 
GroupByTypeHelperFactory TYPE_HELPER_FACTORY = new GroupByTypeHelperFactory(); + + private static GroupByDimensionInfo[] getGroupByDimInfo(QueryDimensionInfo[] baseDimInfo) { - int dimCount = query.getDimensions().size(); + GroupByDimensionInfo[] retInfo = new GroupByDimensionInfo[baseDimInfo.length]; int curPos = 0; - QueryDimensionInfo[] dims = new QueryDimensionInfo[dimCount]; + for (int i = 0; i < retInfo.length; i++) { + retInfo[i] = new GroupByDimensionInfo(baseDimInfo[i], curPos); + curPos += retInfo[i].getQueryTypeHelper().getGroupingKeySize(); + } + return retInfo; + } - for (int i = 0; i < dimCount; i++) { - final DimensionSpec dimSpec = query.getDimensions().get(i); - final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeQueryHelper( - dimSpec.getDimension(), - cursor, - Lists.newArrayList(adapter.getAvailableDimensions()) + private static class GroupByDimensionInfo extends QueryDimensionInfo + { + /** + * Indicates the offset of this dimension's value within the grouping key. 
+ */ + private int keyBufferPosition; + + public GroupByDimensionInfo(QueryDimensionInfo baseInfo, int keyBufferPosition) + { + super( + baseInfo.getSpec(), + baseInfo.getQueryHelper(), + baseInfo.getQueryTypeHelper(), + baseInfo.getSelector() ); - final ColumnValueSelector selector = queryHelper.getColumnValueSelector(dimSpec, cursor); - final QueryDimensionInfo dimInfo = new QueryDimensionInfo(dimSpec, queryHelper, selector, curPos); - dims[i] = dimInfo; - curPos += queryHelper.getGroupingKeySize(); + this.keyBufferPosition = keyBufferPosition; + } + + public int getKeyBufferPosition() + { + return keyBufferPosition; } - return dims; } private GroupByQueryEngineV2() @@ -136,13 +156,19 @@ public Sequence apply(final Cursor cursor) @Override public GroupByEngineIterator make() { + QueryDimensionInfo[] dimInfo = DimensionHandlerUtils.getDimensionInfo( + TYPE_HELPER_FACTORY, + query.getDimensions(), + storageAdapter, + cursor + ); return new GroupByEngineIterator( query, config, cursor, bufferHolder.get(), fudgeTimestamp, - getDimensionInfo(query, storageAdapter, cursor) + getGroupByDimInfo(dimInfo) ); } @@ -168,6 +194,150 @@ public void close() throws IOException ); } + private static class GroupByTypeHelperFactory implements QueryTypeHelperFactory + { + @Override + public GroupByTypeHelper makeQueryTypeHelper( + String dimName, ColumnCapabilities capabilities + ) + { + ValueType type = capabilities.getType(); + switch(type) { + case STRING: + return new StringGroupByTypeHelper(); + default: + return null; + } + } + } + + /** + * Contains a collection of query processing methods for type-specific operations used exclusively by + * GroupByQueryEngineV2. + * + * Each GroupByTypeHelper is associated with a single dimension. + * + * @param The type of the row values object for this dimension + */ + private interface GroupByTypeHelper extends QueryTypeHelper + { + /** + * Return the size, in bytes, of this dimension's values in the grouping key. 
+ * + * For example, a String implementation would return 4, the size of an int. + * + * @return size, in bytes, of this dimension's values in the grouping key. + */ + int getGroupingKeySize(); + + /** + * Read the first value within a row values object (IndexedInts, IndexedLongs, etc.) and write that value + * to the keyBuffer at keyBufferPosition. If rowSize is 0, write GROUP_BY_MISSING_VALUE instead. + * + * @param valuesObj row values object + * @param keyBuffer grouping key + * @param keyBufferPosition offset within grouping key + */ + void initializeGroupingKeyV2Dimension( + final RowValuesType valuesObj, + final ByteBuffer keyBuffer, + final int keyBufferPosition + ); + + + /** + * Read the value at rowValueIdx from a row values object and write that value to the keyBuffer at keyBufferPosition. + * + * @param values row values object + * @param rowValueIdx index of the value to read + * @param keyBuffer grouping key + * @param keyBufferPosition offset within grouping key + */ + void addValueToGroupingKeyV2( + RowValuesType values, + int rowValueIdx, + ByteBuffer keyBuffer, + final int keyBufferPosition + ); + + + /** + * Read a value from a grouping key and add it to the group by query result map, using the output name specified + * in a DimensionSpec. 
+ * + * An implementation may choose to not add anything to the result map + * (e.g., as the String implementation does for empty rows) + * + * dimInfo provides access to: + * - the keyBufferPosition offset from which to read the value + * - the dimension value selector + * - the DimensionSpec for this dimension from the query + * + * @param dimInfo dimension info containing the key offset, value selector, and dimension spec + * @param resultMap result map for the group by query being served + * @param key grouping key + */ + void processValueFromGroupingKeyV2( + GroupByDimensionInfo dimInfo, + ByteBuffer key, + Map resultMap + ); + } + + private static class StringGroupByTypeHelper implements GroupByTypeHelper + { + private static final int GROUP_BY_MISSING_VALUE = -1; + + @Override + public int getGroupingKeySize() + { + return Ints.BYTES; + } + + @Override + public void initializeGroupingKeyV2Dimension( + final IndexedInts values, + final ByteBuffer keyBuffer, + final int keyBufferPosition + ) + { + int rowSize = values.size(); + if (rowSize == 0) { + keyBuffer.putInt(keyBufferPosition, GROUP_BY_MISSING_VALUE); + } else { + keyBuffer.putInt(keyBufferPosition, values.get(0)); + } + } + + @Override + public void addValueToGroupingKeyV2( + final IndexedInts values, + final int rowValueIdx, + final ByteBuffer keyBuffer, + final int keyBufferPosition + ) + { + keyBuffer.putInt( + keyBufferPosition, + values.get(rowValueIdx) + ); + } + + @Override + public void processValueFromGroupingKeyV2(GroupByDimensionInfo dimInfo, ByteBuffer key, Map resultMap) + { + final int id = key.getInt(dimInfo.getKeyBufferPosition()); + + // GROUP_BY_MISSING_VALUE is used to indicate empty rows, which are omitted from the result map. 
+ if (id != GROUP_BY_MISSING_VALUE) { + resultMap.put( + dimInfo.getOutputName(), + ((DimensionSelector) dimInfo.getSelector()).lookupName(id) + ); + } + } + } + private static class GroupByEngineIterator implements Iterator, Closeable { private final GroupByQuery query; @@ -179,7 +349,7 @@ private static class GroupByEngineIterator implements Iterator, Closeable private final ByteBuffer keyBuffer; private final int[] stack; private final Object[] valuess; - private final QueryDimensionInfo[] dims; + private final GroupByDimensionInfo[] dims; private int stackp = Integer.MIN_VALUE; private boolean currentRowWasPartiallyAggregated = false; @@ -191,7 +361,7 @@ public GroupByEngineIterator( final Cursor cursor, final ByteBuffer buffer, final DateTime fudgeTimestamp, - final QueryDimensionInfo[] dims + final GroupByDimensionInfo[] dims ) { final int dimCount = query.getDimensions().size(); @@ -248,7 +418,7 @@ public Row next() final DimensionQueryHelper queryHelper = dims[i].getQueryHelper(); valuess[i] = queryHelper.getRowFromDimSelector(dims[i].getSelector()); int rowSize = queryHelper.getRowSize(valuess[i]); - queryHelper.initializeGroupingKeyV2Dimension(valuess[i], keyBuffer, dims[i].getKeyBufferPosition()); + dims[i].getQueryTypeHelper().initializeGroupingKeyV2Dimension(valuess[i], keyBuffer, dims[i].getKeyBufferPosition()); stack[i] = rowSize == 0 ? 
0 : 1; } } @@ -269,11 +439,11 @@ public Row next() if (stackp >= 0 && stack[stackp] < dims[stackp].getQueryHelper().getRowSize(valuess[stackp])) { // Load next value for current slot - dims[stackp].getQueryHelper().addValueToGroupingKeyV2(valuess[stackp], stack[stackp], keyBuffer, dims[stackp].getKeyBufferPosition()); + dims[stackp].getQueryTypeHelper().addValueToGroupingKeyV2(valuess[stackp], stack[stackp], keyBuffer, dims[stackp].getKeyBufferPosition()); stack[stackp]++; for (int i = stackp + 1; i < stack.length; i++) { int rowSize = dims[i].getQueryHelper().getRowSize(valuess[i]); - dims[i].getQueryHelper().initializeGroupingKeyV2Dimension(valuess[i], keyBuffer, dims[i].getKeyBufferPosition()); + dims[i].getQueryTypeHelper().initializeGroupingKeyV2Dimension(valuess[i], keyBuffer, dims[i].getKeyBufferPosition()); stack[i] = rowSize == 0 ? 0 : 1; } stackp = stack.length - 1; @@ -299,8 +469,12 @@ public Row apply(final Grouper.Entry entry) Map theMap = Maps.newLinkedHashMap(); // Add dimensions. - for (QueryDimensionInfo dimInfo : dims) { - dimInfo.getQueryHelper().processValueFromGroupingKeyV2(dimInfo, entry.getKey(), theMap); + for (GroupByDimensionInfo dimInfo : dims) { + dimInfo.getQueryTypeHelper().processValueFromGroupingKeyV2( + dimInfo, + entry.getKey(), + theMap + ); } // Add aggregations. 
@@ -349,11 +523,11 @@ private static class GroupByEngineKeySerde implements Grouper.KeySerde> { + private static final SearchTypeHelperFactory TYPE_HELPER_FACTORY = new SearchTypeHelperFactory(); + private static final EmittingLogger log = new EmittingLogger(SearchQueryRunner.class); private final Segment segment; - public SearchQueryRunner(Segment segment) + private static class SearchTypeHelperFactory implements QueryTypeHelperFactory { - this.segment = segment; + @Override + public SearchTypeHelper makeQueryTypeHelper( + String dimName, ColumnCapabilities capabilities + ) + { + ValueType type = capabilities.getType(); + switch(type) { + case STRING: + return new StringSearchTypeHelper(); + default: + return null; + } + } } - private QueryDimensionInfo getDimInfoFromSpec(DimensionSpec dimSpec, StorageAdapter adapter, Cursor cursor) + public interface SearchTypeHelper extends QueryTypeHelper { - final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeQueryHelper( - dimSpec.getDimension(), - cursor, - Lists.newArrayList(adapter.getAvailableDimensions()) + /** + * Read the current row from dimSelector and update the search result set. + * + * For each row value: + * 1. Check if searchQuerySpec accept()s the value + * 2. If so, add the value to the result set and increment the counter for that value + * 3. If the size of the result set reaches the limit after adding a value, return early. 
+ * + * @param outputName Output name for this dimension in the search query being served + * @param dimSelector Dimension value selector + * @param searchQuerySpec Spec for the search query + * @param set The result set of the search query + * @param limit The limit of the search query + */ + void updateSearchResultSet( + String outputName, + ValueSelectorType dimSelector, + SearchQuerySpec searchQuerySpec, + int limit, + Object2IntRBTreeMap set ); - final ColumnValueSelector dimSelector = queryHelper.getColumnValueSelector(dimSpec, cursor); - final QueryDimensionInfo dimInfo = new QueryDimensionInfo(dimSpec, queryHelper, dimSelector, 0); - return dimInfo; + } + + public static class StringSearchTypeHelper implements SearchTypeHelper + { + @Override + public void updateSearchResultSet( + String outputName, + DimensionSelector selector, + SearchQuerySpec searchQuerySpec, + int limit, + final Object2IntRBTreeMap set + ) + { + if (selector != null) { + final IndexedInts vals = selector.getRow(); + for (int i = 0; i < vals.size(); ++i) { + final String dimVal = selector.lookupName(vals.get(i)); + if (searchQuerySpec.accept(dimVal)) { + set.addTo(new SearchHit(outputName, dimVal), 1); + if (set.size() >= limit) { + return; + } + } + } + } + } + } + + public SearchQueryRunner(Segment segment) + { + this.segment = segment; } @Override @@ -269,14 +331,18 @@ public Object2IntRBTreeMap accumulate(Object2IntRBTreeMap return set; } - List dimInfoList = Lists.newArrayList(); - for (DimensionSpec dim : nonBitmapDims) { - dimInfoList.add(getDimInfoFromSpec(dim, adapter, cursor)); - } + List> dimInfoList = Arrays.asList( + DimensionHandlerUtils.getDimensionInfo( + TYPE_HELPER_FACTORY, + nonBitmapDims, + adapter, + cursor + ) + ); while (!cursor.isDone()) { - for (QueryDimensionInfo dimInfo : dimInfoList) { - dimInfo.getQueryHelper().updateSearchResultSet( + for (QueryDimensionInfo dimInfo : dimInfoList) { + dimInfo.getQueryTypeHelper().updateSearchResultSet( 
dimInfo.getOutputName(), dimInfo.getSelector(), searchQuerySpec, diff --git a/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java b/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java index e85bd62e78a7..c898dfafe74a 100644 --- a/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java +++ b/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java @@ -31,21 +31,28 @@ import io.druid.query.QueryDimensionInfo; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; +import io.druid.query.dimension.QueryTypeHelper; +import io.druid.query.dimension.QueryTypeHelperFactory; import io.druid.query.filter.Filter; import io.druid.segment.ColumnValueSelector; import io.druid.segment.Cursor; import io.druid.segment.DimensionHandlerUtils; -import io.druid.segment.DimensionQueryHelper; +import io.druid.segment.DimensionSelector; import io.druid.segment.LongColumnSelector; import io.druid.segment.ObjectColumnSelector; import io.druid.segment.Segment; import io.druid.segment.StorageAdapter; import io.druid.segment.column.Column; +import io.druid.segment.column.ColumnCapabilities; +import io.druid.segment.column.ValueType; +import io.druid.segment.data.IndexedInts; import io.druid.segment.filter.Filters; import io.druid.timeline.DataSegmentUtils; import org.joda.time.DateTime; import org.joda.time.Interval; +import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Map; @@ -53,6 +60,67 @@ */ public class SelectQueryEngine { + private static final SelectTypeHelperFactory TYPE_HELPER_FACTORY = new SelectTypeHelperFactory(); + + private static class SelectTypeHelperFactory implements QueryTypeHelperFactory + { + @Override + public SelectTypeHelper makeQueryTypeHelper( + String dimName, ColumnCapabilities capabilities + ) + { + ValueType type = capabilities.getType(); + switch(type) { + case STRING: + return new StringSelectTypeHelper(); + 
default: + return null; + } + } + } + + public interface SelectTypeHelper extends QueryTypeHelper + { + /** + * Read the current row from dimSelector and add the row values to the result map. + * + * Multi-valued rows should be added to the result as a List, single value rows should be added as a single object. + * + * @param outputName Output name for this dimension in the select query being served + * @param dimSelector Dimension value selector + * @param resultMap Output map of the select query being served + */ + void addRowValuesToSelectResult( + String outputName, + ValueSelectorType dimSelector, + Map resultMap + ); + } + + public static class StringSelectTypeHelper implements SelectTypeHelper + { + @Override + public void addRowValuesToSelectResult(String outputName, DimensionSelector selector, Map theEvent) + { + if (selector == null) { + theEvent.put(outputName, null); + } else { + final IndexedInts vals = selector.getRow(); + + if (vals.size() == 1) { + final String dimVal = selector.lookupName(vals.get(0)); + theEvent.put(outputName, dimVal); + } else { + List dimVals = new ArrayList<>(vals.size()); + for (int i = 0; i < vals.size(); ++i) { + dimVals.add(selector.lookupName(vals.get(i))); + } + theEvent.put(outputName, dimVals); + } + } + } + } + public Sequence> process(final SelectQuery query, final Segment segment) { final StorageAdapter adapter = segment.asStorageAdapter(); @@ -106,16 +174,16 @@ public Result apply(Cursor cursor) final LongColumnSelector timestampColumnSelector = cursor.makeLongColumnSelector(Column.TIME_COLUMN_NAME); - final List dimInfoList = Lists.newArrayList(); + final List> dimInfoList = Arrays.asList( + DimensionHandlerUtils.getDimensionInfo( + TYPE_HELPER_FACTORY, + Lists.newArrayList(dims), + adapter, + cursor + ) + ); + for (DimensionSpec dimSpec : dims) { - final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeQueryHelper( - dimSpec.getDimension(), - cursor, - 
Lists.newArrayList(adapter.getAvailableDimensions()) - ); - final ColumnValueSelector dimSelector = queryHelper.getColumnValueSelector(dimSpec, cursor); - final QueryDimensionInfo dimInfo = new QueryDimensionInfo(dimSpec, queryHelper, dimSelector, 0); - dimInfoList.add(dimInfo); builder.addDimension(dimSpec.getOutputName()); } @@ -135,8 +203,8 @@ public Result apply(Cursor cursor) final Map theEvent = Maps.newLinkedHashMap(); theEvent.put(EventHolder.timestampKey, new DateTime(timestampColumnSelector.get())); - for (QueryDimensionInfo dimInfo : dimInfoList) { - dimInfo.getQueryHelper().addRowValuesToSelectResult(dimInfo.getOutputName(), dimInfo.getSelector(), theEvent); + for (QueryDimensionInfo dimInfo : dimInfoList) { + dimInfo.getQueryTypeHelper().addRowValuesToSelectResult(dimInfo.getOutputName(), dimInfo.getSelector(), theEvent); } for (Map.Entry metSelector : metSelectors.entrySet()) { diff --git a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java index 6174512d27e0..7b9b92502940 100644 --- a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java @@ -60,8 +60,8 @@ public TopNParams makeInitParams( @Override protected Aggregator[][] makeDimValSelector(TopNParams params, int numProcessed, int numToProcess) { - QueryDimensionInfo dimInfo = params.getDimInfo(); - return dimInfo.getQueryHelper().getDimExtractionRowSelector(params, query, capabilities); + QueryDimensionInfo dimInfo = params.getDimInfo(); + return dimInfo.getQueryTypeHelper().getDimExtractionRowSelector(params, query, capabilities); } @Override @@ -85,10 +85,10 @@ public void scanAndAggregate( ) { final Cursor cursor = params.getCursor(); - final QueryDimensionInfo dimInfo = params.getDimInfo(); + final QueryDimensionInfo dimInfo = params.getDimInfo(); while (!cursor.isDone()) { - 
dimInfo.getQueryHelper().dimExtractionScanAndAggregate( + dimInfo.getQueryTypeHelper().dimExtractionScanAndAggregate( dimInfo.getSelector(), rowSelector, aggregatesStore, diff --git a/processing/src/main/java/io/druid/query/topn/TopNMapFn.java b/processing/src/main/java/io/druid/query/topn/TopNMapFn.java index 09d699a6b5d3..cb4785811d0f 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNMapFn.java +++ b/processing/src/main/java/io/druid/query/topn/TopNMapFn.java @@ -20,15 +20,146 @@ package io.druid.query.topn; import com.google.common.base.Function; +import com.google.common.collect.Lists; import io.druid.query.Result; import io.druid.query.QueryDimensionInfo; +import io.druid.query.aggregation.Aggregator; +import io.druid.query.dimension.QueryTypeHelper; +import io.druid.query.dimension.QueryTypeHelperFactory; +import io.druid.segment.Capabilities; import io.druid.segment.ColumnValueSelector; import io.druid.segment.Cursor; import io.druid.segment.DimensionHandlerUtils; -import io.druid.segment.DimensionQueryHelper; +import io.druid.segment.DimensionSelector; +import io.druid.segment.column.ColumnCapabilities; +import io.druid.segment.column.ValueType; +import io.druid.segment.data.IndexedInts; + +import java.util.Map; public class TopNMapFn implements Function> { + private static final TopNTypeHelperFactory TYPE_HELPER_FACTORY = new TopNTypeHelperFactory(); + + private static class TopNTypeHelperFactory implements QueryTypeHelperFactory + { + @Override + public TopNTypeHelper makeQueryTypeHelper( + String dimName, ColumnCapabilities capabilities + ) + { + ValueType type = capabilities.getType(); + switch(type) { + case STRING: + return new StringTopNTypeHelper(); + default: + return null; + } + } + } + + public interface TopNTypeHelper extends QueryTypeHelper + { + /** + * Used by DimExtractionTopNAlgorithm. + * + * Create an Aggregator[][] using BaseTopNAlgorithm.AggregatorArrayProvider and the given parameters. 
+ * + * As the Aggregator[][] is used as an integer-based lookup, this method is only applicable for dimension types + * that use integer row values. + * + * A dimension type that does not have integer values should return null. + * + * @param params Parameters for the TopN query being served + * @param query The TopN query being served + * @param capabilities Object indicating if dimension values are sorted + * @return an Aggregator[][] for integer-valued dimensions, null otherwise + */ + Aggregator[][] getDimExtractionRowSelector(TopNParams params, TopNQuery query, Capabilities capabilities); + + + /** + * Used by DimExtractionTopNAlgorithm. + * + * Read the current row from a dimension value selector, and for each row value: + * 1. Retrieve the Aggregator[] for the row value from rowSelector (fast integer lookup) or from + * aggregatesStore (slower map). + * + * 2. If the rowSelector and/or aggregatesStore did not have an entry for a particular row value, + * this function should retrieve the current Aggregator[] using BaseTopNAlgorithm.makeAggregators() and the + * provided cursor and query, storing them in rowSelector and aggregatesStore + * + * 3. Call aggregate() on each of the aggregators. + * + * If a dimension type doesn't have integer values, it should ignore rowSelector and use the aggregatesStore map only. + * + * @param selector Dimension value selector + * @param rowSelector Integer lookup containing aggregators + * @param aggregatesStore Map containing aggregators + * @param cursor Cursor for the segment being queried + * @param query The TopN query being served. 
+ */ + void dimExtractionScanAndAggregate( + ValueSelectorType selector, + Aggregator[][] rowSelector, + Map aggregatesStore, + Cursor cursor, + TopNQuery query + ); + } + + public static class StringTopNTypeHelper implements TopNTypeHelper + { + @Override + public Aggregator[][] getDimExtractionRowSelector(TopNParams params, TopNQuery query, Capabilities capabilities) + { + // This method is used for the DimExtractionTopNAlgorithm only. + // Unlike regular topN we cannot rely on ordering to optimize. + // Optimization possibly requires a reverse lookup from value to ID, which is + // not possible when applying an extraction function + + final BaseTopNAlgorithm.AggregatorArrayProvider provider = new BaseTopNAlgorithm.AggregatorArrayProvider( + (DimensionSelector) params.getDimSelector(), + query, + params.getCardinality(), + capabilities + ); + + return provider.build(); + } + + @Override + public void dimExtractionScanAndAggregate( + DimensionSelector selector, + Aggregator[][] rowSelector, + Map aggregatesStore, + Cursor cursor, + TopNQuery query + ) + { + final IndexedInts dimValues = selector.getRow(); + + for (int i = 0; i < dimValues.size(); ++i) { + final int dimIndex = dimValues.get(i); + Aggregator[] theAggregators = rowSelector[dimIndex]; + if (theAggregators == null) { + final String key = selector.lookupName(dimIndex); + theAggregators = aggregatesStore.get(key); + if (theAggregators == null) { + theAggregators = BaseTopNAlgorithm.makeAggregators(cursor, query.getAggregatorSpecs()); + aggregatesStore.put(key, theAggregators); + } + rowSelector[dimIndex] = theAggregators; + } + + for (Aggregator aggregator : theAggregators) { + aggregator.aggregate(); + } + } + } + } + + private final TopNQuery query; private final TopNAlgorithm topNAlgorithm; @@ -45,20 +176,20 @@ public TopNMapFn( @SuppressWarnings("unchecked") public Result apply(Cursor cursor) { - final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeQueryHelper( - 
query.getDimensionSpec().getDimension(), - cursor, - null + final QueryDimensionInfo[] dimInfoArray = DimensionHandlerUtils.getDimensionInfo( + TYPE_HELPER_FACTORY, + Lists.newArrayList(query.getDimensionSpec()), + null, + cursor ); - final ColumnValueSelector dimSelector = queryHelper.getColumnValueSelector(query.getDimensionSpec(), cursor); - final QueryDimensionInfo dimInfo = new QueryDimensionInfo(query.getDimensionSpec(), queryHelper, dimSelector, 0); - if (dimSelector == null) { + + if (dimInfoArray[0].getSelector() == null) { return null; } TopNParams params = null; try { - params = topNAlgorithm.makeInitParams(dimInfo, cursor); + params = topNAlgorithm.makeInitParams(dimInfoArray[0], cursor); TopNResultBuilder resultBuilder = BaseTopNAlgorithm.makeResultBuilder(params, query); diff --git a/processing/src/main/java/io/druid/segment/DimensionHandlerUtils.java b/processing/src/main/java/io/druid/segment/DimensionHandlerUtils.java index be0f8a0bdd93..dc1151f163fd 100644 --- a/processing/src/main/java/io/druid/segment/DimensionHandlerUtils.java +++ b/processing/src/main/java/io/druid/segment/DimensionHandlerUtils.java @@ -19,10 +19,16 @@ package io.druid.segment; +import com.google.common.collect.Lists; import io.druid.java.util.common.IAE; import io.druid.data.input.impl.DimensionSchema.MultiValueHandling; +import io.druid.query.QueryDimensionInfo; +import io.druid.query.dimension.DimensionSpec; +import io.druid.query.dimension.QueryTypeHelper; +import io.druid.query.dimension.QueryTypeHelperFactory; import io.druid.segment.column.Column; import io.druid.segment.column.ColumnCapabilities; +import io.druid.segment.column.ColumnCapabilitiesImpl; import io.druid.segment.column.ValueType; import java.util.List; @@ -31,6 +37,11 @@ public final class DimensionHandlerUtils { private DimensionHandlerUtils() {} + private final static ColumnCapabilities DEFAULT_STRING_CAPABILITIES = + new ColumnCapabilitiesImpl().setType(ValueType.STRING) + .setDictionaryEncoded(true) + 
.setHasBitmapIndexes(true); + public static DimensionHandler getHandlerFromCapabilities( String dimensionName, ColumnCapabilities capabilities, @@ -58,17 +69,88 @@ public static DimensionHandler getHandlerFromCapabilities( return new StringDimensionHandler(dimensionName, multiValueHandling); } - public static DimensionQueryHelper makeQueryHelper(String dimName, ColumnSelectorFactory columnSelectorFactory, List availableDimensions) + public static DimensionQueryHelper makeBaseQueryHelper( + String dimName, + ColumnCapabilities capabilities, + List availableDimensions + ) + { + capabilities = setDefaultForInvalidCapabilities(dimName, capabilities, availableDimensions); + if (capabilities.getType() == ValueType.STRING) { + return new StringDimensionQueryHelper(dimName); + } + return null; + } + + public static QueryTypeHelperClass makeQueryTypeHelper( + QueryTypeHelperFactory typeHelperFactory, + String dimName, + ColumnCapabilities capabilities, + List availableDimensions + ) + { + capabilities = setDefaultForInvalidCapabilities(dimName, capabilities, availableDimensions); + return typeHelperFactory.makeQueryTypeHelper(dimName, capabilities); + } + + public static QueryDimensionInfo[] getDimensionInfo( + QueryTypeHelperFactory typeHelperFactory, + List dimensionSpecs, + StorageAdapter adapter, + ColumnSelectorFactory cursor + ) { - final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(dimName); - DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimName, capabilities, null); + int dimCount = dimensionSpecs.size(); + QueryDimensionInfo[] dims = new QueryDimensionInfo[dimCount]; + for (int i = 0; i < dimCount; i++) { + final DimensionSpec dimSpec = dimensionSpecs.get(i); + final String dimName = dimSpec.getDimension(); + DimensionQueryHelper baseHelper = makeBaseQueryHelper( + dimName, + cursor.getColumnCapabilities(dimSpec.getDimension()), + adapter == null ? 
null : Lists.newArrayList(adapter.getAvailableDimensions()) + ); + QueryTypeHelperClass queryTypeHelper = makeQueryTypeHelper( + typeHelperFactory, + dimName, + cursor.getColumnCapabilities(dimSpec.getDimension()), + adapter == null ? null : Lists.newArrayList(adapter.getAvailableDimensions()) + ); + + final ColumnValueSelector selector = baseHelper.getColumnValueSelector(dimSpec, cursor); + final QueryDimensionInfo dimInfo = new QueryDimensionInfo<>( + dimSpec, + baseHelper, + queryTypeHelper, + selector + ); + dims[i] = dimInfo; + } + return dims; + } + + private static ColumnCapabilities setDefaultForInvalidCapabilities( + String dimName, + ColumnCapabilities capabilities, + List availableDimensions + ) + { + if (capabilities == null) { + capabilities = DEFAULT_STRING_CAPABILITIES; + } // treat metrics as null for now if (availableDimensions != null) { - if (availableDimensions.contains(dimName)) { - handler = new StringDimensionHandler(dimName, null); + if (!availableDimensions.contains(dimName)) { + capabilities = DEFAULT_STRING_CAPABILITIES; } } - return handler.makeQueryHelper(); + + // non-Strings aren't actually supported yet + if (capabilities.getType() != ValueType.STRING) { + capabilities = DEFAULT_STRING_CAPABILITIES; + } + + return capabilities; } } diff --git a/processing/src/main/java/io/druid/segment/DimensionQueryHelper.java b/processing/src/main/java/io/druid/segment/DimensionQueryHelper.java index 5c1cebe56368..189cf620df44 100644 --- a/processing/src/main/java/io/druid/segment/DimensionQueryHelper.java +++ b/processing/src/main/java/io/druid/segment/DimensionQueryHelper.java @@ -19,24 +19,9 @@ package io.druid.segment; -import com.google.common.base.Function; -import com.google.common.hash.Hasher; -import io.druid.query.aggregation.Aggregator; -import io.druid.query.QueryDimensionInfo; -import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector; import io.druid.query.dimension.DimensionSpec; import 
io.druid.query.filter.DruidPredicateFactory; import io.druid.query.filter.ValueMatcher; -import io.druid.query.search.search.SearchHit; -import io.druid.query.search.search.SearchQuerySpec; -import io.druid.query.topn.TopNParams; -import io.druid.query.topn.TopNQuery; -import it.unimi.dsi.fastutil.objects.Object2IntRBTreeMap; - -import java.nio.ByteBuffer; -import java.util.Comparator; -import java.util.List; -import java.util.Map; /** * Query related interface. @@ -44,6 +29,8 @@ * Contains a collection of query processing methods for functionality that is dependent on * the type of a dimension. * + * The methods within this interface are general methods that are not tied to a specific query type. + * * Each DimensionQueryHelper is associated with a single dimension. * * @param The type of this dimension's values @@ -60,6 +47,13 @@ public interface DimensionQueryHelper, */ ValueSelectorType getColumnValueSelector(DimensionSpec dimensionSpec, ColumnSelectorFactory columnSelectorFactory); + /** + * Retrieve the current row from a dimension value selector. + * + * @param dimSelector Dimension value selector + * @return Current row + */ + RowValuesType getRowFromDimSelector(ValueSelectorType dimSelector); /** * Get the size of a row object. @@ -108,281 +102,4 @@ public interface DimensionQueryHelper, * @return A ValueMatcher that applies the predicate for this DimensionQueryHelper's value type from the predicateFactory */ ValueMatcher getValueMatcher(ColumnSelectorFactory cursor, final DruidPredicateFactory predicateFactory); - - - /** - * Used by CardinalityAggregator. - * - * Retrieve the current row from dimSelector and add the row values to the hasher. - * - * @param dimSelector Dimension value selector - * @param hasher Hasher used for cardinality aggregator calculations - */ - void hashRow(ValueSelectorType dimSelector, Hasher hasher); - - - /** - * Used by CardinalityAggregator. 
- * - * Retrieve the current row from dimSelector and add the row values to the hasher. - * @param dimSelector Dimension value selector - * @param collector HLL collector used for cardinality aggregator calculations - */ - void hashValues(ValueSelectorType dimSelector, HyperLogLogCollector collector); - - - /** - * Used by GroupByEngine. - * - * Return the size, in bytes, of this dimension's values in the grouping key. - * - * For example, a String implementation would return 4, the size of an int. - * - * @return size, in bytes, of this dimension's values in the grouping key. - */ - int getGroupingKeySize(); - - - /** - * Used by GroupByEngine. - * - * A grouping key contains a concatenation of byte[] representations of dimension values. - * - * When comparing two grouping keys, the individual dimension values will be compared using this method - * provided by the query helper. - * - * @param b1 ByteBuffer containing the first comparison value - * @param pos1 Offset within b1 of the first comparison value - * @param b2 ByteBuffer containing the second comparison value - * @param pos2 Offset within b2 of the second comparison value - * @return A negative value if the value at pos1 of b1 is < than the value at pos2 of b2 - * 0 if the two values are equal - * A positive value if the first value is > than the latter - */ - int compareGroupingKeys(ByteBuffer b1, int pos1, ByteBuffer b2, int pos2); - - - /** - * Used by GroupByEngine. - * - * A grouping key contains a concatenation of byte[] representations of dimension values. - * - * When comparing two grouping keys, the individual dimension values will be compared with comparators - * provided by the query helper. - * - * @return A comparator suitable for comparing byte representations of this dimension's type of values. - */ - Comparator getGroupingKeyByteComparator(); - - - /** - * Used by GroupByEngine. 
- * - * Perform a relative read on a grouping key ByteBuffer to retrieve a single dimension value, and - * add the retrieved value to a GroupBy result map. - * - * An implementation may choose to not add anything to the result map - * (e.g., as the String implementation does for empty rows) - * - * @param theEvent Result map for the GroupBy query being served - * @param outputName The output name of this dimension for the GroupBy query being served, as specified in the DimensionSpec - * @param dimSelector Dimension value selector, used for value lookups if needed - * @param keyBuffer Grouping key, already positioned at this dimension's offset - */ - void processDimValueFromGroupingKey( - String outputName, - ValueSelectorType dimSelector, - ByteBuffer keyBuffer, - Map theEvent - ); - - - /** - * Used by GroupByEngine. - * - * Read the current row from a dimension value selector and add the row values to the grouping key. - * - * This is called by GroupByEngine's updateValues() function, which uses recursion to traverse the dimensions in the grouping set. - * - * Before adding a dimension value to the grouping key, this function should duplicate() the provided key buffer and - * add the value to the duplicate key. - * - * After adding a dimension value to the duplicate grouping key, an implementation of this function should call - * updateValuesFn on the new key to perform the recursion. - * - * For multi-value rows, this function should duplicate the original grouping key before adding each value, and - * call updateValuesFn on each new key. - * - * See StringDimensionQueryHelper for a reference implementation. 
- * - * @param dimSelector Dimension value selector - * @param key ByteBuffer for the grouping key - * @param updateValuesFn Function provided by GroupByEngine for updateValues() recursion - * @return Return the result of calling updateValuesFn on the updated grouping key - */ - List addDimValuesToGroupingKey( - ValueSelectorType dimSelector, - ByteBuffer key, - Function> updateValuesFn - ); - - - /** - * Retrieve the current row from a dimension value selector. - * - * @param dimSelector Dimension value selector - * @return Current row - */ - RowValuesType getRowFromDimSelector(ValueSelectorType dimSelector); - - - /** - * Used by GroupByEngineV2. - * - * Read the first value within a row values object (IndexedInts, IndexedLongs, etc.) and write that value - * to the keyBuffer at keyBufferPosition. If rowSize is 0, write GROUP_BY_MISSING_VALUE instead. - * - * @param valuesObj row values object - * @param keyBuffer grouping key - * @param keyBufferPosition offset within grouping key - */ - void initializeGroupingKeyV2Dimension( - final RowValuesType valuesObj, - final ByteBuffer keyBuffer, - final int keyBufferPosition - ); - - - /** - * Used by GroupByEngineV2. - * - * Read the value at rowValueIdx from a row values object and write that value to the keyBuffer at keyBufferPosition. - * - * @param values row values object - * @param rowValueIdx index of the value to read - * @param keyBuffer grouping key - * @param keyBufferPosition offset within grouping key - */ - void addValueToGroupingKeyV2( - RowValuesType values, - int rowValueIdx, - ByteBuffer keyBuffer, - final int keyBufferPosition - ); - - - /** - * Used by GroupByEngineV2. - * - * Read a value from a grouping key and add it to the group by query result map, using the output name specified - * in a DimensionSpec. 
- * - * An implementation may choose to not add anything to the result map - * (e.g., as the String implementation does for empty rows) - * - * dimInfo provides access to: - * - the keyBufferPosition offset from which to read the value - * - the dimension value selector - * - the DimensionSpec for this dimension from the query - * - * @param dimInfo dimension info containing the key offset, value selector, and dimension spec - * @param resultMap result map for the group by query being served - * @param key grouping key - */ - void processValueFromGroupingKeyV2( - QueryDimensionInfo dimInfo, - ByteBuffer key, - Map resultMap - ); - - - /** - * Used by DimExtractionTopNAlgorithm. - * - * Create an Aggregator[][] using BaseTopNAlgorithm.AggregatorArrayProvider and the given parameters. - * - * As the Aggregator[][] is used as an integer-based lookup, this method is only applicable for dimension types - * that use integer row values. - * - * A dimension type that does not have integer values should return null. - * - * @param params Parameters for the TopN query being served - * @param query The TopN query being served - * @param capabilities Object indicating if dimension values are sorted - * @return an Aggregator[][] for integer-valued dimensions, null otherwise - */ - Aggregator[][] getDimExtractionRowSelector(TopNParams params, TopNQuery query, Capabilities capabilities); - - - /** - * Used by DimExtractionTopNAlgorithm. - * - * Read the current row from a dimension value selector, and for each row value: - * 1. Retrieve the Aggregator[] for the row value from rowSelector (fast integer lookup) or from - * aggregatesStore (slower map). - * - * 2. If the rowSelector and/or aggregatesStore did not have an entry for a particular row value, - * this function should retrieve the current Aggregator[] using BaseTopNAlgorithm.makeAggregators() and the - * provided cursor and query, storing them in rowSelector and aggregatesStore - * - * 3. 
Call aggregate() on each of the aggregators. - * - * If a dimension type doesn't have integer values, it should ignore rowSelector and use the aggregatesStore map only. - * - * @param selector Dimension value selector - * @param rowSelector Integer lookup containing aggregators - * @param aggregatesStore Map containing aggregators - * @param cursor Cursor for the segment being queried - * @param query The TopN query being served. - */ - void dimExtractionScanAndAggregate( - ValueSelectorType selector, - Aggregator[][] rowSelector, - Map aggregatesStore, - Cursor cursor, - TopNQuery query - ); - - - /** - * Used by the select query. - * - * Read the current row from dimSelector and add the row values to the result map. - * - * Multi-valued rows should be added to the result as a List, single value rows should be added as a single object. - * - * @param outputName Output name for this dimension in the select query being served - * @param dimSelector Dimension value selector - * @param resultMap Output map of the select query being served - */ - void addRowValuesToSelectResult( - String outputName, - ValueSelectorType dimSelector, - Map resultMap - ); - - - /** - * Used by the search query. - * - * Read the current row from dimSelector and update the search result set. - * - * For each row value: - * 1. Check if searchQuerySpec accept()s the value - * 2. If so, add the value to the result set and increment the counter for that value - * 3. If the size of the result set reaches the limit after adding a value, return early. 
- * - * @param outputName Output name for this dimension in the search query being served - * @param dimSelector Dimension value selector - * @param searchQuerySpec Spec for the search query - * @param set The result set of the search query - * @param limit The limit of the search query - */ - void updateSearchResultSet( - String outputName, - ValueSelectorType dimSelector, - SearchQuerySpec searchQuerySpec, - int limit, - Object2IntRBTreeMap set - ); } diff --git a/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java index f7689a9c90a3..95f86679a87c 100644 --- a/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java +++ b/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java @@ -1041,9 +1041,9 @@ public ValueMatcher makeValueMatcher(String dimension, final Comparable value) } } - final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeQueryHelper( + final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeBaseQueryHelper( dimension, - cursor, + cursor.getColumnCapabilities(dimension), Lists.newArrayList(storageAdapter.getAvailableDimensions()) ); return queryHelper.getValueMatcher(cursor, value); @@ -1058,9 +1058,9 @@ public ValueMatcher makeValueMatcher(String dimension, final DruidPredicateFacto } } - final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeQueryHelper( + final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeBaseQueryHelper( dimension, - cursor, + cursor.getColumnCapabilities(dimension), Lists.newArrayList(storageAdapter.getAvailableDimensions()) ); return queryHelper.getValueMatcher(cursor, predicateFactory); diff --git a/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java b/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java index af3ef0426cdb..16510e93611f 100644 --- 
a/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java +++ b/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java @@ -19,43 +19,22 @@ package io.druid.segment; -import com.google.common.base.Function; import com.google.common.base.Predicate; import com.google.common.base.Strings; -import com.google.common.hash.Hasher; import com.google.common.primitives.Ints; -import io.druid.query.aggregation.Aggregator; -import io.druid.query.QueryDimensionInfo; -import io.druid.query.aggregation.cardinality.CardinalityAggregator; -import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; import io.druid.query.filter.DruidPredicateFactory; import io.druid.query.filter.ValueMatcher; -import io.druid.query.search.search.SearchHit; -import io.druid.query.search.search.SearchQuerySpec; -import io.druid.query.topn.BaseTopNAlgorithm; -import io.druid.query.topn.TopNParams; -import io.druid.query.topn.TopNQuery; import io.druid.segment.data.EmptyIndexedInts; import io.druid.segment.data.IndexedInts; -import it.unimi.dsi.fastutil.ints.IntIterator; -import it.unimi.dsi.fastutil.objects.Object2IntRBTreeMap; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; import java.util.BitSet; import java.util.Comparator; -import java.util.List; -import java.util.Map; import java.util.Objects; public class StringDimensionQueryHelper implements DimensionQueryHelper { - private static final int GROUP_BY_MISSING_VALUE = -1; - public static final String CARDINALITY_AGG_NULL_STRING = "\u0000"; - public static final char CARDINALITY_AGG_SEPARATOR = '\u0001'; private final String dimensionName; private static Comparator GROUPING_KEY_COMPARATOR = new Comparator() @@ -79,6 +58,11 @@ public DimensionSelector getColumnValueSelector(DimensionSpec dimensionSpec, Col return columnSelectorFactory.makeDimensionSelector(dimensionSpec); 
} + @Override + public IndexedInts getRowFromDimSelector(DimensionSelector selector) { + return selector == null ? EmptyIndexedInts.EMPTY_INDEXED_INTS : selector.getRow(); + } + @Override public int getRowSize(IndexedInts rowValues) { @@ -216,235 +200,4 @@ public boolean matches() }; } } - - @Override - public void hashRow(DimensionSelector dimSelector, Hasher hasher) - { - final IndexedInts row = dimSelector.getRow(); - final int size = row.size(); - // nothing to add to hasher if size == 0, only handle size == 1 and size != 0 cases. - if (size == 1) { - final String value = dimSelector.lookupName(row.get(0)); - hasher.putUnencodedChars(convertValueForCardinalityAggregator(value)); - } else if (size != 0) { - final String[] values = new String[size]; - for (int i = 0; i < size; ++i) { - final String value = dimSelector.lookupName(row.get(i)); - values[i] = convertValueForCardinalityAggregator(value); - } - // Values need to be sorted to ensure consistent multi-value ordering across different segments - Arrays.sort(values); - for (int i = 0; i < size; ++i) { - if (i != 0) { - hasher.putChar(CARDINALITY_AGG_SEPARATOR); - } - hasher.putUnencodedChars(values[i]); - } - } - } - - @Override - public void hashValues(DimensionSelector dimSelector, HyperLogLogCollector collector) - { - for (IntIterator rowIt = dimSelector.getRow().iterator(); rowIt.hasNext(); ) { - int index = rowIt.nextInt(); - final String value = dimSelector.lookupName(index); - collector.add(CardinalityAggregator.hashFn.hashUnencodedChars(convertValueForCardinalityAggregator(value)).asBytes()); - } - } - - @Override - public int getGroupingKeySize() - { - return Ints.BYTES; - } - - @Override - public int compareGroupingKeys(ByteBuffer b1, int pos1, ByteBuffer b2, int pos2) - { - final int v1 = b1.getInt(pos1); - final int v2 = b2.getInt(pos2); - return Ints.compare(v1, v2); - } - - @Override - public Comparator getGroupingKeyByteComparator() - { - return GROUPING_KEY_COMPARATOR; - } - - @Override - 
public void processDimValueFromGroupingKey( - String outputName, DimensionSelector dimSelector, ByteBuffer keyBuffer, Map theEvent - ) - { - final int dimVal = keyBuffer.getInt(); - if (dimVal != GROUP_BY_MISSING_VALUE) { - theEvent.put(outputName, dimSelector.lookupName(dimVal)); - } - } - - @Override - public List addDimValuesToGroupingKey( - DimensionSelector selector, ByteBuffer key, Function> updateValuesFn - ) - { - List unaggregatedBuffers = null; - final IndexedInts row = selector.getRow(); - if (row == null || row.size() == 0) { - ByteBuffer newKey = key.duplicate(); - newKey.putInt(GROUP_BY_MISSING_VALUE); - unaggregatedBuffers = updateValuesFn.apply(newKey); - } else { - for (IntIterator rowIt = row.iterator(); rowIt.hasNext(); ) { - ByteBuffer newKey = key.duplicate(); - int dimValue = rowIt.nextInt(); - newKey.putInt(dimValue); - unaggregatedBuffers = updateValuesFn.apply(newKey); - } - } - return unaggregatedBuffers; - } - - @Override - public IndexedInts getRowFromDimSelector(DimensionSelector selector) { - return selector == null ? 
EmptyIndexedInts.EMPTY_INDEXED_INTS : selector.getRow(); - } - - @Override - public void initializeGroupingKeyV2Dimension( - final IndexedInts values, - final ByteBuffer keyBuffer, - final int keyBufferPosition - ) - { - int rowSize = values.size(); - if (rowSize == 0) { - keyBuffer.putInt(keyBufferPosition, GROUP_BY_MISSING_VALUE); - } else { - keyBuffer.putInt(keyBufferPosition, values.get(0)); - } - } - - @Override - public void addValueToGroupingKeyV2( - final IndexedInts values, - final int rowValueIdx, - final ByteBuffer keyBuffer, - final int keyBufferPosition - ) - { - keyBuffer.putInt( - keyBufferPosition, - values.get(rowValueIdx) - ); - } - - @Override - public void processValueFromGroupingKeyV2(QueryDimensionInfo dimInfo, ByteBuffer key, Map resultMap) - { - final int id = key.getInt(dimInfo.getKeyBufferPosition()); - - // GROUP_BY_MISSING_VALUE is used to indicate empty rows, which are omitted from the result map. - if (id != GROUP_BY_MISSING_VALUE) { - resultMap.put( - dimInfo.getOutputName(), - ((DimensionSelector) dimInfo.getSelector()).lookupName(id) - ); - } - } - - @Override - public Aggregator[][] getDimExtractionRowSelector(TopNParams params, TopNQuery query, Capabilities capabilities) - { - // This method is used for the DimExtractionTopNAlgorithm only. - // Unlike regular topN we cannot rely on ordering to optimize. 
- // Optimization possibly requires a reverse lookup from value to ID, which is - // not possible when applying an extraction function - - final BaseTopNAlgorithm.AggregatorArrayProvider provider = new BaseTopNAlgorithm.AggregatorArrayProvider( - (DimensionSelector) params.getDimSelector(), - query, - params.getCardinality(), - capabilities - ); - - return provider.build(); - } - - @Override - public void dimExtractionScanAndAggregate(DimensionSelector selector, Aggregator[][] rowSelector, Map aggregatesStore, Cursor cursor, TopNQuery query) - { - final IndexedInts dimValues = selector.getRow(); - - for (int i = 0; i < dimValues.size(); ++i) { - final int dimIndex = dimValues.get(i); - Aggregator[] theAggregators = rowSelector[dimIndex]; - if (theAggregators == null) { - final String key = selector.lookupName(dimIndex); - theAggregators = aggregatesStore.get(key); - if (theAggregators == null) { - theAggregators = BaseTopNAlgorithm.makeAggregators(cursor, query.getAggregatorSpecs()); - aggregatesStore.put(key, theAggregators); - } - rowSelector[dimIndex] = theAggregators; - } - - for (Aggregator aggregator : theAggregators) { - aggregator.aggregate(); - } - } - } - - - @Override - public void addRowValuesToSelectResult(String outputName, DimensionSelector selector, Map theEvent) - { - if (selector == null) { - theEvent.put(outputName, null); - } else { - final IndexedInts vals = selector.getRow(); - - if (vals.size() == 1) { - final String dimVal = selector.lookupName(vals.get(0)); - theEvent.put(outputName, dimVal); - } else { - List dimVals = new ArrayList<>(vals.size()); - for (int i = 0; i < vals.size(); ++i) { - dimVals.add(selector.lookupName(vals.get(i))); - } - theEvent.put(outputName, dimVals); - } - } - } - - @Override - public void updateSearchResultSet( - String outputName, - DimensionSelector selector, - SearchQuerySpec searchQuerySpec, - int limit, - final Object2IntRBTreeMap set - ) - { - if (selector != null) { - final IndexedInts vals = 
selector.getRow(); - for (int i = 0; i < vals.size(); ++i) { - final String dimVal = selector.lookupName(vals.get(i)); - if (searchQuerySpec.accept(dimVal)) { - set.addTo(new SearchHit(outputName, dimVal), 1); - if (set.size() >= limit) { - return; - } - } - } - } - } - - // CardinalityAggregator has a special representation for nulls - private String convertValueForCardinalityAggregator(String value) - { - return value == null ? CARDINALITY_AGG_NULL_STRING : value; - } - } diff --git a/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorBenchmark.java b/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorBenchmark.java index 5b9fdc5036d8..e7d16c07a8dd 100644 --- a/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorBenchmark.java +++ b/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorBenchmark.java @@ -43,7 +43,7 @@ public class CardinalityAggregatorBenchmark extends SimpleBenchmark CardinalityBufferAggregator agg; List selectorList; - List dimInfoList; + List> dimInfoList; ByteBuffer buf; int pos; @@ -81,7 +81,7 @@ public String[] apply(Integer input) final DimensionSpec dimSpec1 = new DefaultDimensionSpec("dim1", "dim1"); final CardinalityAggregatorTest.TestDimensionSelector dim1 = new CardinalityAggregatorTest.TestDimensionSelector(values, null); - final QueryDimensionInfo dimInfo1 = new QueryDimensionInfo(dimSpec1, new StringDimensionQueryHelper("dim1"), dim1, 0); + final QueryDimensionInfo dimInfo1 = new QueryDimensionInfo(dimSpec1, new StringDimensionQueryHelper("dim1"), null, dim1); selectorList = Lists.newArrayList( (DimensionSelector) dim1 diff --git a/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java b/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java index be2bee97adce..acd43771c808 100644 --- 
a/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java +++ b/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java @@ -246,19 +246,19 @@ private static void bufferAggregate( } } - List dimInfoList; + List> dimInfoList; List selectorList; CardinalityAggregatorFactory rowAggregatorFactory; CardinalityAggregatorFactory valueAggregatorFactory; final TestDimensionSelector dim1; final TestDimensionSelector dim2; - List dimInfoListWithExtraction; + List> dimInfoListWithExtraction; List selectorListWithExtraction; final TestDimensionSelector dim1WithExtraction; final TestDimensionSelector dim2WithExtraction; - List dimInfoListConstantVal; + List> dimInfoListConstantVal; List selectorListConstantVal; final TestDimensionSelector dim1ConstantVal; final TestDimensionSelector dim2ConstantVal; @@ -272,8 +272,18 @@ public CardinalityAggregatorTest() dim2 = new TestDimensionSelector(values2, null); dimInfoList = Lists.newArrayList( - new QueryDimensionInfo(dimSpec1, new StringDimensionQueryHelper("dim1"), dim1, 0), - new QueryDimensionInfo(dimSpec2, new StringDimensionQueryHelper("dim2"), dim2, 0) + new QueryDimensionInfo( + dimSpec1, + new StringDimensionQueryHelper("dim1"), + new CardinalityAggregatorFactory.StringCardinalityAggregatorTypeHelper(), + dim1 + ), + new QueryDimensionInfo( + dimSpec2, + new StringDimensionQueryHelper("dim2"), + new CardinalityAggregatorFactory.StringCardinalityAggregatorTypeHelper(), + dim2 + ) ); selectorList = Lists.newArrayList( @@ -309,8 +319,18 @@ public CardinalityAggregatorTest() dim2WithExtraction ); dimInfoListWithExtraction = Lists.newArrayList( - new QueryDimensionInfo(dimSpec1, new StringDimensionQueryHelper("dim1"), dim1WithExtraction, 0), - new QueryDimensionInfo(dimSpec2, new StringDimensionQueryHelper("dim2"), dim2WithExtraction, 0) + new QueryDimensionInfo( + dimSpec1, + new StringDimensionQueryHelper("dim1"), + new 
CardinalityAggregatorFactory.StringCardinalityAggregatorTypeHelper(), + dim1WithExtraction + ), + new QueryDimensionInfo( + dimSpec2, + new StringDimensionQueryHelper("dim2"), + new CardinalityAggregatorFactory.StringCardinalityAggregatorTypeHelper(), + dim2WithExtraction + ) ); String helloJsFn = "function(str) { return 'hello' }"; @@ -322,8 +342,19 @@ public CardinalityAggregatorTest() dim2ConstantVal ); dimInfoListConstantVal = Lists.newArrayList( - new QueryDimensionInfo(dimSpec1, new StringDimensionQueryHelper("dim1"), dim1ConstantVal, 0), - new QueryDimensionInfo(dimSpec2, new StringDimensionQueryHelper("dim2"), dim2ConstantVal, 0) + new QueryDimensionInfo( + dimSpec1, + new StringDimensionQueryHelper("dim1"), + new CardinalityAggregatorFactory.StringCardinalityAggregatorTypeHelper(), + dim1ConstantVal + ), + new QueryDimensionInfo( + dimSpec2, + new StringDimensionQueryHelper( + "dim2"), + new CardinalityAggregatorFactory.StringCardinalityAggregatorTypeHelper(), + dim2ConstantVal + ) ); } @@ -406,11 +437,21 @@ public void testCombineRows() { List selector1 = Lists.newArrayList((DimensionSelector) dim1); List selector2 = Lists.newArrayList((DimensionSelector) dim2); - List dimInfo1 = Lists.newArrayList( - new QueryDimensionInfo(dimSpec1, new StringDimensionQueryHelper("dim1"), dim1, 0) + List> dimInfo1 = Lists.newArrayList( + new QueryDimensionInfo( + dimSpec1, + new StringDimensionQueryHelper("dim1"), + new CardinalityAggregatorFactory.StringCardinalityAggregatorTypeHelper(), + dim1 + ) ); - List dimInfo2 = Lists.newArrayList( - new QueryDimensionInfo(dimSpec2, new StringDimensionQueryHelper("dim2"), dim2, 0) + List> dimInfo2 = Lists.newArrayList( + new QueryDimensionInfo( + dimSpec2, + new StringDimensionQueryHelper("dim2"), + new CardinalityAggregatorFactory.StringCardinalityAggregatorTypeHelper(), + dim2 + ) ); CardinalityAggregator agg1 = new CardinalityAggregator("billy", dimInfo1, true); @@ -444,11 +485,21 @@ public void testCombineValues() List 
selector1 = Lists.newArrayList((DimensionSelector) dim1); List selector2 = Lists.newArrayList((DimensionSelector) dim2); - List dimInfo1 = Lists.newArrayList( - new QueryDimensionInfo(dimSpec1, new StringDimensionQueryHelper("dim1"), dim1, 0) + List> dimInfo1 = Lists.newArrayList( + new QueryDimensionInfo( + dimSpec1, + new StringDimensionQueryHelper("dim1"), + new CardinalityAggregatorFactory.StringCardinalityAggregatorTypeHelper(), + dim1 + ) ); - List dimInfo2 = Lists.newArrayList( - new QueryDimensionInfo(dimSpec2, new StringDimensionQueryHelper("dim2"), dim2, 0) + List> dimInfo2 = Lists.newArrayList( + new QueryDimensionInfo( + dimSpec2, + new StringDimensionQueryHelper("dim2"), + new CardinalityAggregatorFactory.StringCardinalityAggregatorTypeHelper(), + dim2 + ) ); CardinalityAggregator agg1 = new CardinalityAggregator("billy", dimInfo1, false); From 65fb82c99ce771cdc4c9f837bd119a4121086a71 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Tue, 15 Nov 2016 18:27:35 -0800 Subject: [PATCH 06/12] Treat empty rows as nulls in v2 groupby --- .../druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index 4a1ffbb96618..696a41324b33 100644 --- a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -334,6 +334,8 @@ public void processValueFromGroupingKeyV2(GroupByDimensionInfo dimInfo, ByteBuff dimInfo.getOutputName(), ((DimensionSelector) dimInfo.getSelector()).lookupName(id) ); + } else { + resultMap.put(dimInfo.getOutputName(), ""); } } } From dfb099a344d745ad366ccabb543c6ae89543030f Mon Sep 17 00:00:00 2001 From: jon-wei Date: Tue, 15 Nov 2016 19:30:21 -0800 Subject: [PATCH 07/12] Reduce boxing in 
SearchQueryRunner --- .../java/io/druid/query/search/SearchQueryRunner.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java index c483aad665fd..fdcefbbab5c3 100644 --- a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java +++ b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java @@ -63,6 +63,7 @@ import io.druid.segment.column.ValueType; import io.druid.segment.data.IndexedInts; import io.druid.segment.filter.Filters; +import it.unimi.dsi.fastutil.objects.Object2IntMap; import it.unimi.dsi.fastutil.objects.Object2IntRBTreeMap; import org.joda.time.Interval; @@ -243,16 +244,17 @@ private Sequence> makeReturnResult( ) { Iterable source = Iterables.transform( - retVal.entrySet(), new Function, SearchHit>() + retVal.object2IntEntrySet(), new Function, SearchHit>() { @Override - public SearchHit apply(Map.Entry input) + public SearchHit apply(Object2IntMap.Entry input) { SearchHit hit = input.getKey(); - return new SearchHit(hit.getDimension(), hit.getValue(), input.getValue().intValue()); + return new SearchHit(hit.getDimension(), hit.getValue(), input.getIntValue()); } } ); + return Sequences.simple( ImmutableList.of( new Result( @@ -434,5 +436,4 @@ private void processBitmapDims( } } } - } From 9bb1b865b687ac56c4008116a5146a37d94abc44 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Wed, 16 Nov 2016 22:33:24 -0800 Subject: [PATCH 08/12] Add GroupBy empty row handling to MultiValuedDimensionTest --- .../src/test/java/io/druid/query/MultiValuedDimensionTest.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/processing/src/test/java/io/druid/query/MultiValuedDimensionTest.java b/processing/src/test/java/io/druid/query/MultiValuedDimensionTest.java index d4cdfc016d28..5f6e9d3758ed 100644 --- a/processing/src/test/java/io/druid/query/MultiValuedDimensionTest.java +++ 
b/processing/src/test/java/io/druid/query/MultiValuedDimensionTest.java @@ -139,6 +139,7 @@ public static void setupClass() throws Exception "2011-01-12T00:00:00.000Z,product_1,t1\tt2\tt3", "2011-01-13T00:00:00.000Z,product_2,t3\tt4\tt5", "2011-01-14T00:00:00.000Z,product_3,t5\tt6\tt7", + "2011-01-14T00:00:00.000Z,product_4" }; for (String row : rows) { @@ -180,6 +181,7 @@ public void testGroupByNoFilter() throws Exception ); List expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", null, "count", 2L), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t1", "count", 2L), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t2", "count", 2L), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t3", "count", 4L), From 9b6c8a57a2ecff340711b41ff0eff7665cca7212 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Thu, 17 Nov 2016 18:03:38 -0800 Subject: [PATCH 09/12] Address PR comments --- .../cardinality/CardinalityAggregator.java | 19 ++- .../CardinalityAggregatorFactory.java | 104 +------------- .../CardinalityBufferAggregator.java | 5 +- .../CardinalityAggregatorTypeHelper.java | 48 +++++++ ...ardinalityAggregatorTypeHelperFactory.java | 42 ++++++ ...StringCardinalityAggregatorTypeHelper.java | 76 ++++++++++ .../query/dimension/QueryTypeHelper.java | 3 + .../epinephelinae/GroupByQueryEngineV2.java | 50 +++---- .../druid/query/search/SearchQueryRunner.java | 2 +- .../druid/query/select/SelectQueryEngine.java | 15 +- .../topn/DimExtractionTopNAlgorithm.java | 15 +- .../io/druid/query/topn/TopNAlgorithm.java | 3 +- .../java/io/druid/query/topn/TopNMapFn.java | 131 +----------------- .../java/io/druid/query/topn/TopNParams.java | 7 +- .../topn/types/StringTopNTypeHelper.java | 63 +++++++++ .../query/topn/types/TopNTypeHelper.java | 80 +++++++++++ .../topn/types/TopNTypeHelperFactory.java | 42 ++++++ 
.../io/druid/segment/ColumnValueSelector.java | 3 + .../druid/segment/StringDimensionHandler.java | 33 ----- .../segment/StringDimensionQueryHelper.java | 13 -- .../CardinalityAggregatorBenchmark.java | 5 +- .../CardinalityAggregatorTest.java | 56 ++++---- 22 files changed, 458 insertions(+), 357 deletions(-) create mode 100644 processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorTypeHelper.java create mode 100644 processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorTypeHelperFactory.java create mode 100644 processing/src/main/java/io/druid/query/aggregation/cardinality/types/StringCardinalityAggregatorTypeHelper.java create mode 100644 processing/src/main/java/io/druid/query/topn/types/StringTopNTypeHelper.java create mode 100644 processing/src/main/java/io/druid/query/topn/types/TopNTypeHelper.java create mode 100644 processing/src/main/java/io/druid/query/topn/types/TopNTypeHelperFactory.java diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java index 597e7e8496ba..da9952d04c5c 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java @@ -24,6 +24,7 @@ import com.google.common.hash.Hashing; import io.druid.query.aggregation.Aggregator; import io.druid.query.QueryDimensionInfo; +import io.druid.query.aggregation.cardinality.types.CardinalityAggregatorTypeHelper; import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector; import java.util.List; @@ -31,12 +32,15 @@ public class CardinalityAggregator implements Aggregator { private final String name; - private final List> dimInfoList; + private final List> dimInfoList; private final boolean byRow; public static final HashFunction hashFn = 
Hashing.murmur3_128(); - protected static void hashRow(List> dimInfoList, HyperLogLogCollector collector) + protected static void hashRow( + List> dimInfoList, + HyperLogLogCollector collector + ) { final Hasher hasher = hashFn.newHasher(); for (int k = 0; k < dimInfoList.size(); ++k) { @@ -44,15 +48,18 @@ protected static void hashRow(List dimInfo = dimInfoList.get(k); + QueryDimensionInfo dimInfo = dimInfoList.get(k); dimInfo.getQueryTypeHelper().hashRow(dimInfo.getSelector(), hasher); } collector.add(hasher.hash().asBytes()); } - protected static void hashValues(List> dimInfoList, HyperLogLogCollector collector) + protected static void hashValues( + List> dimInfoList, + HyperLogLogCollector collector + ) { - for (final QueryDimensionInfo dimInfo : dimInfoList) { + for (final QueryDimensionInfo dimInfo : dimInfoList) { dimInfo.getQueryTypeHelper().hashValues(dimInfo.getSelector(), collector); } } @@ -61,7 +68,7 @@ protected static void hashValues(List> dimInfoList, + List> dimInfoList, boolean byRow ) { diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java index 1d65cc30c9f2..f4f4a84d974e 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java @@ -25,7 +25,6 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; -import com.google.common.hash.Hasher; import io.druid.java.util.common.StringUtils; import io.druid.query.aggregation.Aggregator; import io.druid.query.aggregation.AggregatorFactory; @@ -33,20 +32,14 @@ import io.druid.query.aggregation.Aggregators; import io.druid.query.aggregation.BufferAggregator; import io.druid.query.QueryDimensionInfo; +import 
io.druid.query.aggregation.cardinality.types.CardinalityAggregatorTypeHelper; +import io.druid.query.aggregation.cardinality.types.CardinalityAggregatorTypeHelperFactory; import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector; import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; -import io.druid.query.dimension.QueryTypeHelper; -import io.druid.query.dimension.QueryTypeHelperFactory; import io.druid.segment.ColumnSelectorFactory; -import io.druid.segment.ColumnValueSelector; import io.druid.segment.DimensionHandlerUtils; -import io.druid.segment.DimensionSelector; -import io.druid.segment.column.ColumnCapabilities; -import io.druid.segment.column.ValueType; -import io.druid.segment.data.IndexedInts; -import it.unimi.dsi.fastutil.ints.IntIterator; import org.apache.commons.codec.binary.Base64; import java.nio.ByteBuffer; @@ -103,102 +96,13 @@ public static Object estimateCardinality(Object object) private static final byte CACHE_TYPE_ID = (byte) 0x8; private static final byte CACHE_KEY_SEPARATOR = (byte) 0xFF; - private static final CardinalityAggregatorTypeHelperFactory TYPE_HELPER_FACTORY = new CardinalityAggregatorTypeHelperFactory(); - + private static final CardinalityAggregatorTypeHelperFactory TYPE_HELPER_FACTORY = + new CardinalityAggregatorTypeHelperFactory(); private final String name; private final List fields; private final boolean byRow; - private static class CardinalityAggregatorTypeHelperFactory implements QueryTypeHelperFactory - { - @Override - public CardinalityAggregatorTypeHelper makeQueryTypeHelper( - String dimName, ColumnCapabilities capabilities - ) - { - ValueType type = capabilities.getType(); - switch(type) { - case STRING: - return new StringCardinalityAggregatorTypeHelper(); - default: - return null; - } - } - } - - public interface CardinalityAggregatorTypeHelper extends QueryTypeHelper - { - /** - * 
Used by CardinalityAggregator. - * - * Retrieve the current row from dimSelector and add the row values to the hasher. - * - * @param dimSelector Dimension value selector - * @param hasher Hasher used for cardinality aggregator calculations - */ - void hashRow(ValueSelectorType dimSelector, Hasher hasher); - - - /** - * Used by CardinalityAggregator. - * - * Retrieve the current row from dimSelector and add the row values to the hasher. - * @param dimSelector Dimension value selector - * @param collector HLL collector used for cardinality aggregator calculations - */ - void hashValues(ValueSelectorType dimSelector, HyperLogLogCollector collector); - } - - public static class StringCardinalityAggregatorTypeHelper implements CardinalityAggregatorTypeHelper - { - public static final String CARDINALITY_AGG_NULL_STRING = "\u0000"; - public static final char CARDINALITY_AGG_SEPARATOR = '\u0001'; - - @Override - public void hashRow(DimensionSelector dimSelector, Hasher hasher) - { - final IndexedInts row = dimSelector.getRow(); - final int size = row.size(); - // nothing to add to hasher if size == 0, only handle size == 1 and size != 0 cases. 
- if (size == 1) { - final String value = dimSelector.lookupName(row.get(0)); - hasher.putUnencodedChars(convertValueForCardinalityAggregator(value)); - } else if (size != 0) { - final String[] values = new String[size]; - for (int i = 0; i < size; ++i) { - final String value = dimSelector.lookupName(row.get(i)); - values[i] = convertValueForCardinalityAggregator(value); - } - // Values need to be sorted to ensure consistent multi-value ordering across different segments - Arrays.sort(values); - for (int i = 0; i < size; ++i) { - if (i != 0) { - hasher.putChar(CARDINALITY_AGG_SEPARATOR); - } - hasher.putUnencodedChars(values[i]); - } - } - } - - @Override - public void hashValues(DimensionSelector dimSelector, HyperLogLogCollector collector) - { - for (IntIterator rowIt = dimSelector.getRow().iterator(); rowIt.hasNext(); ) { - int index = rowIt.nextInt(); - final String value = dimSelector.lookupName(index); - collector.add(CardinalityAggregator.hashFn.hashUnencodedChars(convertValueForCardinalityAggregator(value)).asBytes()); - } - } - - // CardinalityAggregator has a special representation for nulls - private String convertValueForCardinalityAggregator(String value) - { - return value == null ? 
CARDINALITY_AGG_NULL_STRING : value; - } - } - - @JsonCreator public CardinalityAggregatorFactory( @JsonProperty("name") String name, diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java index 6fa28f1bc1a7..387babfe2565 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java @@ -21,6 +21,7 @@ import io.druid.query.aggregation.BufferAggregator; import io.druid.query.QueryDimensionInfo; +import io.druid.query.aggregation.cardinality.types.CardinalityAggregatorTypeHelper; import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector; import java.nio.ByteBuffer; @@ -28,13 +29,13 @@ public class CardinalityBufferAggregator implements BufferAggregator { - private final List> dimInfoList; + private final List> dimInfoList; private final boolean byRow; private static final byte[] EMPTY_BYTES = HyperLogLogCollector.makeEmptyVersionedByteArray(); public CardinalityBufferAggregator( - List> dimInfoList, + List> dimInfoList, boolean byRow ) { diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorTypeHelper.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorTypeHelper.java new file mode 100644 index 000000000000..4d6c1477bf47 --- /dev/null +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorTypeHelper.java @@ -0,0 +1,48 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.query.aggregation.cardinality.types; + +import com.google.common.hash.Hasher; +import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector; +import io.druid.query.dimension.QueryTypeHelper; +import io.druid.segment.ColumnValueSelector; + +public interface CardinalityAggregatorTypeHelper extends QueryTypeHelper +{ + /** + * Used by CardinalityAggregator. + * + * Retrieve the current row from dimSelector and add the row values to the hasher. + * + * @param dimSelector Dimension value selector + * @param hasher Hasher used for cardinality aggregator calculations + */ + void hashRow(ValueSelectorType dimSelector, Hasher hasher); + + + /** + * Used by CardinalityAggregator. + * + * Retrieve the current row from dimSelector and add the row values to the collector.
+ * @param dimSelector Dimension value selector + * @param collector HLL collector used for cardinality aggregator calculations + */ + void hashValues(ValueSelectorType dimSelector, HyperLogLogCollector collector); +} diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorTypeHelperFactory.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorTypeHelperFactory.java new file mode 100644 index 000000000000..d65a3fbcf1a3 --- /dev/null +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorTypeHelperFactory.java @@ -0,0 +1,42 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.query.aggregation.cardinality.types; + +import io.druid.java.util.common.IAE; +import io.druid.query.dimension.QueryTypeHelperFactory; +import io.druid.segment.column.ColumnCapabilities; +import io.druid.segment.column.ValueType; + +public class CardinalityAggregatorTypeHelperFactory implements QueryTypeHelperFactory +{ + @Override + public CardinalityAggregatorTypeHelper makeQueryTypeHelper( + String dimName, ColumnCapabilities capabilities + ) + { + ValueType type = capabilities.getType(); + switch(type) { + case STRING: + return new StringCardinalityAggregatorTypeHelper(); + default: + throw new IAE("Cannot create query type helper from invalid type [%s]", type); + } + } +} diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/StringCardinalityAggregatorTypeHelper.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/StringCardinalityAggregatorTypeHelper.java new file mode 100644 index 000000000000..d2d3e023c98f --- /dev/null +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/StringCardinalityAggregatorTypeHelper.java @@ -0,0 +1,76 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.query.aggregation.cardinality.types; + +import com.google.common.hash.Hasher; +import io.druid.query.aggregation.cardinality.CardinalityAggregator; +import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector; +import io.druid.segment.DimensionSelector; +import io.druid.segment.data.IndexedInts; +import it.unimi.dsi.fastutil.ints.IntIterator; + +import java.util.Arrays; + +public class StringCardinalityAggregatorTypeHelper implements CardinalityAggregatorTypeHelper +{ + public static final String CARDINALITY_AGG_NULL_STRING = "\u0000"; + public static final char CARDINALITY_AGG_SEPARATOR = '\u0001'; + + @Override + public void hashRow(DimensionSelector dimSelector, Hasher hasher) + { + final IndexedInts row = dimSelector.getRow(); + final int size = row.size(); + // nothing to add to hasher if size == 0, only handle size == 1 and size != 0 cases. + if (size == 1) { + final String value = dimSelector.lookupName(row.get(0)); + hasher.putUnencodedChars(nullToSpecial(value)); + } else if (size != 0) { + final String[] values = new String[size]; + for (int i = 0; i < size; ++i) { + final String value = dimSelector.lookupName(row.get(i)); + values[i] = nullToSpecial(value); + } + // Values need to be sorted to ensure consistent multi-value ordering across different segments + Arrays.sort(values); + for (int i = 0; i < size; ++i) { + if (i != 0) { + hasher.putChar(CARDINALITY_AGG_SEPARATOR); + } + hasher.putUnencodedChars(values[i]); + } + } + } + + @Override + public void hashValues(DimensionSelector dimSelector, HyperLogLogCollector collector) + { + for (IntIterator rowIt = dimSelector.getRow().iterator(); rowIt.hasNext(); ) { + int index = rowIt.nextInt(); + final String value = dimSelector.lookupName(index); + collector.add(CardinalityAggregator.hashFn.hashUnencodedChars(nullToSpecial(value)).asBytes()); + } + } + + private String nullToSpecial(String value) + { + return value == null ? 
CARDINALITY_AGG_NULL_STRING : value; + } +} diff --git a/processing/src/main/java/io/druid/query/dimension/QueryTypeHelper.java b/processing/src/main/java/io/druid/query/dimension/QueryTypeHelper.java index 4c133bd40491..9a4ba3c2be24 100644 --- a/processing/src/main/java/io/druid/query/dimension/QueryTypeHelper.java +++ b/processing/src/main/java/io/druid/query/dimension/QueryTypeHelper.java @@ -19,6 +19,9 @@ package io.druid.query.dimension; +/** + * Base type for helper objects that handle value type operations pertaining to a specific query type + */ public interface QueryTypeHelper { } diff --git a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index 696a41324b33..75ca0798eeec 100644 --- a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -76,30 +76,6 @@ private static GroupByDimensionInfo[] getGroupByDimInfo(QueryDimensionInfo - { - /** - * Indicates the offset of this dimension's value within the grouping key. 
- */ - private int keyBufferPosition; - - public GroupByDimensionInfo(QueryDimensionInfo baseInfo, int keyBufferPosition) - { - super( - baseInfo.getSpec(), - baseInfo.getQueryHelper(), - baseInfo.getQueryTypeHelper(), - baseInfo.getSelector() - ); - this.keyBufferPosition = keyBufferPosition; - } - - public int getKeyBufferPosition() - { - return keyBufferPosition; - } - } - private GroupByQueryEngineV2() { // No instantiation @@ -206,7 +182,7 @@ public GroupByTypeHelper makeQueryTypeHelper( case STRING: return new StringGroupByTypeHelper(); default: - return null; + throw new IAE("Cannot create query type helper from invalid type [%s]", type); } } } @@ -573,4 +549,28 @@ public void reset() // No state, nothing to reset } } + + private static class GroupByDimensionInfo extends QueryDimensionInfo + { + /** + * Indicates the offset of this dimension's value within the grouping key. + */ + private int keyBufferPosition; + + public GroupByDimensionInfo(QueryDimensionInfo baseInfo, int keyBufferPosition) + { + super( + baseInfo.getSpec(), + baseInfo.getQueryHelper(), + baseInfo.getQueryTypeHelper(), + baseInfo.getSelector() + ); + this.keyBufferPosition = keyBufferPosition; + } + + public int getKeyBufferPosition() + { + return keyBufferPosition; + } + } } diff --git a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java index fdcefbbab5c3..df2968ef0bbe 100644 --- a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java +++ b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java @@ -92,7 +92,7 @@ public SearchTypeHelper makeQueryTypeHelper( case STRING: return new StringSearchTypeHelper(); default: - return null; + throw new IAE("Cannot create query type helper from invalid type [%s]", type); } } } diff --git a/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java b/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java 
index c898dfafe74a..c4b9c6e308dc 100644 --- a/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java +++ b/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java @@ -24,6 +24,7 @@ import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import io.druid.java.util.common.IAE; import io.druid.java.util.common.ISE; import io.druid.java.util.common.guava.Sequence; import io.druid.query.QueryRunnerHelper; @@ -74,7 +75,7 @@ public SelectTypeHelper makeQueryTypeHelper( case STRING: return new StringSelectTypeHelper(); default: - return null; + throw new IAE("Cannot create query type helper from invalid type [%s]", type); } } } @@ -82,13 +83,13 @@ public SelectTypeHelper makeQueryTypeHelper( public interface SelectTypeHelper extends QueryTypeHelper { /** - * Read the current row from dimSelector and add the row values to the result map. + * Read the current row from dimSelector and add the row values for a dimension to the result map. * * Multi-valued rows should be added to the result as a List, single value rows should be added as a single object. 
* * @param outputName Output name for this dimension in the select query being served * @param dimSelector Dimension value selector - * @param resultMap Output map of the select query being served + * @param resultMap Row value map for the current row being retrieved by the select query */ void addRowValuesToSelectResult( String outputName, @@ -100,22 +101,22 @@ void addRowValuesToSelectResult( public static class StringSelectTypeHelper implements SelectTypeHelper { @Override - public void addRowValuesToSelectResult(String outputName, DimensionSelector selector, Map theEvent) + public void addRowValuesToSelectResult(String outputName, DimensionSelector selector, Map resultMap) { if (selector == null) { - theEvent.put(outputName, null); + resultMap.put(outputName, null); } else { final IndexedInts vals = selector.getRow(); if (vals.size() == 1) { final String dimVal = selector.lookupName(vals.get(0)); - theEvent.put(outputName, dimVal); + resultMap.put(outputName, dimVal); } else { List dimVals = new ArrayList<>(vals.size()); for (int i = 0; i < vals.size(); ++i) { dimVals.add(selector.lookupName(vals.get(i))); } - theEvent.put(outputName, dimVals); + resultMap.put(outputName, dimVals); } } } diff --git a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java index 7b9b92502940..4c7b3f2c7625 100644 --- a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java @@ -22,6 +22,7 @@ import com.google.common.collect.Maps; import io.druid.query.aggregation.Aggregator; import io.druid.query.QueryDimensionInfo; +import io.druid.query.topn.types.TopNTypeHelper; import io.druid.segment.Capabilities; import io.druid.segment.Cursor; @@ -46,7 +47,7 @@ public DimExtractionTopNAlgorithm( @Override public TopNParams makeInitParams( - final QueryDimensionInfo dimInfo, + final 
QueryDimensionInfo dimInfo, final Cursor cursor ) { @@ -60,8 +61,8 @@ public TopNParams makeInitParams( @Override protected Aggregator[][] makeDimValSelector(TopNParams params, int numProcessed, int numToProcess) { - QueryDimensionInfo dimInfo = params.getDimInfo(); - return dimInfo.getQueryTypeHelper().getDimExtractionRowSelector(params, query, capabilities); + QueryDimensionInfo dimInfo = params.getDimInfo(); + return dimInfo.getQueryTypeHelper().getDimExtractionRowSelector(query, params, capabilities); } @Override @@ -85,15 +86,15 @@ public void scanAndAggregate( ) { final Cursor cursor = params.getCursor(); - final QueryDimensionInfo dimInfo = params.getDimInfo(); + final QueryDimensionInfo dimInfo = params.getDimInfo(); while (!cursor.isDone()) { dimInfo.getQueryTypeHelper().dimExtractionScanAndAggregate( + query, dimInfo.getSelector(), - rowSelector, - aggregatesStore, cursor, - query + rowSelector, + aggregatesStore ); cursor.advance(); } diff --git a/processing/src/main/java/io/druid/query/topn/TopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/TopNAlgorithm.java index 6dbb123cfa2b..cbc9f2519f5b 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/TopNAlgorithm.java @@ -21,6 +21,7 @@ import io.druid.query.aggregation.Aggregator; import io.druid.query.QueryDimensionInfo; +import io.druid.query.topn.types.TopNTypeHelper; import io.druid.segment.Cursor; /** @@ -31,7 +32,7 @@ public interface TopNAlgorithm public static final int INIT_POSITION_VALUE = -1; public static final int SKIP_POSITION_VALUE = -2; - public TopNParams makeInitParams(QueryDimensionInfo dimInfo, Cursor cursor); + public TopNParams makeInitParams(QueryDimensionInfo dimInfo, Cursor cursor); public void run( Parameters params, diff --git a/processing/src/main/java/io/druid/query/topn/TopNMapFn.java b/processing/src/main/java/io/druid/query/topn/TopNMapFn.java index cb4785811d0f..c2191dc31fe4 100644 --- 
a/processing/src/main/java/io/druid/query/topn/TopNMapFn.java +++ b/processing/src/main/java/io/druid/query/topn/TopNMapFn.java @@ -23,143 +23,14 @@ import com.google.common.collect.Lists; import io.druid.query.Result; import io.druid.query.QueryDimensionInfo; -import io.druid.query.aggregation.Aggregator; -import io.druid.query.dimension.QueryTypeHelper; -import io.druid.query.dimension.QueryTypeHelperFactory; -import io.druid.segment.Capabilities; -import io.druid.segment.ColumnValueSelector; +import io.druid.query.topn.types.TopNTypeHelperFactory; import io.druid.segment.Cursor; import io.druid.segment.DimensionHandlerUtils; -import io.druid.segment.DimensionSelector; -import io.druid.segment.column.ColumnCapabilities; -import io.druid.segment.column.ValueType; -import io.druid.segment.data.IndexedInts; - -import java.util.Map; public class TopNMapFn implements Function> { private static final TopNTypeHelperFactory TYPE_HELPER_FACTORY = new TopNTypeHelperFactory(); - private static class TopNTypeHelperFactory implements QueryTypeHelperFactory - { - @Override - public TopNTypeHelper makeQueryTypeHelper( - String dimName, ColumnCapabilities capabilities - ) - { - ValueType type = capabilities.getType(); - switch(type) { - case STRING: - return new StringTopNTypeHelper(); - default: - return null; - } - } - } - - public interface TopNTypeHelper extends QueryTypeHelper - { - /** - * Used by DimExtractionTopNAlgorithm. - * - * Create an Aggregator[][] using BaseTopNAlgorithm.AggregatorArrayProvider and the given parameters. - * - * As the Aggregator[][] is used as an integer-based lookup, this method is only applicable for dimension types - * that use integer row values. - * - * A dimension type that does not have integer values should return null. 
- * - * @param params Parameters for the TopN query being served - * @param query The TopN query being served - * @param capabilities Object indicating if dimension values are sorted - * @return an Aggregator[][] for integer-valued dimensions, null otherwise - */ - Aggregator[][] getDimExtractionRowSelector(TopNParams params, TopNQuery query, Capabilities capabilities); - - - /** - * Used by DimExtractionTopNAlgorithm. - * - * Read the current row from a dimension value selector, and for each row value: - * 1. Retrieve the Aggregator[] for the row value from rowSelector (fast integer lookup) or from - * aggregatesStore (slower map). - * - * 2. If the rowSelector and/or aggregatesStore did not have an entry for a particular row value, - * this function should retrieve the current Aggregator[] using BaseTopNAlgorithm.makeAggregators() and the - * provided cursor and query, storing them in rowSelector and aggregatesStore - * - * 3. Call aggregate() on each of the aggregators. - * - * If a dimension type doesn't have integer values, it should ignore rowSelector and use the aggregatesStore map only. - * - * @param selector Dimension value selector - * @param rowSelector Integer lookup containing aggregators - * @param aggregatesStore Map containing aggregators - * @param cursor Cursor for the segment being queried - * @param query The TopN query being served. - */ - void dimExtractionScanAndAggregate( - ValueSelectorType selector, - Aggregator[][] rowSelector, - Map aggregatesStore, - Cursor cursor, - TopNQuery query - ); - } - - public static class StringTopNTypeHelper implements TopNTypeHelper - { - @Override - public Aggregator[][] getDimExtractionRowSelector(TopNParams params, TopNQuery query, Capabilities capabilities) - { - // This method is used for the DimExtractionTopNAlgorithm only. - // Unlike regular topN we cannot rely on ordering to optimize. 
- // Optimization possibly requires a reverse lookup from value to ID, which is - // not possible when applying an extraction function - - final BaseTopNAlgorithm.AggregatorArrayProvider provider = new BaseTopNAlgorithm.AggregatorArrayProvider( - (DimensionSelector) params.getDimSelector(), - query, - params.getCardinality(), - capabilities - ); - - return provider.build(); - } - - @Override - public void dimExtractionScanAndAggregate( - DimensionSelector selector, - Aggregator[][] rowSelector, - Map aggregatesStore, - Cursor cursor, - TopNQuery query - ) - { - final IndexedInts dimValues = selector.getRow(); - - for (int i = 0; i < dimValues.size(); ++i) { - final int dimIndex = dimValues.get(i); - Aggregator[] theAggregators = rowSelector[dimIndex]; - if (theAggregators == null) { - final String key = selector.lookupName(dimIndex); - theAggregators = aggregatesStore.get(key); - if (theAggregators == null) { - theAggregators = BaseTopNAlgorithm.makeAggregators(cursor, query.getAggregatorSpecs()); - aggregatesStore.put(key, theAggregators); - } - rowSelector[dimIndex] = theAggregators; - } - - for (Aggregator aggregator : theAggregators) { - aggregator.aggregate(); - } - } - } - } - - private final TopNQuery query; private final TopNAlgorithm topNAlgorithm; diff --git a/processing/src/main/java/io/druid/query/topn/TopNParams.java b/processing/src/main/java/io/druid/query/topn/TopNParams.java index d31fe7973d1b..16086db688b7 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNParams.java +++ b/processing/src/main/java/io/druid/query/topn/TopNParams.java @@ -20,6 +20,7 @@ package io.druid.query.topn; import io.druid.query.QueryDimensionInfo; +import io.druid.query.topn.types.TopNTypeHelper; import io.druid.segment.Cursor; /** @@ -29,10 +30,10 @@ public class TopNParams private final Cursor cursor; private final int cardinality; private final int numValuesPerPass; - private final QueryDimensionInfo dimInfo; + private final QueryDimensionInfo dimInfo; 
protected TopNParams( - QueryDimensionInfo dimInfo, + QueryDimensionInfo dimInfo, Cursor cursor, int numValuesPerPass ) @@ -52,7 +53,7 @@ public Object getDimSelector() return dimInfo.getSelector(); } - public QueryDimensionInfo getDimInfo() + public QueryDimensionInfo getDimInfo() { return dimInfo; } diff --git a/processing/src/main/java/io/druid/query/topn/types/StringTopNTypeHelper.java b/processing/src/main/java/io/druid/query/topn/types/StringTopNTypeHelper.java new file mode 100644 index 000000000000..c0ce6d02b03f --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/types/StringTopNTypeHelper.java @@ -0,0 +1,63 @@ +package io.druid.query.topn.types; + +import io.druid.query.aggregation.Aggregator; +import io.druid.query.topn.BaseTopNAlgorithm; +import io.druid.query.topn.TopNParams; +import io.druid.query.topn.TopNQuery; +import io.druid.segment.Capabilities; +import io.druid.segment.Cursor; +import io.druid.segment.DimensionSelector; +import io.druid.segment.data.IndexedInts; + +import java.util.Map; + +public class StringTopNTypeHelper implements TopNTypeHelper +{ + @Override + public Aggregator[][] getDimExtractionRowSelector(TopNQuery query, TopNParams params, Capabilities capabilities) + { + // This method is used for the DimExtractionTopNAlgorithm only. + // Unlike regular topN we cannot rely on ordering to optimize. 
+ // Optimization possibly requires a reverse lookup from value to ID, which is + // not possible when applying an extraction function + + final BaseTopNAlgorithm.AggregatorArrayProvider provider = new BaseTopNAlgorithm.AggregatorArrayProvider( + (DimensionSelector) params.getDimSelector(), + query, + params.getCardinality(), + capabilities + ); + + return provider.build(); + } + + @Override + public void dimExtractionScanAndAggregate( + final TopNQuery query, + DimensionSelector selector, + Cursor cursor, + Aggregator[][] rowSelector, + Map aggregatesStore + ) + { + final IndexedInts dimValues = selector.getRow(); + + for (int i = 0; i < dimValues.size(); ++i) { + final int dimIndex = dimValues.get(i); + Aggregator[] theAggregators = rowSelector[dimIndex]; + if (theAggregators == null) { + final String key = selector.lookupName(dimIndex); + theAggregators = aggregatesStore.get(key); + if (theAggregators == null) { + theAggregators = BaseTopNAlgorithm.makeAggregators(cursor, query.getAggregatorSpecs()); + aggregatesStore.put(key, theAggregators); + } + rowSelector[dimIndex] = theAggregators; + } + + for (Aggregator aggregator : theAggregators) { + aggregator.aggregate(); + } + } + } +} diff --git a/processing/src/main/java/io/druid/query/topn/types/TopNTypeHelper.java b/processing/src/main/java/io/druid/query/topn/types/TopNTypeHelper.java new file mode 100644 index 000000000000..fae4aa2186c8 --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/types/TopNTypeHelper.java @@ -0,0 +1,80 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.query.topn.types; + +import io.druid.query.aggregation.Aggregator; +import io.druid.query.dimension.QueryTypeHelper; +import io.druid.query.topn.TopNParams; +import io.druid.query.topn.TopNQuery; +import io.druid.segment.Capabilities; +import io.druid.segment.ColumnValueSelector; +import io.druid.segment.Cursor; + +import java.util.Map; + +public interface TopNTypeHelper extends QueryTypeHelper +{ + /** + * Used by DimExtractionTopNAlgorithm. + * + * Create an Aggregator[][] using BaseTopNAlgorithm.AggregatorArrayProvider and the given parameters. + * + * As the Aggregator[][] is used as an integer-based lookup, this method is only applicable for dimension types + * that use integer row values. + * + * A dimension type that does not have integer values should return null. + * + * @param query The TopN query being served + * @param params Parameters for the TopN query being served + * @param capabilities Object indicating if dimension values are sorted + * @return an Aggregator[][] for integer-valued dimensions, null otherwise + */ + Aggregator[][] getDimExtractionRowSelector(TopNQuery query, TopNParams params, Capabilities capabilities); + + + /** + * Used by DimExtractionTopNAlgorithm. + * + * Read the current row from a dimension value selector, and for each row value: + * 1. Retrieve the Aggregator[] for the row value from rowSelector (fast integer lookup) or from + * aggregatesStore (slower map). + * + * 2. 
If the rowSelector and/or aggregatesStore did not have an entry for a particular row value, + * this function should retrieve the current Aggregator[] using BaseTopNAlgorithm.makeAggregators() and the + * provided cursor and query, storing them in rowSelector and aggregatesStore + * + * 3. Call aggregate() on each of the aggregators. + * + * If a dimension type doesn't have integer values, it should ignore rowSelector and use the aggregatesStore map only. + * + * @param query The TopN query being served. + * @param selector Dimension value selector + * @param cursor Cursor for the segment being queried + * @param rowSelector Integer lookup containing aggregators + * @param aggregatesStore Map containing aggregators + */ + void dimExtractionScanAndAggregate( + final TopNQuery query, + ValueSelectorType selector, + Cursor cursor, + Aggregator[][] rowSelector, + Map aggregatesStore + ); +} diff --git a/processing/src/main/java/io/druid/query/topn/types/TopNTypeHelperFactory.java b/processing/src/main/java/io/druid/query/topn/types/TopNTypeHelperFactory.java new file mode 100644 index 000000000000..dfb8cdb5f102 --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/types/TopNTypeHelperFactory.java @@ -0,0 +1,42 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.query.topn.types; + +import io.druid.java.util.common.IAE; +import io.druid.query.dimension.QueryTypeHelperFactory; +import io.druid.segment.column.ColumnCapabilities; +import io.druid.segment.column.ValueType; + +public class TopNTypeHelperFactory implements QueryTypeHelperFactory +{ + @Override + public TopNTypeHelper makeQueryTypeHelper( + String dimName, ColumnCapabilities capabilities + ) + { + ValueType type = capabilities.getType(); + switch(type) { + case STRING: + return new StringTopNTypeHelper(); + default: + throw new IAE("Cannot create query type helper from invalid type [%s]", type); + } + } +} diff --git a/processing/src/main/java/io/druid/segment/ColumnValueSelector.java b/processing/src/main/java/io/druid/segment/ColumnValueSelector.java index 4df130fff945..d73ac68affea 100644 --- a/processing/src/main/java/io/druid/segment/ColumnValueSelector.java +++ b/processing/src/main/java/io/druid/segment/ColumnValueSelector.java @@ -19,6 +19,9 @@ package io.druid.segment; +/** + * Base type for interfaces that manage column value selection, e.g. 
DimensionSelector, LongColumnSelector + */ public interface ColumnValueSelector { } diff --git a/processing/src/main/java/io/druid/segment/StringDimensionHandler.java b/processing/src/main/java/io/druid/segment/StringDimensionHandler.java index 165f40e91cb2..f31318618ef5 100644 --- a/processing/src/main/java/io/druid/segment/StringDimensionHandler.java +++ b/processing/src/main/java/io/druid/segment/StringDimensionHandler.java @@ -19,7 +19,6 @@ package io.druid.segment; -import com.google.common.base.Function; import com.google.common.primitives.Ints; import io.druid.data.input.impl.DimensionSchema.MultiValueHandling; import io.druid.segment.column.Column; @@ -33,7 +32,6 @@ import java.io.File; import java.lang.reflect.Array; import java.util.Arrays; -import java.util.Comparator; public class StringDimensionHandler implements DimensionHandler { @@ -221,35 +219,4 @@ public DimensionQueryHelper makeQueryHel { return new StringDimensionQueryHelper(dimensionName); } - - public static final Function STRING_TRANSFORMER = new Function() - { - @Override - public String apply(final Object o) - { - if (o == null) { - return null; - } - if (o instanceof String) { - return (String) o; - } - return o.toString(); - } - }; - - public static final Comparator ENCODED_COMPARATOR = new Comparator() - { - @Override - public int compare(Integer o1, Integer o2) - { - if (o1 == null) { - return o2 == null ? 
0 : -1; - } - if (o2 == null) { - return 1; - } - return o1.compareTo(o2); - } - }; - } diff --git a/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java b/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java index 16510e93611f..94aa9a97655b 100644 --- a/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java +++ b/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java @@ -21,7 +21,6 @@ import com.google.common.base.Predicate; import com.google.common.base.Strings; -import com.google.common.primitives.Ints; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; import io.druid.query.filter.DruidPredicateFactory; @@ -30,24 +29,12 @@ import io.druid.segment.data.IndexedInts; import java.util.BitSet; -import java.util.Comparator; import java.util.Objects; public class StringDimensionQueryHelper implements DimensionQueryHelper { private final String dimensionName; - private static Comparator GROUPING_KEY_COMPARATOR = new Comparator() - { - @Override - public int compare(byte[] o1, byte[] o2) - { - int intLhs = Ints.fromByteArray(o1); - int intRhs = Ints.fromByteArray(o2); - return Ints.compare(intLhs, intRhs); - } - }; - public StringDimensionQueryHelper(String dimensionName) { this.dimensionName = dimensionName; } diff --git a/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorBenchmark.java b/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorBenchmark.java index e7d16c07a8dd..8128cbc032a9 100644 --- a/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorBenchmark.java +++ b/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorBenchmark.java @@ -29,6 +29,7 @@ import com.google.common.collect.Lists; import com.google.common.collect.Range; import io.druid.query.QueryDimensionInfo; +import 
io.druid.query.aggregation.cardinality.types.CardinalityAggregatorTypeHelper; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; import io.druid.segment.DimensionSelector; @@ -43,7 +44,7 @@ public class CardinalityAggregatorBenchmark extends SimpleBenchmark CardinalityBufferAggregator agg; List selectorList; - List> dimInfoList; + List> dimInfoList; ByteBuffer buf; int pos; @@ -81,7 +82,7 @@ public String[] apply(Integer input) final DimensionSpec dimSpec1 = new DefaultDimensionSpec("dim1", "dim1"); final CardinalityAggregatorTest.TestDimensionSelector dim1 = new CardinalityAggregatorTest.TestDimensionSelector(values, null); - final QueryDimensionInfo dimInfo1 = new QueryDimensionInfo(dimSpec1, new StringDimensionQueryHelper("dim1"), null, dim1); + final QueryDimensionInfo dimInfo1 = new QueryDimensionInfo(dimSpec1, new StringDimensionQueryHelper("dim1"), null, dim1); selectorList = Lists.newArrayList( (DimensionSelector) dim1 diff --git a/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java b/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java index acd43771c808..1b87fcc65e40 100644 --- a/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java +++ b/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java @@ -32,6 +32,8 @@ import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.BufferAggregator; import io.druid.query.QueryDimensionInfo; +import io.druid.query.aggregation.cardinality.types.CardinalityAggregatorTypeHelper; +import io.druid.query.aggregation.cardinality.types.StringCardinalityAggregatorTypeHelper; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; import io.druid.query.dimension.ExtractionDimensionSpec; @@ -246,19 +248,19 @@ private static void bufferAggregate( } 
} - List> dimInfoList; + List> dimInfoList; List selectorList; CardinalityAggregatorFactory rowAggregatorFactory; CardinalityAggregatorFactory valueAggregatorFactory; final TestDimensionSelector dim1; final TestDimensionSelector dim2; - List> dimInfoListWithExtraction; + List> dimInfoListWithExtraction; List selectorListWithExtraction; final TestDimensionSelector dim1WithExtraction; final TestDimensionSelector dim2WithExtraction; - List> dimInfoListConstantVal; + List> dimInfoListConstantVal; List selectorListConstantVal; final TestDimensionSelector dim1ConstantVal; final TestDimensionSelector dim2ConstantVal; @@ -272,16 +274,16 @@ public CardinalityAggregatorTest() dim2 = new TestDimensionSelector(values2, null); dimInfoList = Lists.newArrayList( - new QueryDimensionInfo( + new QueryDimensionInfo( dimSpec1, new StringDimensionQueryHelper("dim1"), - new CardinalityAggregatorFactory.StringCardinalityAggregatorTypeHelper(), + new StringCardinalityAggregatorTypeHelper(), dim1 ), - new QueryDimensionInfo( + new QueryDimensionInfo( dimSpec2, new StringDimensionQueryHelper("dim2"), - new CardinalityAggregatorFactory.StringCardinalityAggregatorTypeHelper(), + new StringCardinalityAggregatorTypeHelper(), dim2 ) ); @@ -319,16 +321,16 @@ public CardinalityAggregatorTest() dim2WithExtraction ); dimInfoListWithExtraction = Lists.newArrayList( - new QueryDimensionInfo( + new QueryDimensionInfo( dimSpec1, new StringDimensionQueryHelper("dim1"), - new CardinalityAggregatorFactory.StringCardinalityAggregatorTypeHelper(), + new StringCardinalityAggregatorTypeHelper(), dim1WithExtraction ), - new QueryDimensionInfo( + new QueryDimensionInfo( dimSpec2, new StringDimensionQueryHelper("dim2"), - new CardinalityAggregatorFactory.StringCardinalityAggregatorTypeHelper(), + new StringCardinalityAggregatorTypeHelper(), dim2WithExtraction ) ); @@ -342,17 +344,17 @@ public CardinalityAggregatorTest() dim2ConstantVal ); dimInfoListConstantVal = Lists.newArrayList( - new QueryDimensionInfo( + 
new QueryDimensionInfo( dimSpec1, new StringDimensionQueryHelper("dim1"), - new CardinalityAggregatorFactory.StringCardinalityAggregatorTypeHelper(), + new StringCardinalityAggregatorTypeHelper(), dim1ConstantVal ), - new QueryDimensionInfo( + new QueryDimensionInfo( dimSpec2, new StringDimensionQueryHelper( "dim2"), - new CardinalityAggregatorFactory.StringCardinalityAggregatorTypeHelper(), + new StringCardinalityAggregatorTypeHelper(), dim2ConstantVal ) ); @@ -437,19 +439,19 @@ public void testCombineRows() { List selector1 = Lists.newArrayList((DimensionSelector) dim1); List selector2 = Lists.newArrayList((DimensionSelector) dim2); - List> dimInfo1 = Lists.newArrayList( - new QueryDimensionInfo( + List> dimInfo1 = Lists.newArrayList( + new QueryDimensionInfo( dimSpec1, new StringDimensionQueryHelper("dim1"), - new CardinalityAggregatorFactory.StringCardinalityAggregatorTypeHelper(), + new StringCardinalityAggregatorTypeHelper(), dim1 ) ); - List> dimInfo2 = Lists.newArrayList( - new QueryDimensionInfo( + List> dimInfo2 = Lists.newArrayList( + new QueryDimensionInfo( dimSpec2, new StringDimensionQueryHelper("dim2"), - new CardinalityAggregatorFactory.StringCardinalityAggregatorTypeHelper(), + new StringCardinalityAggregatorTypeHelper(), dim2 ) ); @@ -485,19 +487,19 @@ public void testCombineValues() List selector1 = Lists.newArrayList((DimensionSelector) dim1); List selector2 = Lists.newArrayList((DimensionSelector) dim2); - List> dimInfo1 = Lists.newArrayList( - new QueryDimensionInfo( + List> dimInfo1 = Lists.newArrayList( + new QueryDimensionInfo( dimSpec1, new StringDimensionQueryHelper("dim1"), - new CardinalityAggregatorFactory.StringCardinalityAggregatorTypeHelper(), + new StringCardinalityAggregatorTypeHelper(), dim1 ) ); - List> dimInfo2 = Lists.newArrayList( - new QueryDimensionInfo( + List> dimInfo2 = Lists.newArrayList( + new QueryDimensionInfo( dimSpec2, new StringDimensionQueryHelper("dim2"), - new 
CardinalityAggregatorFactory.StringCardinalityAggregatorTypeHelper(), + new StringCardinalityAggregatorTypeHelper(), dim2 ) ); From 23c1a87b185d750ef832f1347bcb48ffaa6c82ba Mon Sep 17 00:00:00 2001 From: jon-wei Date: Mon, 19 Dec 2016 18:04:28 -0800 Subject: [PATCH 10/12] PR comments and refactoring --- ...nsionInfo.java => ColumnSelectorPlus.java} | 61 +--- .../FilteredAggregatorFactory.java | 21 +- .../cardinality/CardinalityAggregator.java | 30 +- .../CardinalityAggregatorFactory.java | 26 +- .../CardinalityBufferAggregator.java | 14 +- ...CardinalityAggColumnSelectorStrategy.java} | 12 +- ...lityAggColumnSelectorStrategyFactory.java} | 10 +- ...CardinalityAggColumnSelectorStrategy.java} | 2 +- ...elper.java => ColumnSelectorStrategy.java} | 4 +- ...ava => ColumnSelectorStrategyFactory.java} | 4 +- ...ngValueMatcherColumnSelectorStrategy.java} | 42 +-- .../ValueMatcherColumnSelectorStrategy.java | 45 +++ ...eMatcherColumnSelectorStrategyFactory.java | 45 +++ .../epinephelinae/GroupByQueryEngineV2.java | 265 +++++++++++------- .../druid/query/search/SearchQueryRunner.java | 32 +-- .../druid/query/select/SelectQueryEngine.java | 28 +- .../AggregateTopNMetricFirstAlgorithm.java | 10 +- .../topn/DimExtractionTopNAlgorithm.java | 18 +- .../druid/query/topn/PooledTopNAlgorithm.java | 22 +- .../topn/TimeExtractionTopNAlgorithm.java | 6 +- .../io/druid/query/topn/TopNAlgorithm.java | 6 +- .../java/io/druid/query/topn/TopNMapFn.java | 14 +- .../java/io/druid/query/topn/TopNParams.java | 18 +- ... 
=> StringTopNColumnSelectorStrategy.java} | 8 +- ...r.java => TopNColumnSelectorStrategy.java} | 6 +- ...rFactory.java => TopNStrategyFactory.java} | 10 +- .../io/druid/segment/DimensionHandler.java | 4 - .../druid/segment/DimensionHandlerUtils.java | 79 +++--- .../druid/segment/DimensionQueryHelper.java | 104 ------- .../segment/QueryableIndexStorageAdapter.java | 21 +- .../druid/segment/StringDimensionHandler.java | 6 - .../CardinalityAggregatorBenchmark.java | 15 +- .../CardinalityAggregatorTest.java | 112 ++++---- 33 files changed, 537 insertions(+), 563 deletions(-) rename processing/src/main/java/io/druid/query/{QueryDimensionInfo.java => ColumnSelectorPlus.java} (55%) rename processing/src/main/java/io/druid/query/aggregation/cardinality/types/{CardinalityAggregatorTypeHelper.java => CardinalityAggColumnSelectorStrategy.java} (85%) rename processing/src/main/java/io/druid/query/aggregation/cardinality/types/{CardinalityAggregatorTypeHelperFactory.java => CardinalityAggColumnSelectorStrategyFactory.java} (75%) rename processing/src/main/java/io/druid/query/aggregation/cardinality/types/{StringCardinalityAggregatorTypeHelper.java => StringCardinalityAggColumnSelectorStrategy.java} (95%) rename processing/src/main/java/io/druid/query/dimension/{QueryTypeHelper.java => ColumnSelectorStrategy.java} (85%) rename processing/src/main/java/io/druid/query/dimension/{QueryTypeHelperFactory.java => ColumnSelectorStrategyFactory.java} (81%) rename processing/src/main/java/io/druid/{segment/StringDimensionQueryHelper.java => query/filter/StringValueMatcherColumnSelectorStrategy.java} (79%) create mode 100644 processing/src/main/java/io/druid/query/filter/ValueMatcherColumnSelectorStrategy.java create mode 100644 processing/src/main/java/io/druid/query/filter/ValueMatcherColumnSelectorStrategyFactory.java rename processing/src/main/java/io/druid/query/topn/types/{StringTopNTypeHelper.java => StringTopNColumnSelectorStrategy.java} (90%) rename 
processing/src/main/java/io/druid/query/topn/types/{TopNTypeHelper.java => TopNColumnSelectorStrategy.java} (93%) rename processing/src/main/java/io/druid/query/topn/types/{TopNTypeHelperFactory.java => TopNStrategyFactory.java} (77%) delete mode 100644 processing/src/main/java/io/druid/segment/DimensionQueryHelper.java diff --git a/processing/src/main/java/io/druid/query/QueryDimensionInfo.java b/processing/src/main/java/io/druid/query/ColumnSelectorPlus.java similarity index 55% rename from processing/src/main/java/io/druid/query/QueryDimensionInfo.java rename to processing/src/main/java/io/druid/query/ColumnSelectorPlus.java index 74e48fa92e5b..6dd2872aa96c 100644 --- a/processing/src/main/java/io/druid/query/QueryDimensionInfo.java +++ b/processing/src/main/java/io/druid/query/ColumnSelectorPlus.java @@ -19,33 +19,21 @@ package io.druid.query; -import io.druid.query.dimension.DimensionSpec; -import io.druid.query.dimension.QueryTypeHelper; +import io.druid.query.dimension.ColumnSelectorStrategy; import io.druid.segment.ColumnValueSelector; -import io.druid.segment.DimensionQueryHelper; /** * A grouping of various related objects used during query processing for a single dimension, used for convenience. * - * Each QueryDimensionInfo is associated with a single dimension. + * Each ColumnSelectorPlus is associated with a single dimension. */ -public class QueryDimensionInfo +public class ColumnSelectorPlus { - /** - * The DimensionSpec representing this QueryDimensionInfo's dimension, taken from the query being processed. - */ - private final DimensionSpec spec; - - /** - * Helper object that handles general type-specific operations for this dimension within query processing engines. - */ - private final DimensionQueryHelper queryHelper; - /** * Helper object that handles row value operations that pertain to a specific query type for this * dimension within query processing engines. 
*/ - private final QueryTypeHelperClass queryTypeHelper; + private final ColumnSelectorStrategyClass columnSelectorStrategy; /** * Internal name of the dimension. @@ -62,40 +50,22 @@ public class QueryDimensionInfo */ private final ColumnValueSelector selector; - /** - * Cardinality of the dimension's value set, taken from the queryHelper. - */ - private final int cardinality; - - public QueryDimensionInfo( - DimensionSpec spec, - DimensionQueryHelper queryHelper, - QueryTypeHelperClass queryTypeHelper, + public ColumnSelectorPlus( + String columnName, + String outputName, + ColumnSelectorStrategyClass columnSelectorStrategy, ColumnValueSelector selector ) { - this.spec = spec; - this.queryHelper = queryHelper; - this.queryTypeHelper = queryTypeHelper; - this.name = spec.getDimension(); - this.outputName = spec.getOutputName(); + this.columnSelectorStrategy = columnSelectorStrategy; + this.name = columnName; + this.outputName = outputName; this.selector = selector; - this.cardinality = queryHelper.getCardinality(selector); } - public DimensionSpec getSpec() + public ColumnSelectorStrategyClass getColumnSelectorStrategy() { - return spec; - } - - public DimensionQueryHelper getQueryHelper() - { - return queryHelper; - } - - public QueryTypeHelperClass getQueryTypeHelper() - { - return queryTypeHelper; + return columnSelectorStrategy; } public String getName() @@ -112,9 +82,4 @@ public ColumnValueSelector getSelector() { return selector; } - - public int getCardinality() - { - return cardinality; - } } diff --git a/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java b/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java index 7655f639d1a3..b0f35937b6e9 100644 --- a/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java +++ b/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java @@ -25,10 +25,10 @@ import io.druid.query.filter.DruidLongPredicate; 
import io.druid.query.filter.DruidPredicateFactory; import io.druid.query.filter.ValueMatcher; +import io.druid.query.filter.ValueMatcherColumnSelectorStrategy; +import io.druid.query.filter.ValueMatcherColumnSelectorStrategyFactory; import io.druid.query.filter.ValueMatcherFactory; import io.druid.segment.ColumnSelectorFactory; -import io.druid.segment.DimensionHandlerUtils; -import io.druid.segment.DimensionQueryHelper; import io.druid.segment.column.Column; import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ValueType; @@ -206,6 +206,9 @@ public int hashCode() private static class FilteredAggregatorValueMatcherFactory implements ValueMatcherFactory { + private static final ValueMatcherColumnSelectorStrategyFactory STRATEGY_FACTORY = + new ValueMatcherColumnSelectorStrategyFactory(); + private final ColumnSelectorFactory columnSelectorFactory; public FilteredAggregatorValueMatcherFactory(ColumnSelectorFactory columnSelectorFactory) @@ -223,13 +226,12 @@ public ValueMatcher makeValueMatcher(final String dimension, final String value) ); } - final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeBaseQueryHelper( + final ValueMatcherColumnSelectorStrategy strategy = STRATEGY_FACTORY.makeColumnSelectorStrategy( dimension, - columnSelectorFactory.getColumnCapabilities(dimension), - null + columnSelectorFactory.getColumnCapabilities(dimension) ); - return queryHelper.getValueMatcher(columnSelectorFactory, value); + return strategy.getValueMatcher(columnSelectorFactory, value); } public ValueMatcher makeValueMatcher(final String dimension, final DruidPredicateFactory predicateFactory) @@ -239,12 +241,11 @@ public ValueMatcher makeValueMatcher(final String dimension, final DruidPredicat case LONG: return makeLongValueMatcher(dimension, predicateFactory.makeLongPredicate()); case STRING: - final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeBaseQueryHelper( + final ValueMatcherColumnSelectorStrategy strategy = 
STRATEGY_FACTORY.makeColumnSelectorStrategy( dimension, - columnSelectorFactory.getColumnCapabilities(dimension), - null + columnSelectorFactory.getColumnCapabilities(dimension) ); - return queryHelper.getValueMatcher(columnSelectorFactory, predicateFactory); + return strategy.getValueMatcher(columnSelectorFactory, predicateFactory); default: return new BooleanValueMatcher(predicateFactory.makeStringPredicate().apply(null)); } diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java index da9952d04c5c..b27256df04c6 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java @@ -23,8 +23,8 @@ import com.google.common.hash.Hasher; import com.google.common.hash.Hashing; import io.druid.query.aggregation.Aggregator; -import io.druid.query.QueryDimensionInfo; -import io.druid.query.aggregation.cardinality.types.CardinalityAggregatorTypeHelper; +import io.druid.query.ColumnSelectorPlus; +import io.druid.query.aggregation.cardinality.types.CardinalityAggColumnSelectorStrategy; import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector; import java.util.List; @@ -32,35 +32,35 @@ public class CardinalityAggregator implements Aggregator { private final String name; - private final List> dimInfoList; + private final List> selectorPlusList; private final boolean byRow; public static final HashFunction hashFn = Hashing.murmur3_128(); protected static void hashRow( - List> dimInfoList, + List> selectorPlusList, HyperLogLogCollector collector ) { final Hasher hasher = hashFn.newHasher(); - for (int k = 0; k < dimInfoList.size(); ++k) { + for (int k = 0; k < selectorPlusList.size(); ++k) { if (k != 0) { hasher.putByte((byte) 0); } - QueryDimensionInfo dimInfo = dimInfoList.get(k); - 
dimInfo.getQueryTypeHelper().hashRow(dimInfo.getSelector(), hasher); + ColumnSelectorPlus selectorPlus = selectorPlusList.get(k); + selectorPlus.getColumnSelectorStrategy().hashRow(selectorPlus.getSelector(), hasher); } collector.add(hasher.hash().asBytes()); } protected static void hashValues( - List> dimInfoList, + List> selectorPlusList, HyperLogLogCollector collector ) { - for (final QueryDimensionInfo dimInfo : dimInfoList) { - dimInfo.getQueryTypeHelper().hashValues(dimInfo.getSelector(), collector); + for (final ColumnSelectorPlus selectorPlus : selectorPlusList) { + selectorPlus.getColumnSelectorStrategy().hashValues(selectorPlus.getSelector(), collector); } } @@ -68,12 +68,12 @@ protected static void hashValues( public CardinalityAggregator( String name, - List> dimInfoList, + List> selectorPlusList, boolean byRow ) { this.name = name; - this.dimInfoList = dimInfoList; + this.selectorPlusList = selectorPlusList; this.collector = HyperLogLogCollector.makeLatestCollector(); this.byRow = byRow; } @@ -82,9 +82,9 @@ public CardinalityAggregator( public void aggregate() { if (byRow) { - hashRow(dimInfoList, collector); + hashRow(selectorPlusList, collector); } else { - hashValues(dimInfoList, collector); + hashValues(selectorPlusList, collector); } } @@ -121,7 +121,7 @@ public String getName() @Override public Aggregator clone() { - return new CardinalityAggregator(name, dimInfoList, byRow); + return new CardinalityAggregator(name, selectorPlusList, byRow); } @Override diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java index f4f4a84d974e..04743e624a08 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java @@ -26,14 +26,14 @@ import 
com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import io.druid.java.util.common.StringUtils; +import io.druid.query.ColumnSelectorPlus; import io.druid.query.aggregation.Aggregator; import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.AggregatorFactoryNotMergeableException; import io.druid.query.aggregation.Aggregators; import io.druid.query.aggregation.BufferAggregator; -import io.druid.query.QueryDimensionInfo; -import io.druid.query.aggregation.cardinality.types.CardinalityAggregatorTypeHelper; -import io.druid.query.aggregation.cardinality.types.CardinalityAggregatorTypeHelperFactory; +import io.druid.query.aggregation.cardinality.types.CardinalityAggColumnSelectorStrategy; +import io.druid.query.aggregation.cardinality.types.CardinalityAggColumnSelectorStrategyFactory; import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector; import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory; import io.druid.query.dimension.DefaultDimensionSpec; @@ -96,8 +96,8 @@ public static Object estimateCardinality(Object object) private static final byte CACHE_TYPE_ID = (byte) 0x8; private static final byte CACHE_KEY_SEPARATOR = (byte) 0xFF; - private static final CardinalityAggregatorTypeHelperFactory TYPE_HELPER_FACTORY = - new CardinalityAggregatorTypeHelperFactory(); + private static final CardinalityAggColumnSelectorStrategyFactory STRATEGY_FACTORY = + new CardinalityAggColumnSelectorStrategyFactory(); private final String name; private final List fields; @@ -137,38 +137,38 @@ public CardinalityAggregatorFactory( @Override public Aggregator factorize(final ColumnSelectorFactory columnFactory) { - List> dimInfoList = + List> selectorPlusList = Arrays.asList(DimensionHandlerUtils.getDimensionInfo( - TYPE_HELPER_FACTORY, + STRATEGY_FACTORY, fields, null, columnFactory )); - if (dimInfoList.isEmpty()) { + if (selectorPlusList.isEmpty()) { return Aggregators.noopAggregator(); } - 
return new CardinalityAggregator(name, dimInfoList, byRow); + return new CardinalityAggregator(name, selectorPlusList, byRow); } @Override public BufferAggregator factorizeBuffered(ColumnSelectorFactory columnFactory) { - List> dimInfoList = + List> selectorPlusList = Arrays.asList(DimensionHandlerUtils.getDimensionInfo( - TYPE_HELPER_FACTORY, + STRATEGY_FACTORY, fields, null, columnFactory )); - if (dimInfoList.isEmpty()) { + if (selectorPlusList.isEmpty()) { return Aggregators.noopBufferAggregator(); } - return new CardinalityBufferAggregator(dimInfoList, byRow); + return new CardinalityBufferAggregator(selectorPlusList, byRow); } @Override diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java index 387babfe2565..73a1169ffc6e 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java @@ -20,8 +20,8 @@ package io.druid.query.aggregation.cardinality; import io.druid.query.aggregation.BufferAggregator; -import io.druid.query.QueryDimensionInfo; -import io.druid.query.aggregation.cardinality.types.CardinalityAggregatorTypeHelper; +import io.druid.query.ColumnSelectorPlus; +import io.druid.query.aggregation.cardinality.types.CardinalityAggColumnSelectorStrategy; import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector; import java.nio.ByteBuffer; @@ -29,17 +29,17 @@ public class CardinalityBufferAggregator implements BufferAggregator { - private final List> dimInfoList; + private final List> selectorPlusList; private final boolean byRow; private static final byte[] EMPTY_BYTES = HyperLogLogCollector.makeEmptyVersionedByteArray(); public CardinalityBufferAggregator( - List> dimInfoList, + List> selectorPlusList, boolean byRow ) { - this.dimInfoList = 
dimInfoList; + this.selectorPlusList = selectorPlusList; this.byRow = byRow; } @@ -63,9 +63,9 @@ public void aggregate(ByteBuffer buf, int position) try { final HyperLogLogCollector collector = HyperLogLogCollector.makeCollector(buf); if (byRow) { - CardinalityAggregator.hashRow(dimInfoList, collector); + CardinalityAggregator.hashRow(selectorPlusList, collector); } else { - CardinalityAggregator.hashValues(dimInfoList, collector); + CardinalityAggregator.hashValues(selectorPlusList, collector); } } finally { diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorTypeHelper.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggColumnSelectorStrategy.java similarity index 85% rename from processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorTypeHelper.java rename to processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggColumnSelectorStrategy.java index 4d6c1477bf47..1bdfad6f0d4a 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorTypeHelper.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggColumnSelectorStrategy.java @@ -21,14 +21,13 @@ import com.google.common.hash.Hasher; import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector; -import io.druid.query.dimension.QueryTypeHelper; +import io.druid.query.dimension.ColumnSelectorStrategy; import io.druid.segment.ColumnValueSelector; -public interface CardinalityAggregatorTypeHelper extends QueryTypeHelper +public interface CardinalityAggColumnSelectorStrategy extends + ColumnSelectorStrategy { - /** - * Used by CardinalityAggregator. - * + /*** * Retrieve the current row from dimSelector and add the row values to the hasher. 
* * @param dimSelector Dimension value selector @@ -38,9 +37,8 @@ public interface CardinalityAggregatorTypeHelper +public class CardinalityAggColumnSelectorStrategyFactory implements ColumnSelectorStrategyFactory { @Override - public CardinalityAggregatorTypeHelper makeQueryTypeHelper( - String dimName, ColumnCapabilities capabilities + public CardinalityAggColumnSelectorStrategy makeColumnSelectorStrategy( + String columnName, ColumnCapabilities capabilities ) { ValueType type = capabilities.getType(); switch(type) { case STRING: - return new StringCardinalityAggregatorTypeHelper(); + return new StringCardinalityAggColumnSelectorStrategy(); default: throw new IAE("Cannot create query type helper from invalid type [%s]", type); } diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/StringCardinalityAggregatorTypeHelper.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/StringCardinalityAggColumnSelectorStrategy.java similarity index 95% rename from processing/src/main/java/io/druid/query/aggregation/cardinality/types/StringCardinalityAggregatorTypeHelper.java rename to processing/src/main/java/io/druid/query/aggregation/cardinality/types/StringCardinalityAggColumnSelectorStrategy.java index d2d3e023c98f..6cdc91add33e 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/StringCardinalityAggregatorTypeHelper.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/StringCardinalityAggColumnSelectorStrategy.java @@ -28,7 +28,7 @@ import java.util.Arrays; -public class StringCardinalityAggregatorTypeHelper implements CardinalityAggregatorTypeHelper +public class StringCardinalityAggColumnSelectorStrategy implements CardinalityAggColumnSelectorStrategy { public static final String CARDINALITY_AGG_NULL_STRING = "\u0000"; public static final char CARDINALITY_AGG_SEPARATOR = '\u0001'; diff --git 
a/processing/src/main/java/io/druid/query/dimension/QueryTypeHelper.java b/processing/src/main/java/io/druid/query/dimension/ColumnSelectorStrategy.java similarity index 85% rename from processing/src/main/java/io/druid/query/dimension/QueryTypeHelper.java rename to processing/src/main/java/io/druid/query/dimension/ColumnSelectorStrategy.java index 9a4ba3c2be24..5e957d30e265 100644 --- a/processing/src/main/java/io/druid/query/dimension/QueryTypeHelper.java +++ b/processing/src/main/java/io/druid/query/dimension/ColumnSelectorStrategy.java @@ -20,8 +20,8 @@ package io.druid.query.dimension; /** - * Base type for helper objects that handle value type operations pertaining to a specific query type + * Base type for strategy objects that handle value type operations pertaining to a specific query type */ -public interface QueryTypeHelper +public interface ColumnSelectorStrategy { } diff --git a/processing/src/main/java/io/druid/query/dimension/QueryTypeHelperFactory.java b/processing/src/main/java/io/druid/query/dimension/ColumnSelectorStrategyFactory.java similarity index 81% rename from processing/src/main/java/io/druid/query/dimension/QueryTypeHelperFactory.java rename to processing/src/main/java/io/druid/query/dimension/ColumnSelectorStrategyFactory.java index 71ddfb103f42..fd41eb2ae3e0 100644 --- a/processing/src/main/java/io/druid/query/dimension/QueryTypeHelperFactory.java +++ b/processing/src/main/java/io/druid/query/dimension/ColumnSelectorStrategyFactory.java @@ -21,7 +21,7 @@ import io.druid.segment.column.ColumnCapabilities; -public interface QueryTypeHelperFactory +public interface ColumnSelectorStrategyFactory { - QueryTypeHelperClass makeQueryTypeHelper(String dimName, ColumnCapabilities capabilities); + ColumnSelectorClass makeColumnSelectorStrategy(String columnName, ColumnCapabilities capabilities); } diff --git a/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java 
b/processing/src/main/java/io/druid/query/filter/StringValueMatcherColumnSelectorStrategy.java similarity index 79% rename from processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java rename to processing/src/main/java/io/druid/query/filter/StringValueMatcherColumnSelectorStrategy.java index 8afb42b1b0f3..38c3f869c956 100644 --- a/processing/src/main/java/io/druid/segment/StringDimensionQueryHelper.java +++ b/processing/src/main/java/io/druid/query/filter/StringValueMatcherColumnSelectorStrategy.java @@ -17,49 +17,25 @@ * under the License. */ -package io.druid.segment; +package io.druid.query.filter; import com.google.common.base.Predicate; import com.google.common.base.Strings; import io.druid.query.dimension.DefaultDimensionSpec; -import io.druid.query.dimension.DimensionSpec; -import io.druid.query.filter.DruidPredicateFactory; -import io.druid.query.filter.ValueMatcher; -import io.druid.segment.data.EmptyIndexedInts; +import io.druid.segment.ColumnSelectorFactory; +import io.druid.segment.DimensionSelector; import io.druid.segment.data.IndexedInts; import java.util.BitSet; import java.util.Objects; -public class StringDimensionQueryHelper implements DimensionQueryHelper +public class StringValueMatcherColumnSelectorStrategy implements ValueMatcherColumnSelectorStrategy { - private final String dimensionName; + private final String columnName; - public StringDimensionQueryHelper(String dimensionName) { - this.dimensionName = dimensionName; - } - - @Override - public DimensionSelector getColumnValueSelector(DimensionSpec dimensionSpec, ColumnSelectorFactory columnSelectorFactory) - { - return columnSelectorFactory.makeDimensionSelector(dimensionSpec); - } - - @Override - public IndexedInts getRowFromDimSelector(DimensionSelector selector) { - return selector == null ? 
EmptyIndexedInts.EMPTY_INDEXED_INTS : selector.getRow(); - } - - @Override - public int getRowSize(IndexedInts rowValues) - { - return rowValues.size(); - } - - @Override - public int getCardinality(DimensionSelector valueSelector) + public StringValueMatcherColumnSelectorStrategy(String columnName) { - return valueSelector.getValueCardinality(); + this.columnName = columnName; } @Override @@ -67,7 +43,7 @@ public ValueMatcher getValueMatcher(ColumnSelectorFactory cursor, final String v { final String valueStr = Strings.emptyToNull(value); final DimensionSelector selector = cursor.makeDimensionSelector( - new DefaultDimensionSpec(dimensionName, dimensionName) + new DefaultDimensionSpec(columnName, columnName) ); // if matching against null, rows with size 0 should also match @@ -128,7 +104,7 @@ public boolean matches() public ValueMatcher getValueMatcher(ColumnSelectorFactory cursor, final DruidPredicateFactory predicateFactory) { final DimensionSelector selector = cursor.makeDimensionSelector( - new DefaultDimensionSpec(dimensionName, dimensionName) + new DefaultDimensionSpec(columnName, columnName) ); final Predicate predicate = predicateFactory.makeStringPredicate(); diff --git a/processing/src/main/java/io/druid/query/filter/ValueMatcherColumnSelectorStrategy.java b/processing/src/main/java/io/druid/query/filter/ValueMatcherColumnSelectorStrategy.java new file mode 100644 index 000000000000..f1059341b0e2 --- /dev/null +++ b/processing/src/main/java/io/druid/query/filter/ValueMatcherColumnSelectorStrategy.java @@ -0,0 +1,45 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.query.filter; + +import io.druid.query.dimension.ColumnSelectorStrategy; +import io.druid.segment.ColumnSelectorFactory; + +public interface ValueMatcherColumnSelectorStrategy extends ColumnSelectorStrategy +{ + /** + * Create a single value ValueMatcher, used for filtering by QueryableIndexStorageAdapter and FilteredAggregatorFactory. + * + * @param cursor ColumnSelectorFactory for creating dimension value selectors + * @param value Value to match against + * @return ValueMatcher that matches on 'value' + */ + ValueMatcher getValueMatcher(ColumnSelectorFactory cursor, String value); + + + /** + * Create a predicate-based ValueMatcher, used for filtering by QueryableIndexStorageAdapter and FilteredAggregatorFactory. 
+ * + * @param cursor ColumnSelectorFactory for creating dimension value selectors + * @param predicateFactory A DruidPredicateFactory that provides the filter predicates to be matched + * @return A ValueMatcher that applies the predicate for this strategy's value type from the predicateFactory + */ + ValueMatcher getValueMatcher(ColumnSelectorFactory cursor, final DruidPredicateFactory predicateFactory); +} diff --git a/processing/src/main/java/io/druid/query/filter/ValueMatcherColumnSelectorStrategyFactory.java b/processing/src/main/java/io/druid/query/filter/ValueMatcherColumnSelectorStrategyFactory.java new file mode 100644 index 000000000000..cbcb326751b3 --- /dev/null +++ b/processing/src/main/java/io/druid/query/filter/ValueMatcherColumnSelectorStrategyFactory.java @@ -0,0 +1,45 @@ +package io.druid.query.filter; + +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +import io.druid.java.util.common.IAE; +import io.druid.query.dimension.ColumnSelectorStrategyFactory; +import io.druid.segment.DimensionHandlerUtils; +import io.druid.segment.column.ColumnCapabilities; +import io.druid.segment.column.ValueType; + +public class ValueMatcherColumnSelectorStrategyFactory + implements ColumnSelectorStrategyFactory +{ + @Override + public ValueMatcherColumnSelectorStrategy makeColumnSelectorStrategy( + String columnName, ColumnCapabilities capabilities + ) + { + capabilities = DimensionHandlerUtils.getEffectiveCapabilities(columnName, capabilities, null); + ValueType type = capabilities.getType(); + switch (type) { + case STRING: + return new StringValueMatcherColumnSelectorStrategy(columnName); + default: + throw new IAE("Cannot create query type helper from invalid type [%s]", type); + } + } +} diff --git a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index 0522fd3af8d9..156d6fd3e555 100644 --- a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -34,16 +34,16 @@ import io.druid.java.util.common.guava.ResourceClosingSequence; import io.druid.java.util.common.guava.Sequence; import io.druid.java.util.common.guava.Sequences; +import io.druid.query.ColumnSelectorPlus; import io.druid.query.aggregation.AggregatorFactory; -import io.druid.query.QueryDimensionInfo; -import io.druid.query.dimension.QueryTypeHelper; -import io.druid.query.dimension.QueryTypeHelperFactory; +import io.druid.query.dimension.ColumnSelectorStrategy; +import io.druid.query.dimension.ColumnSelectorStrategyFactory; import io.druid.query.groupby.GroupByQuery; import io.druid.query.groupby.GroupByQueryConfig; import io.druid.query.groupby.strategy.GroupByStrategyV2; +import 
io.druid.segment.ColumnValueSelector; import io.druid.segment.Cursor; import io.druid.segment.DimensionHandlerUtils; -import io.druid.segment.DimensionQueryHelper; import io.druid.segment.DimensionSelector; import io.druid.segment.StorageAdapter; import io.druid.segment.column.ColumnCapabilities; @@ -64,15 +64,15 @@ public class GroupByQueryEngineV2 { - private static final GroupByTypeHelperFactory TYPE_HELPER_FACTORY = new GroupByTypeHelperFactory(); + private static final GroupByStrategyFactory STRATEGY_FACTORY = new GroupByStrategyFactory(); - private static GroupByDimensionInfo[] getGroupByDimInfo(QueryDimensionInfo[] baseDimInfo) + private static GroupByColumnSelectorPlus[] getGroupBySelectorPlus(ColumnSelectorPlus[] baseSelectorPlus) { - GroupByDimensionInfo[] retInfo = new GroupByDimensionInfo[baseDimInfo.length]; + GroupByColumnSelectorPlus[] retInfo = new GroupByColumnSelectorPlus[baseSelectorPlus.length]; int curPos = 0; for (int i = 0; i < retInfo.length; i++) { - retInfo[i] = new GroupByDimensionInfo(baseDimInfo[i], curPos); - curPos += retInfo[i].getQueryTypeHelper().getGroupingKeySize(); + retInfo[i] = new GroupByColumnSelectorPlus(baseSelectorPlus[i], curPos); + curPos += retInfo[i].getColumnSelectorStrategy().getGroupingKeySize(); } return retInfo; } @@ -134,8 +134,8 @@ public Sequence apply(final Cursor cursor) @Override public GroupByEngineIterator make() { - QueryDimensionInfo[] dimInfo = DimensionHandlerUtils.getDimensionInfo( - TYPE_HELPER_FACTORY, + ColumnSelectorPlus[] selectorPlus = DimensionHandlerUtils.getDimensionInfo( + STRATEGY_FACTORY, query.getDimensions(), storageAdapter, cursor @@ -146,7 +146,7 @@ public GroupByEngineIterator make() cursor, bufferHolder.get(), fudgeTimestamp, - getGroupByDimInfo(dimInfo) + getGroupBySelectorPlus(selectorPlus) ); } @@ -172,17 +172,17 @@ public void close() throws IOException ); } - private static class GroupByTypeHelperFactory implements QueryTypeHelperFactory + private static class 
GroupByStrategyFactory implements ColumnSelectorStrategyFactory { @Override - public GroupByTypeHelper makeQueryTypeHelper( - String dimName, ColumnCapabilities capabilities + public GroupByColumnSelectorStrategy makeColumnSelectorStrategy( + String columnName, ColumnCapabilities capabilities ) { ValueType type = capabilities.getType(); switch(type) { case STRING: - return new StringGroupByTypeHelper(); + return new StringGroupByColumnSelectorStrategy(); default: throw new IAE("Cannot create query type helper from invalid type [%s]", type); } @@ -193,11 +193,11 @@ public GroupByTypeHelper makeQueryTypeHelper( * Contains a collection of query processing methods for type-specific operations used exclusively by * GroupByQueryEngineV2. * - * Each GroupByTypeHelper is associated with a single dimension. + * Each GroupByColumnSelectorStrategy is associated with a single dimension. * * @param The type of the row values object for this dimension */ - private interface GroupByTypeHelper extends QueryTypeHelper + private interface GroupByColumnSelectorStrategy extends ColumnSelectorStrategy { /** * Return the size, in bytes, of this dimension's values in the grouping key. @@ -208,37 +208,6 @@ private interface GroupByTypeHelper extends QueryTypeHelper */ int getGroupingKeySize(); - /** - * Read the first value within a row values object (IndexedInts, IndexedLongs, etc.) and write that value - * to the keyBuffer at keyBufferPosition. If rowSize is 0, write GROUP_BY_MISSING_VALUE instead. - * - * @param valuesObj row values object - * @param keyBuffer grouping key - * @param keyBufferPosition offset within grouping key - */ - void initializeGroupingKeyV2Dimension( - final RowValuesType valuesObj, - final ByteBuffer keyBuffer, - final int keyBufferPosition - ); - - - /** - * Read the value at rowValueIdx from a row values object and write that value to the keyBuffer at keyBufferPosition. 
- * - * @param values row values object - * @param rowValueIdx index of the value to read - * @param keyBuffer grouping key - * @param keyBufferPosition offset within grouping key - */ - void addValueToGroupingKeyV2( - RowValuesType values, - int rowValueIdx, - ByteBuffer keyBuffer, - final int keyBufferPosition - ); - - /** * Read a value from a grouping key and add it to the group by query result map, using the output name specified * in a DimensionSpec. @@ -246,23 +215,61 @@ void addValueToGroupingKeyV2( * An implementation may choose to not add anything to the result map * (e.g., as the String implementation does for empty rows) * - * dimInfo provides access to: + * selectorPlus provides access to: * - the keyBufferPosition offset from which to read the value * - the dimension value selector * - the DimensionSpec for this dimension from the query * - * @param dimInfo dimension info containing the key offset, value selector, and dimension spec + * @param selectorPlus dimension info containing the key offset, value selector, and dimension spec * @param resultMap result map for the group by query being served * @param key grouping key */ - void processValueFromGroupingKeyV2( - GroupByDimensionInfo dimInfo, + void processValueFromGroupingKey( + GroupByColumnSelectorPlus selectorPlus, ByteBuffer key, Map resultMap ); + + /** + * Retrieve a row object from the ColumnSelectorPlus and put it in valuess at columnIndex. + * + * @param selector Value selector for a column. + * @param columnIndex Index of the column within the row values array + * @param valuess Row values array, one index per column + */ + void initColumnValues(ColumnValueSelector selector, int columnIndex, Object[] valuess); + + /** + * Read the first value within a row values object (IndexedInts, IndexedLongs, etc.) and write that value + * to the keyBuffer at keyBufferPosition. If rowSize is 0, write GROUP_BY_MISSING_VALUE instead. 
+ * + * If the size of the row is > 0, write 1 to stack[] at columnIndex, otherwise write 0. + * + * @param keyBufferPosition Starting offset for this column's value within the grouping key. + * @param columnIndex Index of the column within the row values array + * @param rowObj Row value object for this column (e.g., IndexedInts) + * @param keyBuffer grouping key + * @param stack array containing the current within-row value index for each column + */ + void initGroupingKeyColumnValue(int keyBufferPosition, int columnIndex, Object rowObj, ByteBuffer keyBuffer, int[] stack); + + /** + * If rowValIdx is less than the size of rowObj (haven't handled all of the row values): + * First, read the value at rowValIdx from a rowObj and write that value to the keyBuffer at keyBufferPosition. + * Then return true + * + * Otherwise, return false. + * + * @param keyBufferPosition Starting offset for this column's value within the grouping key. + * @param rowObj Row value object for this column (e.g., IndexedInts) + * @param rowValIdx Index of the current value being grouped on within the row + * @param keyBuffer grouping key + * @return true if rowValIdx < size of rowObj, false otherwise + */ + boolean checkRowIndexAndAddValueToGroupingKey(int keyBufferPosition, Object rowObj, int rowValIdx, ByteBuffer keyBuffer); } - private static class StringGroupByTypeHelper implements GroupByTypeHelper + private static class StringGroupByColumnSelectorStrategy implements GroupByColumnSelectorStrategy { private static final int GROUP_BY_MISSING_VALUE = -1; @@ -273,47 +280,67 @@ public int getGroupingKeySize() } @Override - public void initializeGroupingKeyV2Dimension( - final IndexedInts values, - final ByteBuffer keyBuffer, - final int keyBufferPosition - ) + public void processValueFromGroupingKey(GroupByColumnSelectorPlus selectorPlus, ByteBuffer key, Map resultMap) { - int rowSize = values.size(); - if (rowSize == 0) { - keyBuffer.putInt(keyBufferPosition, GROUP_BY_MISSING_VALUE); + 
final int id = key.getInt(selectorPlus.getKeyBufferPosition()); + + // GROUP_BY_MISSING_VALUE is used to indicate empty rows, which are omitted from the result map. + if (id != GROUP_BY_MISSING_VALUE) { + resultMap.put( + selectorPlus.getOutputName(), + ((DimensionSelector) selectorPlus.getSelector()).lookupName(id) + ); } else { - keyBuffer.putInt(keyBufferPosition, values.get(0)); + resultMap.put(selectorPlus.getOutputName(), ""); } } @Override - public void addValueToGroupingKeyV2( - final IndexedInts values, - final int rowValueIdx, - final ByteBuffer keyBuffer, - final int keyBufferPosition - ) + public void initColumnValues(ColumnValueSelector selector, int columnIndex, Object[] valuess) { - keyBuffer.putInt( - keyBufferPosition, - values.get(rowValueIdx) - ); + DimensionSelector dimSelector = (DimensionSelector) selector; + IndexedInts row = dimSelector.getRow(); + valuess[columnIndex] = row; } @Override - public void processValueFromGroupingKeyV2(GroupByDimensionInfo dimInfo, ByteBuffer key, Map resultMap) + public void initGroupingKeyColumnValue(int keyBufferPosition, int columnIndex, Object rowObj, ByteBuffer keyBuffer, int[] stack) { - final int id = key.getInt(dimInfo.getKeyBufferPosition()); + IndexedInts row = (IndexedInts) rowObj; + int rowSize = row.size(); - // GROUP_BY_MISSING_VALUE is used to indicate empty rows, which are omitted from the result map. - if (id != GROUP_BY_MISSING_VALUE) { - resultMap.put( - dimInfo.getOutputName(), - ((DimensionSelector) dimInfo.getSelector()).lookupName(id) + initializeGroupingKeyV2Dimension(row, rowSize, keyBuffer, keyBufferPosition); + stack[columnIndex] = rowSize == 0 ? 
0 : 1; + } + + @Override + public boolean checkRowIndexAndAddValueToGroupingKey(int keyBufferPosition, Object rowObj, int rowValIdx, ByteBuffer keyBuffer) + { + IndexedInts row = (IndexedInts) rowObj; + int rowSize = row.size(); + + if (rowValIdx < rowSize) { + keyBuffer.putInt( + keyBufferPosition, + row.get(rowValIdx) ); + return true; } else { - resultMap.put(dimInfo.getOutputName(), ""); + return false; + } + } + + private void initializeGroupingKeyV2Dimension( + final IndexedInts values, + final int rowSize, + final ByteBuffer keyBuffer, + final int keyBufferPosition + ) + { + if (rowSize == 0) { + keyBuffer.putInt(keyBufferPosition, GROUP_BY_MISSING_VALUE); + } else { + keyBuffer.putInt(keyBufferPosition, values.get(0)); } } } @@ -329,7 +356,7 @@ private static class GroupByEngineIterator implements Iterator, Closeable private final ByteBuffer keyBuffer; private final int[] stack; private final Object[] valuess; - private final GroupByDimensionInfo[] dims; + private final GroupByColumnSelectorPlus[] dims; private int stackp = Integer.MIN_VALUE; private boolean currentRowWasPartiallyAggregated = false; @@ -341,7 +368,7 @@ public GroupByEngineIterator( final Cursor cursor, final ByteBuffer buffer, final DateTime fudgeTimestamp, - final GroupByDimensionInfo[] dims + final GroupByColumnSelectorPlus[] dims ) { final int dimCount = query.getDimensions().size(); @@ -395,11 +422,19 @@ public Row next() stackp = stack.length - 1; for (int i = 0; i < dims.length; i++) { - final DimensionQueryHelper queryHelper = dims[i].getQueryHelper(); - valuess[i] = queryHelper.getRowFromDimSelector(dims[i].getSelector()); - int rowSize = queryHelper.getRowSize(valuess[i]); - dims[i].getQueryTypeHelper().initializeGroupingKeyV2Dimension(valuess[i], keyBuffer, dims[i].getKeyBufferPosition()); - stack[i] = rowSize == 0 ? 
0 : 1; + GroupByColumnSelectorStrategy strategy = dims[i].getColumnSelectorStrategy(); + strategy.initColumnValues( + dims[i].getSelector(), + i, + valuess + ); + strategy.initGroupingKeyColumnValue( + dims[i].getKeyBufferPosition(), + i, + valuess[i], + keyBuffer, + stack + ); } } @@ -417,17 +452,29 @@ public Row next() doAggregate = false; } - if (stackp >= 0 && stack[stackp] < dims[stackp].getQueryHelper().getRowSize(valuess[stackp])) { - // Load next value for current slot - dims[stackp].getQueryTypeHelper().addValueToGroupingKeyV2(valuess[stackp], stack[stackp], keyBuffer, dims[stackp].getKeyBufferPosition()); - stack[stackp]++; - for (int i = stackp + 1; i < stack.length; i++) { - int rowSize = dims[i].getQueryHelper().getRowSize(valuess[i]); - dims[i].getQueryTypeHelper().initializeGroupingKeyV2Dimension(valuess[i], keyBuffer, dims[i].getKeyBufferPosition()); - stack[i] = rowSize == 0 ? 0 : 1; + if (stackp >= 0) { + doAggregate = dims[stackp].getColumnSelectorStrategy().checkRowIndexAndAddValueToGroupingKey( + dims[stackp].getKeyBufferPosition(), + valuess[stackp], + stack[stackp], + keyBuffer + ); + + if (doAggregate) { + stack[stackp]++; + for (int i = stackp + 1; i < stack.length; i++) { + dims[i].getColumnSelectorStrategy().initGroupingKeyColumnValue( + dims[i].getKeyBufferPosition(), + i, + valuess[i], + keyBuffer, + stack + ); + } + stackp = stack.length - 1; + } else { + stackp--; } - stackp = stack.length - 1; - doAggregate = true; } else { stackp--; } @@ -449,9 +496,9 @@ public Row apply(final Grouper.Entry entry) Map theMap = Maps.newLinkedHashMap(); // Add dimensions. 
- for (GroupByDimensionInfo dimInfo : dims) { - dimInfo.getQueryTypeHelper().processValueFromGroupingKeyV2( - dimInfo, + for (GroupByColumnSelectorPlus selectorPlus : dims) { + selectorPlus.getColumnSelectorStrategy().processValueFromGroupingKey( + selectorPlus, entry.getKey(), theMap ); @@ -503,11 +550,11 @@ private static class GroupByEngineKeySerde implements Grouper.KeySerde + private static class GroupByColumnSelectorPlus extends ColumnSelectorPlus { /** * Indicates the offset of this dimension's value within the grouping key. */ private int keyBufferPosition; - public GroupByDimensionInfo(QueryDimensionInfo baseInfo, int keyBufferPosition) + public GroupByColumnSelectorPlus(ColumnSelectorPlus baseInfo, int keyBufferPosition) { super( - baseInfo.getSpec(), - baseInfo.getQueryHelper(), - baseInfo.getQueryTypeHelper(), + baseInfo.getName(), + baseInfo.getOutputName(), + baseInfo.getColumnSelectorStrategy(), baseInfo.getSelector() ); this.keyBufferPosition = keyBufferPosition; diff --git a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java index 07c5156de820..a811ab9733f1 100644 --- a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java +++ b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java @@ -34,14 +34,14 @@ import io.druid.java.util.common.guava.FunctionalIterable; import io.druid.java.util.common.guava.Sequence; import io.druid.java.util.common.guava.Sequences; +import io.druid.query.ColumnSelectorPlus; import io.druid.query.Druids; import io.druid.query.Query; import io.druid.query.QueryRunner; import io.druid.query.Result; -import io.druid.query.QueryDimensionInfo; import io.druid.query.dimension.DimensionSpec; -import io.druid.query.dimension.QueryTypeHelper; -import io.druid.query.dimension.QueryTypeHelperFactory; +import io.druid.query.dimension.ColumnSelectorStrategy; +import 
io.druid.query.dimension.ColumnSelectorStrategyFactory; import io.druid.query.extraction.ExtractionFn; import io.druid.query.extraction.IdentityExtractionFn; import io.druid.query.filter.Filter; @@ -76,29 +76,29 @@ */ public class SearchQueryRunner implements QueryRunner> { - private static final SearchTypeHelperFactory TYPE_HELPER_FACTORY = new SearchTypeHelperFactory(); + private static final SearchStrategyFactory STRATEGY_FACTORY = new SearchStrategyFactory(); private static final EmittingLogger log = new EmittingLogger(SearchQueryRunner.class); private final Segment segment; - private static class SearchTypeHelperFactory implements QueryTypeHelperFactory + private static class SearchStrategyFactory implements ColumnSelectorStrategyFactory { @Override - public SearchTypeHelper makeQueryTypeHelper( - String dimName, ColumnCapabilities capabilities + public SearchColumnSelectorStrategy makeColumnSelectorStrategy( + String columnName, ColumnCapabilities capabilities ) { ValueType type = capabilities.getType(); switch(type) { case STRING: - return new StringSearchTypeHelper(); + return new StringSearchColumnSelectorStrategy(); default: throw new IAE("Cannot create query type helper from invalid type [%s]", type); } } } - public interface SearchTypeHelper extends QueryTypeHelper + public interface SearchColumnSelectorStrategy extends ColumnSelectorStrategy { /** * Read the current row from dimSelector and update the search result set. 
@@ -123,7 +123,7 @@ void updateSearchResultSet( ); } - public static class StringSearchTypeHelper implements SearchTypeHelper + public static class StringSearchColumnSelectorStrategy implements SearchColumnSelectorStrategy { @Override public void updateSearchResultSet( @@ -340,9 +340,9 @@ public Object2IntRBTreeMap accumulate(Object2IntRBTreeMap return set; } - List> dimInfoList = Arrays.asList( + List> selectorPlusList = Arrays.asList( DimensionHandlerUtils.getDimensionInfo( - TYPE_HELPER_FACTORY, + STRATEGY_FACTORY, nonBitmapDims, adapter, cursor @@ -350,10 +350,10 @@ public Object2IntRBTreeMap accumulate(Object2IntRBTreeMap ); while (!cursor.isDone()) { - for (QueryDimensionInfo dimInfo : dimInfoList) { - dimInfo.getQueryTypeHelper().updateSearchResultSet( - dimInfo.getOutputName(), - dimInfo.getSelector(), + for (ColumnSelectorPlus selectorPlus : selectorPlusList) { + selectorPlus.getColumnSelectorStrategy().updateSearchResultSet( + selectorPlus.getOutputName(), + selectorPlus.getSelector(), searchQuerySpec, limit, set diff --git a/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java b/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java index d15f35db3a37..8f80844adbf9 100644 --- a/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java +++ b/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java @@ -27,13 +27,13 @@ import io.druid.java.util.common.IAE; import io.druid.java.util.common.ISE; import io.druid.java.util.common.guava.Sequence; +import io.druid.query.ColumnSelectorPlus; import io.druid.query.QueryRunnerHelper; import io.druid.query.Result; -import io.druid.query.QueryDimensionInfo; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; -import io.druid.query.dimension.QueryTypeHelper; -import io.druid.query.dimension.QueryTypeHelperFactory; +import io.druid.query.dimension.ColumnSelectorStrategy; +import 
io.druid.query.dimension.ColumnSelectorStrategyFactory; import io.druid.query.filter.Filter; import io.druid.segment.ColumnValueSelector; import io.druid.segment.Cursor; @@ -62,26 +62,26 @@ */ public class SelectQueryEngine { - private static final SelectTypeHelperFactory TYPE_HELPER_FACTORY = new SelectTypeHelperFactory(); + private static final SelectStrategyFactory STRATEGY_FACTORY = new SelectStrategyFactory(); - private static class SelectTypeHelperFactory implements QueryTypeHelperFactory + private static class SelectStrategyFactory implements ColumnSelectorStrategyFactory { @Override - public SelectTypeHelper makeQueryTypeHelper( - String dimName, ColumnCapabilities capabilities + public SelectColumnSelectorStrategy makeColumnSelectorStrategy( + String columnName, ColumnCapabilities capabilities ) { ValueType type = capabilities.getType(); switch(type) { case STRING: - return new StringSelectTypeHelper(); + return new StringSelectColumnSelectorStrategy(); default: throw new IAE("Cannot create query type helper from invalid type [%s]", type); } } } - public interface SelectTypeHelper extends QueryTypeHelper + public interface SelectColumnSelectorStrategy extends ColumnSelectorStrategy { /** * Read the current row from dimSelector and add the row values for a dimension to the result map. 
@@ -99,7 +99,7 @@ void addRowValuesToSelectResult( ); } - public static class StringSelectTypeHelper implements SelectTypeHelper + public static class StringSelectColumnSelectorStrategy implements SelectColumnSelectorStrategy { @Override public void addRowValuesToSelectResult(String outputName, DimensionSelector selector, Map resultMap) @@ -177,9 +177,9 @@ public Result apply(Cursor cursor) final LongColumnSelector timestampColumnSelector = cursor.makeLongColumnSelector(Column.TIME_COLUMN_NAME); - final List> dimInfoList = Arrays.asList( + final List> selectorPlusList = Arrays.asList( DimensionHandlerUtils.getDimensionInfo( - TYPE_HELPER_FACTORY, + STRATEGY_FACTORY, Lists.newArrayList(dims), adapter, cursor @@ -206,8 +206,8 @@ public Result apply(Cursor cursor) final Map theEvent = Maps.newLinkedHashMap(); theEvent.put(EventHolder.timestampKey, new DateTime(timestampColumnSelector.get())); - for (QueryDimensionInfo dimInfo : dimInfoList) { - dimInfo.getQueryTypeHelper().addRowValuesToSelectResult(dimInfo.getOutputName(), dimInfo.getSelector(), theEvent); + for (ColumnSelectorPlus selectorPlus : selectorPlusList) { + selectorPlus.getColumnSelectorStrategy().addRowValuesToSelectResult(selectorPlus.getOutputName(), selectorPlus.getSelector(), theEvent); } for (Map.Entry metSelector : metSelectors.entrySet()) { diff --git a/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java b/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java index a62c834b5eac..2a669990982d 100644 --- a/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java @@ -22,10 +22,10 @@ import io.druid.collections.StupidPool; import io.druid.java.util.common.ISE; import io.druid.java.util.common.Pair; +import io.druid.query.ColumnSelectorPlus; import io.druid.query.aggregation.AggregatorFactory; import 
io.druid.query.aggregation.AggregatorUtil; import io.druid.query.aggregation.PostAggregator; -import io.druid.query.QueryDimensionInfo; import io.druid.segment.Capabilities; import io.druid.segment.Cursor; @@ -55,11 +55,11 @@ public AggregateTopNMetricFirstAlgorithm( @Override public TopNParams makeInitParams( - QueryDimensionInfo dimInfo, Cursor cursor + ColumnSelectorPlus selectorPlus, Cursor cursor ) { return new TopNParams( - dimInfo, + selectorPlus, cursor, Integer.MAX_VALUE ); @@ -91,7 +91,7 @@ public void run( PooledTopNAlgorithm.PooledTopNParams singleMetricParam = null; int[] dimValSelector = null; try { - singleMetricParam = singleMetricAlgo.makeInitParams(params.getDimInfo(), params.getCursor()); + singleMetricParam = singleMetricAlgo.makeInitParams(params.getSelectorPlus(), params.getCursor()); singleMetricAlgo.run( singleMetricParam, singleMetricResultBuilder, @@ -109,7 +109,7 @@ public void run( PooledTopNAlgorithm.PooledTopNParams allMetricsParam = null; try { // Run topN for all metrics for top N dimension values - allMetricsParam = allMetricAlgo.makeInitParams(params.getDimInfo(), params.getCursor()); + allMetricsParam = allMetricAlgo.makeInitParams(params.getSelectorPlus(), params.getCursor()); allMetricAlgo.run( allMetricsParam, resultBuilder, diff --git a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java index 4c7b3f2c7625..dc9749758f14 100644 --- a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java @@ -20,9 +20,9 @@ package io.druid.query.topn; import com.google.common.collect.Maps; +import io.druid.query.ColumnSelectorPlus; import io.druid.query.aggregation.Aggregator; -import io.druid.query.QueryDimensionInfo; -import io.druid.query.topn.types.TopNTypeHelper; +import io.druid.query.topn.types.TopNColumnSelectorStrategy; import 
io.druid.segment.Capabilities; import io.druid.segment.Cursor; @@ -47,12 +47,12 @@ public DimExtractionTopNAlgorithm( @Override public TopNParams makeInitParams( - final QueryDimensionInfo dimInfo, + final ColumnSelectorPlus selectorPlus, final Cursor cursor ) { return new TopNParams( - dimInfo, + selectorPlus, cursor, Integer.MAX_VALUE ); @@ -61,8 +61,8 @@ public TopNParams makeInitParams( @Override protected Aggregator[][] makeDimValSelector(TopNParams params, int numProcessed, int numToProcess) { - QueryDimensionInfo dimInfo = params.getDimInfo(); - return dimInfo.getQueryTypeHelper().getDimExtractionRowSelector(query, params, capabilities); + ColumnSelectorPlus selectorPlus = params.getSelectorPlus(); + return selectorPlus.getColumnSelectorStrategy().getDimExtractionRowSelector(query, params, capabilities); } @Override @@ -86,12 +86,12 @@ public void scanAndAggregate( ) { final Cursor cursor = params.getCursor(); - final QueryDimensionInfo dimInfo = params.getDimInfo(); + final ColumnSelectorPlus selectorPlus = params.getSelectorPlus(); while (!cursor.isDone()) { - dimInfo.getQueryTypeHelper().dimExtractionScanAndAggregate( + selectorPlus.getColumnSelectorStrategy().dimExtractionScanAndAggregate( query, - dimInfo.getSelector(), + selectorPlus.getSelector(), cursor, rowSelector, aggregatesStore diff --git a/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java index c6e7fb7e1c89..41bd8ca785c9 100644 --- a/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java @@ -24,7 +24,7 @@ import io.druid.java.util.common.Pair; import io.druid.java.util.common.guava.CloseQuietly; import io.druid.query.aggregation.BufferAggregator; -import io.druid.query.QueryDimensionInfo; +import io.druid.query.ColumnSelectorPlus; import io.druid.segment.Capabilities; import io.druid.segment.Cursor; import 
io.druid.segment.DimensionSelector; @@ -58,14 +58,14 @@ public PooledTopNAlgorithm( @Override public PooledTopNParams makeInitParams( - QueryDimensionInfo dimInfo, Cursor cursor + ColumnSelectorPlus selectorPlus, Cursor cursor ) { ResourceHolder resultsBufHolder = bufferPool.take(); ByteBuffer resultsBuf = resultsBufHolder.get(); resultsBuf.clear(); - final DimensionSelector dimSelector = (DimensionSelector) dimInfo.getSelector(); + final DimensionSelector dimSelector = (DimensionSelector) selectorPlus.getSelector(); final int cardinality = dimSelector.getValueCardinality(); if (cardinality < 0) { @@ -105,7 +105,7 @@ public int[] build() final int numValuesPerPass = numBytesPerRecord > 0 ? numBytesToWorkWith / numBytesPerRecord : cardinality; return PooledTopNParams.builder() - .withDimInfo(dimInfo) + .withSelectorPlus(selectorPlus) .withCursor(cursor) .withResultsBufHolder(resultsBufHolder) .withResultsBuf(resultsBuf) @@ -509,7 +509,7 @@ public static class PooledTopNParams extends TopNParams private final TopNMetricSpecBuilder arrayProvider; public PooledTopNParams( - QueryDimensionInfo dimInfo, + ColumnSelectorPlus selectorPlus, Cursor cursor, ResourceHolder resultsBufHolder, ByteBuffer resultsBuf, @@ -519,7 +519,7 @@ public PooledTopNParams( TopNMetricSpecBuilder arrayProvider ) { - super(dimInfo, cursor, numValuesPerPass); + super(selectorPlus, cursor, numValuesPerPass); this.resultsBufHolder = resultsBufHolder; this.resultsBuf = resultsBuf; @@ -560,7 +560,7 @@ public TopNMetricSpecBuilder getArrayProvider() public static class Builder { - private QueryDimensionInfo dimInfo; + private ColumnSelectorPlus selectorPlus; private Cursor cursor; private ResourceHolder resultsBufHolder; private ByteBuffer resultsBuf; @@ -571,7 +571,7 @@ public static class Builder public Builder() { - dimInfo = null; + selectorPlus = null; cursor = null; resultsBufHolder = null; resultsBuf = null; @@ -581,9 +581,9 @@ public Builder() arrayProvider = null; } - public Builder 
withDimInfo(QueryDimensionInfo dimInfo) + public Builder withSelectorPlus(ColumnSelectorPlus selectorPlus) { - this.dimInfo = dimInfo; + this.selectorPlus = selectorPlus; return this; } @@ -632,7 +632,7 @@ public Builder withArrayProvider(TopNMetricSpecBuilder arrayProvider) public PooledTopNParams build() { return new PooledTopNParams( - dimInfo, + selectorPlus, cursor, resultsBufHolder, resultsBuf, diff --git a/processing/src/main/java/io/druid/query/topn/TimeExtractionTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/TimeExtractionTopNAlgorithm.java index b88b6e6f2ee4..cb53b0f3561b 100644 --- a/processing/src/main/java/io/druid/query/topn/TimeExtractionTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/TimeExtractionTopNAlgorithm.java @@ -21,7 +21,7 @@ import com.google.common.collect.Maps; import io.druid.query.aggregation.Aggregator; -import io.druid.query.QueryDimensionInfo; +import io.druid.query.ColumnSelectorPlus; import io.druid.segment.Capabilities; import io.druid.segment.Cursor; import io.druid.segment.DimensionSelector; @@ -41,10 +41,10 @@ public TimeExtractionTopNAlgorithm(Capabilities capabilities, TopNQuery query) @Override - public TopNParams makeInitParams(QueryDimensionInfo dimInfo, Cursor cursor) + public TopNParams makeInitParams(ColumnSelectorPlus selectorPlus, Cursor cursor) { return new TopNParams( - dimInfo, + selectorPlus, cursor, Integer.MAX_VALUE ); diff --git a/processing/src/main/java/io/druid/query/topn/TopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/TopNAlgorithm.java index cbc9f2519f5b..03473091e1ae 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/TopNAlgorithm.java @@ -19,9 +19,9 @@ package io.druid.query.topn; +import io.druid.query.ColumnSelectorPlus; import io.druid.query.aggregation.Aggregator; -import io.druid.query.QueryDimensionInfo; -import io.druid.query.topn.types.TopNTypeHelper; +import 
io.druid.query.topn.types.TopNColumnSelectorStrategy; import io.druid.segment.Cursor; /** @@ -32,7 +32,7 @@ public interface TopNAlgorithm public static final int INIT_POSITION_VALUE = -1; public static final int SKIP_POSITION_VALUE = -2; - public TopNParams makeInitParams(QueryDimensionInfo dimInfo, Cursor cursor); + public TopNParams makeInitParams(ColumnSelectorPlus selectorPlus, Cursor cursor); public void run( Parameters params, diff --git a/processing/src/main/java/io/druid/query/topn/TopNMapFn.java b/processing/src/main/java/io/druid/query/topn/TopNMapFn.java index c2191dc31fe4..e1c0dd6c6cef 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNMapFn.java +++ b/processing/src/main/java/io/druid/query/topn/TopNMapFn.java @@ -22,14 +22,14 @@ import com.google.common.base.Function; import com.google.common.collect.Lists; import io.druid.query.Result; -import io.druid.query.QueryDimensionInfo; -import io.druid.query.topn.types.TopNTypeHelperFactory; +import io.druid.query.ColumnSelectorPlus; +import io.druid.query.topn.types.TopNStrategyFactory; import io.druid.segment.Cursor; import io.druid.segment.DimensionHandlerUtils; public class TopNMapFn implements Function> { - private static final TopNTypeHelperFactory TYPE_HELPER_FACTORY = new TopNTypeHelperFactory(); + private static final TopNStrategyFactory STRATEGY_FACTORY = new TopNStrategyFactory(); private final TopNQuery query; private final TopNAlgorithm topNAlgorithm; @@ -47,20 +47,20 @@ public TopNMapFn( @SuppressWarnings("unchecked") public Result apply(Cursor cursor) { - final QueryDimensionInfo[] dimInfoArray = DimensionHandlerUtils.getDimensionInfo( - TYPE_HELPER_FACTORY, + final ColumnSelectorPlus[] selectorPlusArray = DimensionHandlerUtils.getDimensionInfo( + STRATEGY_FACTORY, Lists.newArrayList(query.getDimensionSpec()), null, cursor ); - if (dimInfoArray[0].getSelector() == null) { + if (selectorPlusArray[0].getSelector() == null) { return null; } TopNParams params = null; try { - params = 
topNAlgorithm.makeInitParams(dimInfoArray[0], cursor); + params = topNAlgorithm.makeInitParams(selectorPlusArray[0], cursor); TopNResultBuilder resultBuilder = BaseTopNAlgorithm.makeResultBuilder(params, query); diff --git a/processing/src/main/java/io/druid/query/topn/TopNParams.java b/processing/src/main/java/io/druid/query/topn/TopNParams.java index 16086db688b7..2083ff97c485 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNParams.java +++ b/processing/src/main/java/io/druid/query/topn/TopNParams.java @@ -19,8 +19,8 @@ package io.druid.query.topn; -import io.druid.query.QueryDimensionInfo; -import io.druid.query.topn.types.TopNTypeHelper; +import io.druid.query.ColumnSelectorPlus; +import io.druid.query.topn.types.TopNColumnSelectorStrategy; import io.druid.segment.Cursor; /** @@ -30,17 +30,17 @@ public class TopNParams private final Cursor cursor; private final int cardinality; private final int numValuesPerPass; - private final QueryDimensionInfo dimInfo; + private final ColumnSelectorPlus selectorPlus; protected TopNParams( - QueryDimensionInfo dimInfo, + ColumnSelectorPlus selectorPlus, Cursor cursor, int numValuesPerPass ) { - this.dimInfo = dimInfo; + this.selectorPlus = selectorPlus; this.cursor = cursor; - this.cardinality = dimInfo.getCardinality(); + this.cardinality = selectorPlus.getColumnSelectorStrategy().getCardinality(selectorPlus.getSelector()); this.numValuesPerPass = numValuesPerPass; if (cardinality < 0) { @@ -50,12 +50,12 @@ protected TopNParams( public Object getDimSelector() { - return dimInfo.getSelector(); + return selectorPlus.getSelector(); } - public QueryDimensionInfo getDimInfo() + public ColumnSelectorPlus getSelectorPlus() { - return dimInfo; + return selectorPlus; } public Cursor getCursor() diff --git a/processing/src/main/java/io/druid/query/topn/types/StringTopNTypeHelper.java b/processing/src/main/java/io/druid/query/topn/types/StringTopNColumnSelectorStrategy.java similarity index 90% rename from 
processing/src/main/java/io/druid/query/topn/types/StringTopNTypeHelper.java rename to processing/src/main/java/io/druid/query/topn/types/StringTopNColumnSelectorStrategy.java index c0ce6d02b03f..10ece39c411e 100644 --- a/processing/src/main/java/io/druid/query/topn/types/StringTopNTypeHelper.java +++ b/processing/src/main/java/io/druid/query/topn/types/StringTopNColumnSelectorStrategy.java @@ -11,8 +11,14 @@ import java.util.Map; -public class StringTopNTypeHelper implements TopNTypeHelper +public class StringTopNColumnSelectorStrategy implements TopNColumnSelectorStrategy { + @Override + public int getCardinality(DimensionSelector selector) + { + return selector.getValueCardinality(); + } + @Override public Aggregator[][] getDimExtractionRowSelector(TopNQuery query, TopNParams params, Capabilities capabilities) { diff --git a/processing/src/main/java/io/druid/query/topn/types/TopNTypeHelper.java b/processing/src/main/java/io/druid/query/topn/types/TopNColumnSelectorStrategy.java similarity index 93% rename from processing/src/main/java/io/druid/query/topn/types/TopNTypeHelper.java rename to processing/src/main/java/io/druid/query/topn/types/TopNColumnSelectorStrategy.java index fae4aa2186c8..01e293bc4ee9 100644 --- a/processing/src/main/java/io/druid/query/topn/types/TopNTypeHelper.java +++ b/processing/src/main/java/io/druid/query/topn/types/TopNColumnSelectorStrategy.java @@ -20,7 +20,7 @@ package io.druid.query.topn.types; import io.druid.query.aggregation.Aggregator; -import io.druid.query.dimension.QueryTypeHelper; +import io.druid.query.dimension.ColumnSelectorStrategy; import io.druid.query.topn.TopNParams; import io.druid.query.topn.TopNQuery; import io.druid.segment.Capabilities; @@ -29,8 +29,10 @@ import java.util.Map; -public interface TopNTypeHelper extends QueryTypeHelper +public interface TopNColumnSelectorStrategy extends ColumnSelectorStrategy { + int getCardinality(ValueSelectorType selector); + /** * Used by DimExtractionTopNAlgorithm. 
* diff --git a/processing/src/main/java/io/druid/query/topn/types/TopNTypeHelperFactory.java b/processing/src/main/java/io/druid/query/topn/types/TopNStrategyFactory.java similarity index 77% rename from processing/src/main/java/io/druid/query/topn/types/TopNTypeHelperFactory.java rename to processing/src/main/java/io/druid/query/topn/types/TopNStrategyFactory.java index dfb8cdb5f102..b5b4a4040bf0 100644 --- a/processing/src/main/java/io/druid/query/topn/types/TopNTypeHelperFactory.java +++ b/processing/src/main/java/io/druid/query/topn/types/TopNStrategyFactory.java @@ -20,21 +20,21 @@ package io.druid.query.topn.types; import io.druid.java.util.common.IAE; -import io.druid.query.dimension.QueryTypeHelperFactory; +import io.druid.query.dimension.ColumnSelectorStrategyFactory; import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ValueType; -public class TopNTypeHelperFactory implements QueryTypeHelperFactory +public class TopNStrategyFactory implements ColumnSelectorStrategyFactory { @Override - public TopNTypeHelper makeQueryTypeHelper( - String dimName, ColumnCapabilities capabilities + public TopNColumnSelectorStrategy makeColumnSelectorStrategy( + String columnName, ColumnCapabilities capabilities ) { ValueType type = capabilities.getType(); switch(type) { case STRING: - return new StringTopNTypeHelper(); + return new StringTopNColumnSelectorStrategy(); default: throw new IAE("Cannot create query type helper from invalid type [%s]", type); } diff --git a/processing/src/main/java/io/druid/segment/DimensionHandler.java b/processing/src/main/java/io/druid/segment/DimensionHandler.java index 403d724d7d49..8f2e791cf8ee 100644 --- a/processing/src/main/java/io/druid/segment/DimensionHandler.java +++ b/processing/src/main/java/io/druid/segment/DimensionHandler.java @@ -119,10 +119,6 @@ DimensionMergerLegacy makeLegacyMerger( ProgressIndicator progress ) throws IOException; - - public DimensionQueryHelper makeQueryHelper(); - - /** * Given 
an array representing a single set of row value(s) for this dimension as an Object, * return the length of the array after appropriate type-casting. diff --git a/processing/src/main/java/io/druid/segment/DimensionHandlerUtils.java b/processing/src/main/java/io/druid/segment/DimensionHandlerUtils.java index dc1151f163fd..b27df904155e 100644 --- a/processing/src/main/java/io/druid/segment/DimensionHandlerUtils.java +++ b/processing/src/main/java/io/druid/segment/DimensionHandlerUtils.java @@ -22,10 +22,10 @@ import com.google.common.collect.Lists; import io.druid.java.util.common.IAE; import io.druid.data.input.impl.DimensionSchema.MultiValueHandling; -import io.druid.query.QueryDimensionInfo; +import io.druid.query.ColumnSelectorPlus; import io.druid.query.dimension.DimensionSpec; -import io.druid.query.dimension.QueryTypeHelper; -import io.druid.query.dimension.QueryTypeHelperFactory; +import io.druid.query.dimension.ColumnSelectorStrategy; +import io.druid.query.dimension.ColumnSelectorStrategyFactory; import io.druid.segment.column.Column; import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ColumnCapabilitiesImpl; @@ -37,7 +37,7 @@ public final class DimensionHandlerUtils { private DimensionHandlerUtils() {} - private final static ColumnCapabilities DEFAULT_STRING_CAPABILITIES = + public final static ColumnCapabilities DEFAULT_STRING_CAPABILITIES = new ColumnCapabilitiesImpl().setType(ValueType.STRING) .setDictionaryEncoded(true) .setHasBitmapIndexes(true); @@ -69,67 +69,55 @@ public static DimensionHandler getHandlerFromCapabilities( return new StringDimensionHandler(dimensionName, multiValueHandling); } - public static DimensionQueryHelper makeBaseQueryHelper( + public static ColumnSelectorStrategyClass makeStrategy( + ColumnSelectorStrategyFactory strategyFactory, String dimName, ColumnCapabilities capabilities, List availableDimensions ) { - capabilities = setDefaultForInvalidCapabilities(dimName, capabilities, 
availableDimensions); - if (capabilities.getType() == ValueType.STRING) { - return new StringDimensionQueryHelper(dimName); - } - return null; + capabilities = getEffectiveCapabilities(dimName, capabilities, availableDimensions); + return strategyFactory.makeColumnSelectorStrategy(dimName, capabilities); } - public static QueryTypeHelperClass makeQueryTypeHelper( - QueryTypeHelperFactory typeHelperFactory, - String dimName, - ColumnCapabilities capabilities, - List availableDimensions - ) - { - capabilities = setDefaultForInvalidCapabilities(dimName, capabilities, availableDimensions); - return typeHelperFactory.makeQueryTypeHelper(dimName, capabilities); - } - - public static QueryDimensionInfo[] getDimensionInfo( - QueryTypeHelperFactory typeHelperFactory, + public static ColumnSelectorPlus[] getDimensionInfo( + ColumnSelectorStrategyFactory strategyFactory, List dimensionSpecs, StorageAdapter adapter, ColumnSelectorFactory cursor ) { int dimCount = dimensionSpecs.size(); - QueryDimensionInfo[] dims = new QueryDimensionInfo[dimCount]; + ColumnSelectorPlus[] dims = new ColumnSelectorPlus[dimCount]; for (int i = 0; i < dimCount; i++) { final DimensionSpec dimSpec = dimensionSpecs.get(i); final String dimName = dimSpec.getDimension(); - DimensionQueryHelper baseHelper = makeBaseQueryHelper( + ColumnSelectorStrategyClass strategy = makeStrategy( + strategyFactory, dimName, cursor.getColumnCapabilities(dimSpec.getDimension()), adapter == null ? null : Lists.newArrayList(adapter.getAvailableDimensions()) ); - QueryTypeHelperClass queryTypeHelper = makeQueryTypeHelper( - typeHelperFactory, - dimName, - cursor.getColumnCapabilities(dimSpec.getDimension()), + final ColumnValueSelector selector = getColumnValueSelectorFromDimensionSpec( + dimSpec, + cursor, adapter == null ? 
null : Lists.newArrayList(adapter.getAvailableDimensions()) ); - - final ColumnValueSelector selector = baseHelper.getColumnValueSelector(dimSpec, cursor); - final QueryDimensionInfo dimInfo = new QueryDimensionInfo<>( - dimSpec, - baseHelper, - queryTypeHelper, + final ColumnSelectorPlus selectorPlus = new ColumnSelectorPlus<>( + dimName, + dimSpec.getOutputName(), + strategy, selector ); - dims[i] = dimInfo; + dims[i] = selectorPlus; } return dims; } - private static ColumnCapabilities setDefaultForInvalidCapabilities( + // When determining the capabilites of a column during query processing, this function + // adjusts the capabilities for columns that cannot be handled as-is to manageable defaults + // (e.g., treating missing columns as empty String columns) + public static ColumnCapabilities getEffectiveCapabilities( String dimName, ColumnCapabilities capabilities, List availableDimensions @@ -153,4 +141,21 @@ private static ColumnCapabilities setDefaultForInvalidCapabilities( return capabilities; } + + public static ColumnValueSelector getColumnValueSelectorFromDimensionSpec( + DimensionSpec dimSpec, + ColumnSelectorFactory columnSelectorFactory, + List availableDimensions + ) + { + String dimName = dimSpec.getOutputName(); + ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(dimName); + capabilities = getEffectiveCapabilities(dimName, capabilities, availableDimensions); + switch (capabilities.getType()) { + case STRING: + return columnSelectorFactory.makeDimensionSelector(dimSpec); + default: + return null; + } + } } diff --git a/processing/src/main/java/io/druid/segment/DimensionQueryHelper.java b/processing/src/main/java/io/druid/segment/DimensionQueryHelper.java deleted file mode 100644 index c4a2cccbf3f8..000000000000 --- a/processing/src/main/java/io/druid/segment/DimensionQueryHelper.java +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to Metamarkets Group Inc. 
(Metamarkets) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Metamarkets licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package io.druid.segment; - -import io.druid.query.dimension.DimensionSpec; -import io.druid.query.filter.DruidPredicateFactory; -import io.druid.query.filter.ValueMatcher; - -/** - * Query related interface. - * - * Contains a collection of query processing methods for functionality that is dependent on - * the type of a dimension. - * - * The methods within this interface are general methods that are not tied to a specific query type. - * - * Each DimensionQueryHelper is associated with a single dimension. - * - * @param The type of the row values object for this dimension - * @param The type of the row value selector (e.g. DimensionSelector) for this dimension - */ -public interface DimensionQueryHelper -{ - /** - * Get a typed column value selector (DimensionSelector, LongColumnSelector, etc.) from a ColumnSelectorFactory. - * @param dimensionSpec The dimension of the selector - * @param columnSelectorFactory Column value selector provider - * @return Column value selector for the dimension specified by dimensionSpec. 
- */ - ValueSelectorType getColumnValueSelector(DimensionSpec dimensionSpec, ColumnSelectorFactory columnSelectorFactory); - - /** - * Retrieve the current row from a dimension value selector. - * - * @param dimSelector Dimension value selector - * @return Current row - */ - RowValuesType getRowFromDimSelector(ValueSelectorType dimSelector); - - /** - * Get the size of a row object. - * - * The type of the row object will depend on the dimension type, e.g.: - * - * String type -> IndexedInts row object - * Long type -> IndexedLongs row object - * - * @param rowValues The row object to return the size of - * @return size of the row object - */ - int getRowSize(RowValuesType rowValues); - - - /** - * Get the cardinality, if possible, from a dimension value selector object. - * - * The class of the row object will depend on the dimension type, e.g: - * - * String type -> DimensionSelector - * Long type -> LongColumnSelector - * - * @param valueSelector The dimension value selector object - * @return Cardinality of the dimension value selector object, -1 if cardinality is not available. - */ - int getCardinality(ValueSelectorType valueSelector); - - - // Functions for QueryableIndexStorageAdapter, FilteredAggregatorFactory - /** - * Create a single value ValueMatcher, used for filtering by QueryableIndexStorageAdapter and FilteredAggregatorFactory. - * - * @param cursor ColumnSelectorFactory for creating dimension value selectors - * @param value Value to match against - * @return ValueMatcher that matches on 'value' - */ - ValueMatcher getValueMatcher(ColumnSelectorFactory cursor, String value); - - - /** - * Create a predicate-based ValueMatcher, used for filtering by QueryableIndexStorageAdapter and FilteredAggregatorFactory. 
- * - * @param cursor ColumnSelectorFactory for creating dimension value selectors - * @param predicateFactory A DruidPredicateFactory that provides the filter predicates to be matched - * @return A ValueMatcher that applies the predicate for this DimensionQueryHelper's value type from the predicateFactory - */ - ValueMatcher getValueMatcher(ColumnSelectorFactory cursor, final DruidPredicateFactory predicateFactory); -} diff --git a/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java index 07945909635b..2096dfc6676d 100644 --- a/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java +++ b/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java @@ -43,6 +43,8 @@ import io.druid.query.filter.Filter; import io.druid.query.filter.RowOffsetMatcherFactory; import io.druid.query.filter.ValueMatcher; +import io.druid.query.filter.ValueMatcherColumnSelectorStrategy; +import io.druid.query.filter.ValueMatcherColumnSelectorStrategyFactory; import io.druid.query.filter.ValueMatcherFactory; import io.druid.segment.column.BitmapIndex; import io.druid.segment.column.Column; @@ -1038,6 +1040,9 @@ public void set(Offset currOffset) private static class CursorOffsetHolderValueMatcherFactory implements ValueMatcherFactory { + private static final ValueMatcherColumnSelectorStrategyFactory STRATEGY_FACTORY = + new ValueMatcherColumnSelectorStrategyFactory(); + private final StorageAdapter storageAdapter; private final ColumnSelectorFactory cursor; private final List availableMetrics; @@ -1064,12 +1069,12 @@ public ValueMatcher makeValueMatcher(String dimension, final String value) } } - final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeBaseQueryHelper( + final ValueMatcherColumnSelectorStrategy strategy = STRATEGY_FACTORY.makeColumnSelectorStrategy( dimension, - cursor.getColumnCapabilities(dimension), - 
Lists.newArrayList(storageAdapter.getAvailableDimensions()) + cursor.getColumnCapabilities(dimension) ); - return queryHelper.getValueMatcher(cursor, value); + + return strategy.getValueMatcher(cursor, value); } @Override @@ -1081,12 +1086,12 @@ public ValueMatcher makeValueMatcher(String dimension, final DruidPredicateFacto } } - final DimensionQueryHelper queryHelper = DimensionHandlerUtils.makeBaseQueryHelper( + final ValueMatcherColumnSelectorStrategy strategy = STRATEGY_FACTORY.makeColumnSelectorStrategy( dimension, - cursor.getColumnCapabilities(dimension), - Lists.newArrayList(storageAdapter.getAvailableDimensions()) + cursor.getColumnCapabilities(dimension) ); - return queryHelper.getValueMatcher(cursor, predicateFactory); + + return strategy.getValueMatcher(cursor, predicateFactory); } private ValueMatcher makeLongValueMatcher(String dimension, final DruidLongPredicate predicate) diff --git a/processing/src/main/java/io/druid/segment/StringDimensionHandler.java b/processing/src/main/java/io/druid/segment/StringDimensionHandler.java index db7569a02287..b45f95edec27 100644 --- a/processing/src/main/java/io/druid/segment/StringDimensionHandler.java +++ b/processing/src/main/java/io/druid/segment/StringDimensionHandler.java @@ -213,10 +213,4 @@ public DimensionMergerLegacy makeLegacyMerger( { return new StringDimensionMergerLegacy(dimensionName, indexSpec, outDir, ioPeon, capabilities, progress); } - - @Override - public DimensionQueryHelper makeQueryHelper() - { - return new StringDimensionQueryHelper(dimensionName); - } } diff --git a/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorBenchmark.java b/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorBenchmark.java index 8128cbc032a9..cba915f2d223 100644 --- a/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorBenchmark.java +++ 
b/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorBenchmark.java @@ -28,12 +28,12 @@ import com.google.common.collect.FluentIterable; import com.google.common.collect.Lists; import com.google.common.collect.Range; -import io.druid.query.QueryDimensionInfo; -import io.druid.query.aggregation.cardinality.types.CardinalityAggregatorTypeHelper; +import io.druid.query.ColumnSelectorPlus; +import io.druid.query.aggregation.cardinality.types.CardinalityAggColumnSelectorStrategy; +import io.druid.query.aggregation.cardinality.types.StringCardinalityAggColumnSelectorStrategy; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; import io.druid.segment.DimensionSelector; -import io.druid.segment.StringDimensionQueryHelper; import java.nio.ByteBuffer; import java.util.List; @@ -44,7 +44,7 @@ public class CardinalityAggregatorBenchmark extends SimpleBenchmark CardinalityBufferAggregator agg; List selectorList; - List> dimInfoList; + List> dimInfoList; ByteBuffer buf; int pos; @@ -82,7 +82,12 @@ public String[] apply(Integer input) final DimensionSpec dimSpec1 = new DefaultDimensionSpec("dim1", "dim1"); final CardinalityAggregatorTest.TestDimensionSelector dim1 = new CardinalityAggregatorTest.TestDimensionSelector(values, null); - final QueryDimensionInfo dimInfo1 = new QueryDimensionInfo(dimSpec1, new StringDimensionQueryHelper("dim1"), null, dim1); + final ColumnSelectorPlus dimInfo1 = new ColumnSelectorPlus( + dimSpec1.getDimension(), + dimSpec1.getOutputName(), + new StringCardinalityAggColumnSelectorStrategy(), + dim1 + ); selectorList = Lists.newArrayList( (DimensionSelector) dim1 diff --git a/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java b/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java index 1b87fcc65e40..153faf26188b 100644 --- 
a/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java +++ b/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java @@ -28,12 +28,12 @@ import com.google.common.collect.Maps; import io.druid.jackson.DefaultObjectMapper; import io.druid.js.JavaScriptConfig; +import io.druid.query.ColumnSelectorPlus; import io.druid.query.aggregation.Aggregator; import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.BufferAggregator; -import io.druid.query.QueryDimensionInfo; -import io.druid.query.aggregation.cardinality.types.CardinalityAggregatorTypeHelper; -import io.druid.query.aggregation.cardinality.types.StringCardinalityAggregatorTypeHelper; +import io.druid.query.aggregation.cardinality.types.CardinalityAggColumnSelectorStrategy; +import io.druid.query.aggregation.cardinality.types.StringCardinalityAggColumnSelectorStrategy; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; import io.druid.query.dimension.ExtractionDimensionSpec; @@ -42,7 +42,6 @@ import io.druid.query.extraction.JavaScriptExtractionFn; import io.druid.query.extraction.RegexDimExtractionFn; import io.druid.segment.DimensionSelector; -import io.druid.segment.StringDimensionQueryHelper; import io.druid.segment.data.IndexedInts; import it.unimi.dsi.fastutil.ints.IntIterator; import it.unimi.dsi.fastutil.ints.IntIterators; @@ -248,19 +247,19 @@ private static void bufferAggregate( } } - List> dimInfoList; + List> dimInfoList; List selectorList; CardinalityAggregatorFactory rowAggregatorFactory; CardinalityAggregatorFactory valueAggregatorFactory; final TestDimensionSelector dim1; final TestDimensionSelector dim2; - List> dimInfoListWithExtraction; + List> dimInfoListWithExtraction; List selectorListWithExtraction; final TestDimensionSelector dim1WithExtraction; final TestDimensionSelector dim2WithExtraction; - List> dimInfoListConstantVal; + List> 
dimInfoListConstantVal; List selectorListConstantVal; final TestDimensionSelector dim1ConstantVal; final TestDimensionSelector dim2ConstantVal; @@ -274,17 +273,15 @@ public CardinalityAggregatorTest() dim2 = new TestDimensionSelector(values2, null); dimInfoList = Lists.newArrayList( - new QueryDimensionInfo( - dimSpec1, - new StringDimensionQueryHelper("dim1"), - new StringCardinalityAggregatorTypeHelper(), - dim1 + new ColumnSelectorPlus( + dimSpec1.getDimension(), + dimSpec1.getOutputName(), + new StringCardinalityAggColumnSelectorStrategy(), dim1 ), - new QueryDimensionInfo( - dimSpec2, - new StringDimensionQueryHelper("dim2"), - new StringCardinalityAggregatorTypeHelper(), - dim2 + new ColumnSelectorPlus( + dimSpec2.getDimension(), + dimSpec2.getOutputName(), + new StringCardinalityAggColumnSelectorStrategy(), dim2 ) ); @@ -321,17 +318,15 @@ public CardinalityAggregatorTest() dim2WithExtraction ); dimInfoListWithExtraction = Lists.newArrayList( - new QueryDimensionInfo( - dimSpec1, - new StringDimensionQueryHelper("dim1"), - new StringCardinalityAggregatorTypeHelper(), - dim1WithExtraction + new ColumnSelectorPlus( + dimSpec1.getDimension(), + dimSpec1.getOutputName(), + new StringCardinalityAggColumnSelectorStrategy(), dim1WithExtraction ), - new QueryDimensionInfo( - dimSpec2, - new StringDimensionQueryHelper("dim2"), - new StringCardinalityAggregatorTypeHelper(), - dim2WithExtraction + new ColumnSelectorPlus( + dimSpec1.getDimension(), + dimSpec1.getOutputName(), + new StringCardinalityAggColumnSelectorStrategy(), dim2WithExtraction ) ); @@ -344,18 +339,15 @@ public CardinalityAggregatorTest() dim2ConstantVal ); dimInfoListConstantVal = Lists.newArrayList( - new QueryDimensionInfo( - dimSpec1, - new StringDimensionQueryHelper("dim1"), - new StringCardinalityAggregatorTypeHelper(), - dim1ConstantVal + new ColumnSelectorPlus( + dimSpec1.getDimension(), + dimSpec1.getOutputName(), + new StringCardinalityAggColumnSelectorStrategy(), dim1ConstantVal ), - new 
QueryDimensionInfo( - dimSpec2, - new StringDimensionQueryHelper( - "dim2"), - new StringCardinalityAggregatorTypeHelper(), - dim2ConstantVal + new ColumnSelectorPlus( + dimSpec1.getDimension(), + dimSpec1.getOutputName(), + new StringCardinalityAggColumnSelectorStrategy(), dim2ConstantVal ) ); @@ -439,20 +431,18 @@ public void testCombineRows() { List selector1 = Lists.newArrayList((DimensionSelector) dim1); List selector2 = Lists.newArrayList((DimensionSelector) dim2); - List> dimInfo1 = Lists.newArrayList( - new QueryDimensionInfo( - dimSpec1, - new StringDimensionQueryHelper("dim1"), - new StringCardinalityAggregatorTypeHelper(), - dim1 + List> dimInfo1 = Lists.newArrayList( + new ColumnSelectorPlus( + dimSpec1.getDimension(), + dimSpec1.getOutputName(), + new StringCardinalityAggColumnSelectorStrategy(), dim1 ) ); - List> dimInfo2 = Lists.newArrayList( - new QueryDimensionInfo( - dimSpec2, - new StringDimensionQueryHelper("dim2"), - new StringCardinalityAggregatorTypeHelper(), - dim2 + List> dimInfo2 = Lists.newArrayList( + new ColumnSelectorPlus( + dimSpec1.getDimension(), + dimSpec1.getOutputName(), + new StringCardinalityAggColumnSelectorStrategy(), dim2 ) ); @@ -487,20 +477,18 @@ public void testCombineValues() List selector1 = Lists.newArrayList((DimensionSelector) dim1); List selector2 = Lists.newArrayList((DimensionSelector) dim2); - List> dimInfo1 = Lists.newArrayList( - new QueryDimensionInfo( - dimSpec1, - new StringDimensionQueryHelper("dim1"), - new StringCardinalityAggregatorTypeHelper(), - dim1 + List> dimInfo1 = Lists.newArrayList( + new ColumnSelectorPlus( + dimSpec1.getDimension(), + dimSpec1.getOutputName(), + new StringCardinalityAggColumnSelectorStrategy(), dim1 ) ); - List> dimInfo2 = Lists.newArrayList( - new QueryDimensionInfo( - dimSpec2, - new StringDimensionQueryHelper("dim2"), - new StringCardinalityAggregatorTypeHelper(), - dim2 + List> dimInfo2 = Lists.newArrayList( + new ColumnSelectorPlus( + dimSpec1.getDimension(), + 
dimSpec1.getOutputName(), + new StringCardinalityAggColumnSelectorStrategy(), dim2 ) ); From 8c1751ddab6ae9e728fbdee797609e1672cd4631 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Mon, 19 Dec 2016 18:50:35 -0800 Subject: [PATCH 11/12] More PR comments --- .../FilteredAggregatorFactory.java | 34 ++++++--- .../cardinality/CardinalityAggregator.java | 14 ++-- .../CardinalityAggregatorFactory.java | 18 ++--- .../CardinalityBufferAggregator.java | 6 +- ...lityAggregatorColumnSelectorStrategy.java} | 2 +- ...regatorColumnSelectorStrategyFactory.java} | 9 ++- ...lityAggregatorColumnSelectorStrategy.java} | 2 +- .../ColumnSelectorStrategyFactory.java | 4 +- ...ingValueMatcherColumnSelectorStrategy.java | 11 +-- .../ValueMatcherColumnSelectorStrategy.java | 4 +- ...eMatcherColumnSelectorStrategyFactory.java | 6 +- .../epinephelinae/GroupByQueryEngineV2.java | 15 ++-- .../druid/query/search/SearchQueryRunner.java | 5 +- .../druid/query/select/SelectQueryEngine.java | 5 +- .../java/io/druid/query/topn/TopNMapFn.java | 3 +- .../query/topn/types/TopNStrategyFactory.java | 2 +- .../druid/segment/DimensionHandlerUtils.java | 74 +++++++++---------- .../segment/QueryableIndexStorageAdapter.java | 28 ++++--- .../CardinalityAggregatorBenchmark.java | 10 +-- .../CardinalityAggregatorTest.java | 58 +++++++-------- .../query/groupby/GroupByQueryRunnerTest.java | 39 ++++++++++ 21 files changed, 196 insertions(+), 153 deletions(-) rename processing/src/main/java/io/druid/query/aggregation/cardinality/types/{CardinalityAggColumnSelectorStrategy.java => CardinalityAggregatorColumnSelectorStrategy.java} (93%) rename processing/src/main/java/io/druid/query/aggregation/cardinality/types/{CardinalityAggColumnSelectorStrategyFactory.java => CardinalityAggregatorColumnSelectorStrategyFactory.java} (78%) rename processing/src/main/java/io/druid/query/aggregation/cardinality/types/{StringCardinalityAggColumnSelectorStrategy.java => StringCardinalityAggregatorColumnSelectorStrategy.java} (95%) diff 
--git a/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java b/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java index b0f35937b6e9..2ea3b27c0c15 100644 --- a/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java +++ b/processing/src/main/java/io/druid/query/aggregation/FilteredAggregatorFactory.java @@ -21,6 +21,10 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import io.druid.query.ColumnSelectorPlus; +import io.druid.query.dimension.DefaultDimensionSpec; +import io.druid.query.dimension.DimensionSpec; import io.druid.query.filter.DimFilter; import io.druid.query.filter.DruidLongPredicate; import io.druid.query.filter.DruidPredicateFactory; @@ -29,6 +33,7 @@ import io.druid.query.filter.ValueMatcherColumnSelectorStrategyFactory; import io.druid.query.filter.ValueMatcherFactory; import io.druid.segment.ColumnSelectorFactory; +import io.druid.segment.DimensionHandlerUtils; import io.druid.segment.column.Column; import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ValueType; @@ -226,12 +231,16 @@ public ValueMatcher makeValueMatcher(final String dimension, final String value) ); } - final ValueMatcherColumnSelectorStrategy strategy = STRATEGY_FACTORY.makeColumnSelectorStrategy( - dimension, - columnSelectorFactory.getColumnCapabilities(dimension) - ); + ColumnSelectorPlus[] selector = + DimensionHandlerUtils.createColumnSelectorPluses( + STRATEGY_FACTORY, + ImmutableList.of(DefaultDimensionSpec.of(dimension)), + columnSelectorFactory + ); + - return strategy.getValueMatcher(columnSelectorFactory, value); + final ValueMatcherColumnSelectorStrategy strategy = selector[0].getColumnSelectorStrategy(); + return strategy.getValueMatcher(dimension, columnSelectorFactory, value); } public ValueMatcher makeValueMatcher(final String dimension, final 
DruidPredicateFactory predicateFactory) @@ -241,11 +250,16 @@ public ValueMatcher makeValueMatcher(final String dimension, final DruidPredicat case LONG: return makeLongValueMatcher(dimension, predicateFactory.makeLongPredicate()); case STRING: - final ValueMatcherColumnSelectorStrategy strategy = STRATEGY_FACTORY.makeColumnSelectorStrategy( - dimension, - columnSelectorFactory.getColumnCapabilities(dimension) - ); - return strategy.getValueMatcher(columnSelectorFactory, predicateFactory); + ColumnSelectorPlus[] selector = + DimensionHandlerUtils.createColumnSelectorPluses( + STRATEGY_FACTORY, + ImmutableList.of(DefaultDimensionSpec.of(dimension)), + columnSelectorFactory + ); + + + final ValueMatcherColumnSelectorStrategy strategy = selector[0].getColumnSelectorStrategy(); + return strategy.getValueMatcher(dimension, columnSelectorFactory, predicateFactory); default: return new BooleanValueMatcher(predicateFactory.makeStringPredicate().apply(null)); } diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java index b27256df04c6..20bc344beeb6 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregator.java @@ -24,7 +24,7 @@ import com.google.common.hash.Hashing; import io.druid.query.aggregation.Aggregator; import io.druid.query.ColumnSelectorPlus; -import io.druid.query.aggregation.cardinality.types.CardinalityAggColumnSelectorStrategy; +import io.druid.query.aggregation.cardinality.types.CardinalityAggregatorColumnSelectorStrategy; import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector; import java.util.List; @@ -32,13 +32,13 @@ public class CardinalityAggregator implements Aggregator { private final String name; - private final List> selectorPlusList; + private final List> 
selectorPlusList; private final boolean byRow; public static final HashFunction hashFn = Hashing.murmur3_128(); protected static void hashRow( - List> selectorPlusList, + List> selectorPlusList, HyperLogLogCollector collector ) { @@ -48,18 +48,18 @@ protected static void hashRow( hasher.putByte((byte) 0); } - ColumnSelectorPlus selectorPlus = selectorPlusList.get(k); + ColumnSelectorPlus selectorPlus = selectorPlusList.get(k); selectorPlus.getColumnSelectorStrategy().hashRow(selectorPlus.getSelector(), hasher); } collector.add(hasher.hash().asBytes()); } protected static void hashValues( - List> selectorPlusList, + List> selectorPlusList, HyperLogLogCollector collector ) { - for (final ColumnSelectorPlus selectorPlus : selectorPlusList) { + for (final ColumnSelectorPlus selectorPlus : selectorPlusList) { selectorPlus.getColumnSelectorStrategy().hashValues(selectorPlus.getSelector(), collector); } } @@ -68,7 +68,7 @@ protected static void hashValues( public CardinalityAggregator( String name, - List> selectorPlusList, + List> selectorPlusList, boolean byRow ) { diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java index 04743e624a08..d8ccf5bac297 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorFactory.java @@ -32,8 +32,8 @@ import io.druid.query.aggregation.AggregatorFactoryNotMergeableException; import io.druid.query.aggregation.Aggregators; import io.druid.query.aggregation.BufferAggregator; -import io.druid.query.aggregation.cardinality.types.CardinalityAggColumnSelectorStrategy; -import io.druid.query.aggregation.cardinality.types.CardinalityAggColumnSelectorStrategyFactory; +import 
io.druid.query.aggregation.cardinality.types.CardinalityAggregatorColumnSelectorStrategy; +import io.druid.query.aggregation.cardinality.types.CardinalityAggregatorColumnSelectorStrategyFactory; import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector; import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory; import io.druid.query.dimension.DefaultDimensionSpec; @@ -96,8 +96,8 @@ public static Object estimateCardinality(Object object) private static final byte CACHE_TYPE_ID = (byte) 0x8; private static final byte CACHE_KEY_SEPARATOR = (byte) 0xFF; - private static final CardinalityAggColumnSelectorStrategyFactory STRATEGY_FACTORY = - new CardinalityAggColumnSelectorStrategyFactory(); + private static final CardinalityAggregatorColumnSelectorStrategyFactory STRATEGY_FACTORY = + new CardinalityAggregatorColumnSelectorStrategyFactory(); private final String name; private final List fields; @@ -137,11 +137,10 @@ public CardinalityAggregatorFactory( @Override public Aggregator factorize(final ColumnSelectorFactory columnFactory) { - List> selectorPlusList = - Arrays.asList(DimensionHandlerUtils.getDimensionInfo( + List> selectorPlusList = + Arrays.asList(DimensionHandlerUtils.createColumnSelectorPluses( STRATEGY_FACTORY, fields, - null, columnFactory )); @@ -156,11 +155,10 @@ public Aggregator factorize(final ColumnSelectorFactory columnFactory) @Override public BufferAggregator factorizeBuffered(ColumnSelectorFactory columnFactory) { - List> selectorPlusList = - Arrays.asList(DimensionHandlerUtils.getDimensionInfo( + List> selectorPlusList = + Arrays.asList(DimensionHandlerUtils.createColumnSelectorPluses( STRATEGY_FACTORY, fields, - null, columnFactory )); diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java index 73a1169ffc6e..3ea20e2388df 100644 --- 
a/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/CardinalityBufferAggregator.java @@ -21,7 +21,7 @@ import io.druid.query.aggregation.BufferAggregator; import io.druid.query.ColumnSelectorPlus; -import io.druid.query.aggregation.cardinality.types.CardinalityAggColumnSelectorStrategy; +import io.druid.query.aggregation.cardinality.types.CardinalityAggregatorColumnSelectorStrategy; import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector; import java.nio.ByteBuffer; @@ -29,13 +29,13 @@ public class CardinalityBufferAggregator implements BufferAggregator { - private final List> selectorPlusList; + private final List> selectorPlusList; private final boolean byRow; private static final byte[] EMPTY_BYTES = HyperLogLogCollector.makeEmptyVersionedByteArray(); public CardinalityBufferAggregator( - List> selectorPlusList, + List> selectorPlusList, boolean byRow ) { diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggColumnSelectorStrategy.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorColumnSelectorStrategy.java similarity index 93% rename from processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggColumnSelectorStrategy.java rename to processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorColumnSelectorStrategy.java index 1bdfad6f0d4a..c85ce0d739ce 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggColumnSelectorStrategy.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorColumnSelectorStrategy.java @@ -24,7 +24,7 @@ import io.druid.query.dimension.ColumnSelectorStrategy; import io.druid.segment.ColumnValueSelector; -public interface CardinalityAggColumnSelectorStrategy extends +public 
interface CardinalityAggregatorColumnSelectorStrategy extends ColumnSelectorStrategy { /*** diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggColumnSelectorStrategyFactory.java b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorColumnSelectorStrategyFactory.java similarity index 78% rename from processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggColumnSelectorStrategyFactory.java rename to processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorColumnSelectorStrategyFactory.java index 8ac44e8b4549..9a74997855c8 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggColumnSelectorStrategyFactory.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/CardinalityAggregatorColumnSelectorStrategyFactory.java @@ -24,17 +24,18 @@ import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ValueType; -public class CardinalityAggColumnSelectorStrategyFactory implements ColumnSelectorStrategyFactory +public class CardinalityAggregatorColumnSelectorStrategyFactory + implements ColumnSelectorStrategyFactory { @Override - public CardinalityAggColumnSelectorStrategy makeColumnSelectorStrategy( - String columnName, ColumnCapabilities capabilities + public CardinalityAggregatorColumnSelectorStrategy makeColumnSelectorStrategy( + ColumnCapabilities capabilities ) { ValueType type = capabilities.getType(); switch(type) { case STRING: - return new StringCardinalityAggColumnSelectorStrategy(); + return new StringCardinalityAggregatorColumnSelectorStrategy(); default: throw new IAE("Cannot create query type helper from invalid type [%s]", type); } diff --git a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/StringCardinalityAggColumnSelectorStrategy.java 
b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/StringCardinalityAggregatorColumnSelectorStrategy.java similarity index 95% rename from processing/src/main/java/io/druid/query/aggregation/cardinality/types/StringCardinalityAggColumnSelectorStrategy.java rename to processing/src/main/java/io/druid/query/aggregation/cardinality/types/StringCardinalityAggregatorColumnSelectorStrategy.java index 6cdc91add33e..8127ecb4b73d 100644 --- a/processing/src/main/java/io/druid/query/aggregation/cardinality/types/StringCardinalityAggColumnSelectorStrategy.java +++ b/processing/src/main/java/io/druid/query/aggregation/cardinality/types/StringCardinalityAggregatorColumnSelectorStrategy.java @@ -28,7 +28,7 @@ import java.util.Arrays; -public class StringCardinalityAggColumnSelectorStrategy implements CardinalityAggColumnSelectorStrategy +public class StringCardinalityAggregatorColumnSelectorStrategy implements CardinalityAggregatorColumnSelectorStrategy { public static final String CARDINALITY_AGG_NULL_STRING = "\u0000"; public static final char CARDINALITY_AGG_SEPARATOR = '\u0001'; diff --git a/processing/src/main/java/io/druid/query/dimension/ColumnSelectorStrategyFactory.java b/processing/src/main/java/io/druid/query/dimension/ColumnSelectorStrategyFactory.java index fd41eb2ae3e0..7729e05e48db 100644 --- a/processing/src/main/java/io/druid/query/dimension/ColumnSelectorStrategyFactory.java +++ b/processing/src/main/java/io/druid/query/dimension/ColumnSelectorStrategyFactory.java @@ -21,7 +21,7 @@ import io.druid.segment.column.ColumnCapabilities; -public interface ColumnSelectorStrategyFactory +public interface ColumnSelectorStrategyFactory { - ColumnSelectorClass makeColumnSelectorStrategy(String columnName, ColumnCapabilities capabilities); + ColumnSelectorStrategyClass makeColumnSelectorStrategy(ColumnCapabilities capabilities); } diff --git a/processing/src/main/java/io/druid/query/filter/StringValueMatcherColumnSelectorStrategy.java 
b/processing/src/main/java/io/druid/query/filter/StringValueMatcherColumnSelectorStrategy.java index 38c3f869c956..8a1207c35426 100644 --- a/processing/src/main/java/io/druid/query/filter/StringValueMatcherColumnSelectorStrategy.java +++ b/processing/src/main/java/io/druid/query/filter/StringValueMatcherColumnSelectorStrategy.java @@ -31,15 +31,8 @@ public class StringValueMatcherColumnSelectorStrategy implements ValueMatcherColumnSelectorStrategy { - private final String columnName; - - public StringValueMatcherColumnSelectorStrategy(String columnName) - { - this.columnName = columnName; - } - @Override - public ValueMatcher getValueMatcher(ColumnSelectorFactory cursor, final String value) + public ValueMatcher getValueMatcher(String columnName, ColumnSelectorFactory cursor, final String value) { final String valueStr = Strings.emptyToNull(value); final DimensionSelector selector = cursor.makeDimensionSelector( @@ -101,7 +94,7 @@ public boolean matches() } @Override - public ValueMatcher getValueMatcher(ColumnSelectorFactory cursor, final DruidPredicateFactory predicateFactory) + public ValueMatcher getValueMatcher(String columnName, ColumnSelectorFactory cursor, final DruidPredicateFactory predicateFactory) { final DimensionSelector selector = cursor.makeDimensionSelector( new DefaultDimensionSpec(columnName, columnName) diff --git a/processing/src/main/java/io/druid/query/filter/ValueMatcherColumnSelectorStrategy.java b/processing/src/main/java/io/druid/query/filter/ValueMatcherColumnSelectorStrategy.java index f1059341b0e2..6e6c747bd4de 100644 --- a/processing/src/main/java/io/druid/query/filter/ValueMatcherColumnSelectorStrategy.java +++ b/processing/src/main/java/io/druid/query/filter/ValueMatcherColumnSelectorStrategy.java @@ -31,7 +31,7 @@ public interface ValueMatcherColumnSelectorStrategy extends ColumnSelectorStrate * @param value Value to match against * @return ValueMatcher that matches on 'value' */ - ValueMatcher getValueMatcher(ColumnSelectorFactory 
cursor, String value); + ValueMatcher getValueMatcher(String columnName, ColumnSelectorFactory cursor, String value); /** @@ -41,5 +41,5 @@ public interface ValueMatcherColumnSelectorStrategy extends ColumnSelectorStrate * @param predicateFactory A DruidPredicateFactory that provides the filter predicates to be matched * @return A ValueMatcher that applies the predicate for this DimensionQueryHelper's value type from the predicateFactory */ - ValueMatcher getValueMatcher(ColumnSelectorFactory cursor, final DruidPredicateFactory predicateFactory); + ValueMatcher getValueMatcher(String columnName, ColumnSelectorFactory cursor, final DruidPredicateFactory predicateFactory); } diff --git a/processing/src/main/java/io/druid/query/filter/ValueMatcherColumnSelectorStrategyFactory.java b/processing/src/main/java/io/druid/query/filter/ValueMatcherColumnSelectorStrategyFactory.java index cbcb326751b3..828189471d1c 100644 --- a/processing/src/main/java/io/druid/query/filter/ValueMatcherColumnSelectorStrategyFactory.java +++ b/processing/src/main/java/io/druid/query/filter/ValueMatcherColumnSelectorStrategyFactory.java @@ -21,7 +21,6 @@ import io.druid.java.util.common.IAE; import io.druid.query.dimension.ColumnSelectorStrategyFactory; -import io.druid.segment.DimensionHandlerUtils; import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ValueType; @@ -30,14 +29,13 @@ public class ValueMatcherColumnSelectorStrategyFactory { @Override public ValueMatcherColumnSelectorStrategy makeColumnSelectorStrategy( - String columnName, ColumnCapabilities capabilities + ColumnCapabilities capabilities ) { - capabilities = DimensionHandlerUtils.getEffectiveCapabilities(columnName, capabilities, null); ValueType type = capabilities.getType(); switch (type) { case STRING: - return new StringValueMatcherColumnSelectorStrategy(columnName); + return new StringValueMatcherColumnSelectorStrategy(); default: throw new IAE("Cannot create query type helper from invalid type 
[%s]", type); } diff --git a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index 156d6fd3e555..5c17f5bbc81d 100644 --- a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -66,7 +66,7 @@ public class GroupByQueryEngineV2 { private static final GroupByStrategyFactory STRATEGY_FACTORY = new GroupByStrategyFactory(); - private static GroupByColumnSelectorPlus[] getGroupBySelectorPlus(ColumnSelectorPlus[] baseSelectorPlus) + private static GroupByColumnSelectorPlus[] createGroupBySelectorPlus(ColumnSelectorPlus[] baseSelectorPlus) { GroupByColumnSelectorPlus[] retInfo = new GroupByColumnSelectorPlus[baseSelectorPlus.length]; int curPos = 0; @@ -134,10 +134,9 @@ public Sequence apply(final Cursor cursor) @Override public GroupByEngineIterator make() { - ColumnSelectorPlus[] selectorPlus = DimensionHandlerUtils.getDimensionInfo( + ColumnSelectorPlus[] selectorPlus = DimensionHandlerUtils.createColumnSelectorPluses( STRATEGY_FACTORY, query.getDimensions(), - storageAdapter, cursor ); return new GroupByEngineIterator( @@ -146,7 +145,7 @@ public GroupByEngineIterator make() cursor, bufferHolder.get(), fudgeTimestamp, - getGroupBySelectorPlus(selectorPlus) + createGroupBySelectorPlus(selectorPlus) ); } @@ -176,7 +175,7 @@ private static class GroupByStrategyFactory implements ColumnSelectorStrategyFac { @Override public GroupByColumnSelectorStrategy makeColumnSelectorStrategy( - String columnName, ColumnCapabilities capabilities + ColumnCapabilities capabilities ) { ValueType type = capabilities.getType(); @@ -194,10 +193,8 @@ public GroupByColumnSelectorStrategy makeColumnSelectorStrategy( * GroupByQueryEngineV2. * * Each GroupByColumnSelectorStrategy is associated with a single dimension. 
- * - * @param The type of the row values object for this dimension */ - private interface GroupByColumnSelectorStrategy extends ColumnSelectorStrategy + private interface GroupByColumnSelectorStrategy extends ColumnSelectorStrategy { /** * Return the size, in bytes, of this dimension's values in the grouping key. @@ -269,7 +266,7 @@ void processValueFromGroupingKey( boolean checkRowIndexAndAddValueToGroupingKey(int keyBufferPosition, Object rowObj, int rowValIdx, ByteBuffer keyBuffer); } - private static class StringGroupByColumnSelectorStrategy implements GroupByColumnSelectorStrategy + private static class StringGroupByColumnSelectorStrategy implements GroupByColumnSelectorStrategy { private static final int GROUP_BY_MISSING_VALUE = -1; diff --git a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java index a811ab9733f1..9aeac5f52644 100644 --- a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java +++ b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java @@ -85,7 +85,7 @@ private static class SearchStrategyFactory implements ColumnSelectorStrategyFact { @Override public SearchColumnSelectorStrategy makeColumnSelectorStrategy( - String columnName, ColumnCapabilities capabilities + ColumnCapabilities capabilities ) { ValueType type = capabilities.getType(); @@ -341,10 +341,9 @@ public Object2IntRBTreeMap accumulate(Object2IntRBTreeMap } List> selectorPlusList = Arrays.asList( - DimensionHandlerUtils.getDimensionInfo( + DimensionHandlerUtils.createColumnSelectorPluses( STRATEGY_FACTORY, nonBitmapDims, - adapter, cursor ) ); diff --git a/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java b/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java index 8f80844adbf9..411e5db119c6 100644 --- a/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java +++ 
b/processing/src/main/java/io/druid/query/select/SelectQueryEngine.java @@ -68,7 +68,7 @@ private static class SelectStrategyFactory implements ColumnSelectorStrategyFact { @Override public SelectColumnSelectorStrategy makeColumnSelectorStrategy( - String columnName, ColumnCapabilities capabilities + ColumnCapabilities capabilities ) { ValueType type = capabilities.getType(); @@ -178,10 +178,9 @@ public Result apply(Cursor cursor) final LongColumnSelector timestampColumnSelector = cursor.makeLongColumnSelector(Column.TIME_COLUMN_NAME); final List> selectorPlusList = Arrays.asList( - DimensionHandlerUtils.getDimensionInfo( + DimensionHandlerUtils.createColumnSelectorPluses( STRATEGY_FACTORY, Lists.newArrayList(dims), - adapter, cursor ) ); diff --git a/processing/src/main/java/io/druid/query/topn/TopNMapFn.java b/processing/src/main/java/io/druid/query/topn/TopNMapFn.java index e1c0dd6c6cef..27e224a6137c 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNMapFn.java +++ b/processing/src/main/java/io/druid/query/topn/TopNMapFn.java @@ -47,10 +47,9 @@ public TopNMapFn( @SuppressWarnings("unchecked") public Result apply(Cursor cursor) { - final ColumnSelectorPlus[] selectorPlusArray = DimensionHandlerUtils.getDimensionInfo( + final ColumnSelectorPlus[] selectorPlusArray = DimensionHandlerUtils.createColumnSelectorPluses( STRATEGY_FACTORY, Lists.newArrayList(query.getDimensionSpec()), - null, cursor ); diff --git a/processing/src/main/java/io/druid/query/topn/types/TopNStrategyFactory.java b/processing/src/main/java/io/druid/query/topn/types/TopNStrategyFactory.java index b5b4a4040bf0..0a141b0aa4fe 100644 --- a/processing/src/main/java/io/druid/query/topn/types/TopNStrategyFactory.java +++ b/processing/src/main/java/io/druid/query/topn/types/TopNStrategyFactory.java @@ -28,7 +28,7 @@ public class TopNStrategyFactory implements ColumnSelectorStrategyFactory ColumnSelectorStrategyClass makeStrategy( - ColumnSelectorStrategyFactory strategyFactory, - String 
dimName, - ColumnCapabilities capabilities, - List availableDimensions - ) - { - capabilities = getEffectiveCapabilities(dimName, capabilities, availableDimensions); - return strategyFactory.makeColumnSelectorStrategy(dimName, capabilities); - } - - public static ColumnSelectorPlus[] getDimensionInfo( + /** + * Creates an array of ColumnSelectorPlus objects, selectors that handle type-specific operations within + * query processing engines, using a strategy factory provided by the query engine. One ColumnSelectorPlus + * will be created for each column specified in dimensionSpecs. + * + * The ColumnSelectorPlus provides access to a type strategy (e.g., how to group on a float column) + * and a value selector for a single column. + * + * A caller should define a strategy factory that provides an interface for type-specific operations + * in a query engine. See GroupByStrategyFactory for a reference. + * + * @param The strategy type created by the provided strategy factory. + * @param strategyFactory A factory provided by query engines that generates type-handling strategies + * @param dimensionSpecs The set of columns to generate ColumnSelectorPlus objects for + * @param cursor Used to create value selectors for columns. + * @return An array of ColumnSelectorPlus objects, in the order of the columns specified in dimensionSpecs + */ + public static ColumnSelectorPlus[] createColumnSelectorPluses( ColumnSelectorStrategyFactory strategyFactory, List dimensionSpecs, - StorageAdapter adapter, ColumnSelectorFactory cursor ) { @@ -95,13 +94,11 @@ public static Colum ColumnSelectorStrategyClass strategy = makeStrategy( strategyFactory, dimName, - cursor.getColumnCapabilities(dimSpec.getDimension()), - adapter == null ? null : Lists.newArrayList(adapter.getAvailableDimensions()) + cursor.getColumnCapabilities(dimSpec.getDimension()) ); final ColumnValueSelector selector = getColumnValueSelectorFromDimensionSpec( dimSpec, - cursor, - adapter == null ? 
null : Lists.newArrayList(adapter.getAvailableDimensions()) + cursor ); final ColumnSelectorPlus selectorPlus = new ColumnSelectorPlus<>( dimName, @@ -117,23 +114,15 @@ public static Colum // When determining the capabilites of a column during query processing, this function // adjusts the capabilities for columns that cannot be handled as-is to manageable defaults // (e.g., treating missing columns as empty String columns) - public static ColumnCapabilities getEffectiveCapabilities( + private static ColumnCapabilities getEffectiveCapabilities( String dimName, - ColumnCapabilities capabilities, - List availableDimensions + ColumnCapabilities capabilities ) { if (capabilities == null) { capabilities = DEFAULT_STRING_CAPABILITIES; } - // treat metrics as null for now - if (availableDimensions != null) { - if (!availableDimensions.contains(dimName)) { - capabilities = DEFAULT_STRING_CAPABILITIES; - } - } - // non-Strings aren't actually supported yet if (capabilities.getType() != ValueType.STRING) { capabilities = DEFAULT_STRING_CAPABILITIES; @@ -142,15 +131,14 @@ public static ColumnCapabilities getEffectiveCapabilities( return capabilities; } - public static ColumnValueSelector getColumnValueSelectorFromDimensionSpec( + private static ColumnValueSelector getColumnValueSelectorFromDimensionSpec( DimensionSpec dimSpec, - ColumnSelectorFactory columnSelectorFactory, - List availableDimensions + ColumnSelectorFactory columnSelectorFactory ) { - String dimName = dimSpec.getOutputName(); + String dimName = dimSpec.getDimension(); ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(dimName); - capabilities = getEffectiveCapabilities(dimName, capabilities, availableDimensions); + capabilities = getEffectiveCapabilities(dimName, capabilities); switch (capabilities.getType()) { case STRING: return columnSelectorFactory.makeDimensionSelector(dimSpec); @@ -158,4 +146,14 @@ public static ColumnValueSelector getColumnValueSelectorFromDimensionSpec( 
return null; } } + + private static ColumnSelectorStrategyClass makeStrategy( + ColumnSelectorStrategyFactory strategyFactory, + String dimName, + ColumnCapabilities capabilities + ) + { + capabilities = getEffectiveCapabilities(dimName, capabilities); + return strategyFactory.makeColumnSelectorStrategy(capabilities); + } } diff --git a/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java index 2096dfc6676d..005bf92c90a6 100644 --- a/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java +++ b/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java @@ -34,7 +34,9 @@ import io.druid.java.util.common.guava.Sequences; import io.druid.math.expr.Expr; import io.druid.math.expr.Parser; +import io.druid.query.ColumnSelectorPlus; import io.druid.query.QueryInterruptedException; +import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BooleanFilter; @@ -1069,12 +1071,15 @@ public ValueMatcher makeValueMatcher(String dimension, final String value) } } - final ValueMatcherColumnSelectorStrategy strategy = STRATEGY_FACTORY.makeColumnSelectorStrategy( - dimension, - cursor.getColumnCapabilities(dimension) - ); + ColumnSelectorPlus[] selector = + DimensionHandlerUtils.createColumnSelectorPluses( + STRATEGY_FACTORY, + ImmutableList.of(DefaultDimensionSpec.of(dimension)), + cursor + ); - return strategy.getValueMatcher(cursor, value); + final ValueMatcherColumnSelectorStrategy strategy = selector[0].getColumnSelectorStrategy(); + return strategy.getValueMatcher(dimension, cursor, value); } @Override @@ -1086,12 +1091,15 @@ public ValueMatcher makeValueMatcher(String dimension, final DruidPredicateFacto } } - final ValueMatcherColumnSelectorStrategy strategy = STRATEGY_FACTORY.makeColumnSelectorStrategy( - dimension, - 
cursor.getColumnCapabilities(dimension) - ); + ColumnSelectorPlus[] selector = + DimensionHandlerUtils.createColumnSelectorPluses( + STRATEGY_FACTORY, + ImmutableList.of(DefaultDimensionSpec.of(dimension)), + cursor + ); - return strategy.getValueMatcher(cursor, predicateFactory); + final ValueMatcherColumnSelectorStrategy strategy = selector[0].getColumnSelectorStrategy(); + return strategy.getValueMatcher(dimension, cursor, predicateFactory); } private ValueMatcher makeLongValueMatcher(String dimension, final DruidLongPredicate predicate) diff --git a/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorBenchmark.java b/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorBenchmark.java index cba915f2d223..2687003d0ea6 100644 --- a/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorBenchmark.java +++ b/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorBenchmark.java @@ -29,8 +29,8 @@ import com.google.common.collect.Lists; import com.google.common.collect.Range; import io.druid.query.ColumnSelectorPlus; -import io.druid.query.aggregation.cardinality.types.CardinalityAggColumnSelectorStrategy; -import io.druid.query.aggregation.cardinality.types.StringCardinalityAggColumnSelectorStrategy; +import io.druid.query.aggregation.cardinality.types.CardinalityAggregatorColumnSelectorStrategy; +import io.druid.query.aggregation.cardinality.types.StringCardinalityAggregatorColumnSelectorStrategy; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; import io.druid.segment.DimensionSelector; @@ -44,7 +44,7 @@ public class CardinalityAggregatorBenchmark extends SimpleBenchmark CardinalityBufferAggregator agg; List selectorList; - List> dimInfoList; + List> dimInfoList; ByteBuffer buf; int pos; @@ -82,10 +82,10 @@ public String[] apply(Integer input) final DimensionSpec dimSpec1 = new 
DefaultDimensionSpec("dim1", "dim1"); final CardinalityAggregatorTest.TestDimensionSelector dim1 = new CardinalityAggregatorTest.TestDimensionSelector(values, null); - final ColumnSelectorPlus dimInfo1 = new ColumnSelectorPlus( + final ColumnSelectorPlus dimInfo1 = new ColumnSelectorPlus( dimSpec1.getDimension(), dimSpec1.getOutputName(), - new StringCardinalityAggColumnSelectorStrategy(), + new StringCardinalityAggregatorColumnSelectorStrategy(), dim1 ); diff --git a/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java b/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java index 153faf26188b..aa8560513f4c 100644 --- a/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java +++ b/processing/src/test/java/io/druid/query/aggregation/cardinality/CardinalityAggregatorTest.java @@ -32,8 +32,8 @@ import io.druid.query.aggregation.Aggregator; import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.BufferAggregator; -import io.druid.query.aggregation.cardinality.types.CardinalityAggColumnSelectorStrategy; -import io.druid.query.aggregation.cardinality.types.StringCardinalityAggColumnSelectorStrategy; +import io.druid.query.aggregation.cardinality.types.CardinalityAggregatorColumnSelectorStrategy; +import io.druid.query.aggregation.cardinality.types.StringCardinalityAggregatorColumnSelectorStrategy; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; import io.druid.query.dimension.ExtractionDimensionSpec; @@ -247,19 +247,19 @@ private static void bufferAggregate( } } - List> dimInfoList; + List> dimInfoList; List selectorList; CardinalityAggregatorFactory rowAggregatorFactory; CardinalityAggregatorFactory valueAggregatorFactory; final TestDimensionSelector dim1; final TestDimensionSelector dim2; - List> dimInfoListWithExtraction; + List> dimInfoListWithExtraction; List 
selectorListWithExtraction; final TestDimensionSelector dim1WithExtraction; final TestDimensionSelector dim2WithExtraction; - List> dimInfoListConstantVal; + List> dimInfoListConstantVal; List selectorListConstantVal; final TestDimensionSelector dim1ConstantVal; final TestDimensionSelector dim2ConstantVal; @@ -273,15 +273,15 @@ public CardinalityAggregatorTest() dim2 = new TestDimensionSelector(values2, null); dimInfoList = Lists.newArrayList( - new ColumnSelectorPlus( + new ColumnSelectorPlus( dimSpec1.getDimension(), dimSpec1.getOutputName(), - new StringCardinalityAggColumnSelectorStrategy(), dim1 + new StringCardinalityAggregatorColumnSelectorStrategy(), dim1 ), - new ColumnSelectorPlus( + new ColumnSelectorPlus( dimSpec2.getDimension(), dimSpec2.getOutputName(), - new StringCardinalityAggColumnSelectorStrategy(), dim2 + new StringCardinalityAggregatorColumnSelectorStrategy(), dim2 ) ); @@ -318,15 +318,15 @@ public CardinalityAggregatorTest() dim2WithExtraction ); dimInfoListWithExtraction = Lists.newArrayList( - new ColumnSelectorPlus( + new ColumnSelectorPlus( dimSpec1.getDimension(), dimSpec1.getOutputName(), - new StringCardinalityAggColumnSelectorStrategy(), dim1WithExtraction + new StringCardinalityAggregatorColumnSelectorStrategy(), dim1WithExtraction ), - new ColumnSelectorPlus( + new ColumnSelectorPlus( dimSpec1.getDimension(), dimSpec1.getOutputName(), - new StringCardinalityAggColumnSelectorStrategy(), dim2WithExtraction + new StringCardinalityAggregatorColumnSelectorStrategy(), dim2WithExtraction ) ); @@ -339,15 +339,15 @@ public CardinalityAggregatorTest() dim2ConstantVal ); dimInfoListConstantVal = Lists.newArrayList( - new ColumnSelectorPlus( + new ColumnSelectorPlus( dimSpec1.getDimension(), dimSpec1.getOutputName(), - new StringCardinalityAggColumnSelectorStrategy(), dim1ConstantVal + new StringCardinalityAggregatorColumnSelectorStrategy(), dim1ConstantVal ), - new ColumnSelectorPlus( + new ColumnSelectorPlus( dimSpec1.getDimension(), 
dimSpec1.getOutputName(), - new StringCardinalityAggColumnSelectorStrategy(), dim2ConstantVal + new StringCardinalityAggregatorColumnSelectorStrategy(), dim2ConstantVal ) ); @@ -431,18 +431,18 @@ public void testCombineRows() { List selector1 = Lists.newArrayList((DimensionSelector) dim1); List selector2 = Lists.newArrayList((DimensionSelector) dim2); - List> dimInfo1 = Lists.newArrayList( - new ColumnSelectorPlus( + List> dimInfo1 = Lists.newArrayList( + new ColumnSelectorPlus( dimSpec1.getDimension(), dimSpec1.getOutputName(), - new StringCardinalityAggColumnSelectorStrategy(), dim1 + new StringCardinalityAggregatorColumnSelectorStrategy(), dim1 ) ); - List> dimInfo2 = Lists.newArrayList( - new ColumnSelectorPlus( + List> dimInfo2 = Lists.newArrayList( + new ColumnSelectorPlus( dimSpec1.getDimension(), dimSpec1.getOutputName(), - new StringCardinalityAggColumnSelectorStrategy(), dim2 + new StringCardinalityAggregatorColumnSelectorStrategy(), dim2 ) ); @@ -477,18 +477,18 @@ public void testCombineValues() List selector1 = Lists.newArrayList((DimensionSelector) dim1); List selector2 = Lists.newArrayList((DimensionSelector) dim2); - List> dimInfo1 = Lists.newArrayList( - new ColumnSelectorPlus( + List> dimInfo1 = Lists.newArrayList( + new ColumnSelectorPlus( dimSpec1.getDimension(), dimSpec1.getOutputName(), - new StringCardinalityAggColumnSelectorStrategy(), dim1 + new StringCardinalityAggregatorColumnSelectorStrategy(), dim1 ) ); - List> dimInfo2 = Lists.newArrayList( - new ColumnSelectorPlus( + List> dimInfo2 = Lists.newArrayList( + new ColumnSelectorPlus( dimSpec1.getDimension(), dimSpec1.getOutputName(), - new StringCardinalityAggColumnSelectorStrategy(), dim2 + new StringCardinalityAggregatorColumnSelectorStrategy(), dim2 ) ); diff --git a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java index 6961f423c5b3..5cdb371e6848 100644 --- 
a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java @@ -6878,4 +6878,43 @@ public void testGroupByCardinalityAggWithExtractionFn() Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); TestHelper.assertExpectedObjects(expectedResults, results, ""); } + + @Test + public void testGroupByCardinalityAggOnFloat() + { + GroupByQuery query = GroupByQuery + .builder() + .setDataSource(QueryRunnerTestHelper.dataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) + .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("market", "alias"))) + .setAggregatorSpecs( + Arrays.asList( + QueryRunnerTestHelper.rowsCount, + new CardinalityAggregatorFactory( + "numVals", + ImmutableList.of(new DefaultDimensionSpec( + QueryRunnerTestHelper.indexMetric, + QueryRunnerTestHelper.indexMetric + )), + false + ) + ) + ) + .setGranularity(QueryRunnerTestHelper.dayGran) + .build(); + + // CardinalityAggregator currently treats non-String columns as having all nulls, so cardinality is 1 for + // the 'index' column + List expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "spot", "rows", 9L, "numVals", 1.0002442201269182d), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "total_market", "rows", 2L, "numVals", 1.0002442201269182d), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "upfront", "rows", 2L, "numVals", 1.0002442201269182d), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "spot", "rows", 9L, "numVals", 1.0002442201269182d), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "total_market", "rows", 2L, "numVals", 1.0002442201269182d), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "upfront", "rows", 2L, "numVals", 1.0002442201269182d) + ); + + Iterable results = 
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); + TestHelper.assertExpectedObjects(expectedResults, results, ""); + } } From f5959b5631aaecf43a641ce443cd3579dd3dede4 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Wed, 21 Dec 2016 16:02:54 -0800 Subject: [PATCH 12/12] PR comments --- .../druid/query/search/SearchQueryRunner.java | 4 --- .../druid/query/topn/PooledTopNAlgorithm.java | 4 +-- .../topn/TimeExtractionTopNAlgorithm.java | 2 +- .../java/io/druid/query/topn/TopNParams.java | 7 +++-- .../StringTopNColumnSelectorStrategy.java | 2 +- .../query/search/SearchQueryRunnerTest.java | 29 +++++++++++++++++++ 6 files changed, 38 insertions(+), 10 deletions(-) diff --git a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java index 9aeac5f52644..8bcafa905f94 100644 --- a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java +++ b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java @@ -289,10 +289,6 @@ private void partitionDimensionList( if (index != null) { for (DimensionSpec spec : dimsToSearch) { - if (spec.getDimension().equals(Column.TIME_COLUMN_NAME)) { - bitmapDims.add(spec); - continue; - } ColumnCapabilities capabilities = storageAdapter.getColumnCapabilities(spec.getDimension()); if (capabilities == null) { continue; diff --git a/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java index 41bd8ca785c9..709d7508a7bf 100644 --- a/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java @@ -194,7 +194,7 @@ protected void scanAndAggregate( final int numBytesPerRecord = params.getNumBytesPerRecord(); final int[] aggregatorSizes = params.getAggregatorSizes(); final Cursor cursor = params.getCursor(); - final DimensionSelector dimSelector = (DimensionSelector) 
params.getDimSelector(); + final DimensionSelector dimSelector = params.getDimSelector(); final int[] aggregatorOffsets = new int[aggregatorSizes.length]; for (int j = 0, offset = 0; j < aggregatorSizes.length; ++j) { @@ -459,7 +459,7 @@ protected void updateResults( { final ByteBuffer resultsBuf = params.getResultsBuf(); final int[] aggregatorSizes = params.getAggregatorSizes(); - final DimensionSelector dimSelector = (DimensionSelector) params.getDimSelector(); + final DimensionSelector dimSelector = params.getDimSelector(); for (int i = 0; i < positions.length; i++) { int position = positions[i]; diff --git a/processing/src/main/java/io/druid/query/topn/TimeExtractionTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/TimeExtractionTopNAlgorithm.java index cb53b0f3561b..f4fa5a226777 100644 --- a/processing/src/main/java/io/druid/query/topn/TimeExtractionTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/TimeExtractionTopNAlgorithm.java @@ -74,7 +74,7 @@ protected void scanAndAggregate( ) { final Cursor cursor = params.getCursor(); - final DimensionSelector dimSelector = (DimensionSelector) params.getDimSelector(); + final DimensionSelector dimSelector = params.getDimSelector(); while (!cursor.isDone()) { final String key = dimSelector.lookupName(dimSelector.getRow().get(0)); diff --git a/processing/src/main/java/io/druid/query/topn/TopNParams.java b/processing/src/main/java/io/druid/query/topn/TopNParams.java index 2083ff97c485..c973267fe34c 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNParams.java +++ b/processing/src/main/java/io/druid/query/topn/TopNParams.java @@ -22,6 +22,7 @@ import io.druid.query.ColumnSelectorPlus; import io.druid.query.topn.types.TopNColumnSelectorStrategy; import io.druid.segment.Cursor; +import io.druid.segment.DimensionSelector; /** */ @@ -48,9 +49,11 @@ protected TopNParams( } } - public Object getDimSelector() + // Only used by TopN algorithms that support String exclusively + // 
Otherwise, get an appropriately typed selector from getSelectorPlus() + public DimensionSelector getDimSelector() { - return selectorPlus.getSelector(); + return (DimensionSelector) selectorPlus.getSelector(); } public ColumnSelectorPlus getSelectorPlus() diff --git a/processing/src/main/java/io/druid/query/topn/types/StringTopNColumnSelectorStrategy.java b/processing/src/main/java/io/druid/query/topn/types/StringTopNColumnSelectorStrategy.java index 10ece39c411e..aa25e03b7750 100644 --- a/processing/src/main/java/io/druid/query/topn/types/StringTopNColumnSelectorStrategy.java +++ b/processing/src/main/java/io/druid/query/topn/types/StringTopNColumnSelectorStrategy.java @@ -28,7 +28,7 @@ public Aggregator[][] getDimExtractionRowSelector(TopNQuery query, TopNParams pa // not possible when applying an extraction function final BaseTopNAlgorithm.AggregatorArrayProvider provider = new BaseTopNAlgorithm.AggregatorArrayProvider( - (DimensionSelector) params.getDimSelector(), + (DimensionSelector) params.getSelectorPlus().getSelector(), query, params.getCardinality(), capabilities diff --git a/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java b/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java index 10586137931c..6f6b5c38f88f 100644 --- a/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java @@ -31,6 +31,7 @@ import io.druid.query.Result; import io.druid.query.dimension.ExtractionDimensionSpec; import io.druid.query.extraction.MapLookupExtractor; +import io.druid.query.extraction.TimeFormatExtractionFn; import io.druid.query.filter.AndDimFilter; import io.druid.query.filter.DimFilter; import io.druid.query.filter.ExtractionDimFilter; @@ -44,6 +45,7 @@ import io.druid.query.search.search.SearchSortSpec; import io.druid.query.spec.MultipleIntervalSegmentSpec; import io.druid.segment.TestHelper; +import 
io.druid.segment.column.Column; import org.joda.time.DateTime; import org.joda.time.Interval; import org.junit.Assert; @@ -602,6 +604,33 @@ public void testSearchWithNumericSort() checkSearchQuery(searchQuery, expectedHits); } + @Test + public void testSearchOnTime() + { + SearchQuery searchQuery = Druids.newSearchQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + .granularity(QueryRunnerTestHelper.allGran) + .intervals(QueryRunnerTestHelper.fullOnInterval) + .query("Friday") + .dimensions(new ExtractionDimensionSpec( + Column.TIME_COLUMN_NAME, + "__time2", + new TimeFormatExtractionFn( + "EEEE", + null, + null, + null, + false + ) + )) + .build(); + + List expectedHits = Lists.newLinkedList(); + expectedHits.add(new SearchHit("__time2", "Friday", 169)); + + checkSearchQuery(searchQuery, expectedHits); + } + private void checkSearchQuery(Query searchQuery, List expectedResults) {