diff --git a/benchmarks/src/main/java/io/druid/benchmark/query/SearchBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/query/SearchBenchmark.java index 161d40cc31b0..c295702b94aa 100644 --- a/benchmarks/src/main/java/io/druid/benchmark/query/SearchBenchmark.java +++ b/benchmarks/src/main/java/io/druid/benchmark/query/SearchBenchmark.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Suppliers; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.hash.Hashing; @@ -39,6 +40,7 @@ import io.druid.java.util.common.guava.Sequences; import io.druid.java.util.common.logger.Logger; import io.druid.query.Druids; +import io.druid.query.Druids.SearchQueryBuilder; import io.druid.query.FinalizeResultsQueryRunner; import io.druid.query.Query; import io.druid.query.QueryRunner; @@ -46,9 +48,17 @@ import io.druid.query.QueryToolChest; import io.druid.query.Result; import io.druid.query.aggregation.hyperloglog.HyperUniquesSerde; +import io.druid.query.extraction.DimExtractionFn; +import io.druid.query.extraction.IdentityExtractionFn; +import io.druid.query.extraction.LowerExtractionFn; +import io.druid.query.extraction.StrlenExtractionFn; +import io.druid.query.extraction.SubstringDimExtractionFn; +import io.druid.query.extraction.UpperExtractionFn; import io.druid.query.filter.AndDimFilter; +import io.druid.query.filter.BoundDimFilter; import io.druid.query.filter.DimFilter; import io.druid.query.filter.InDimFilter; +import io.druid.query.filter.SelectorDimFilter; import io.druid.query.search.SearchQueryQueryToolChest; import io.druid.query.search.SearchQueryRunnerFactory; import io.druid.query.search.SearchResultValue; @@ -146,56 +156,156 @@ public int columnCacheSizeBytes() private void setupQueries() { // queries for the basic schema - Map basicQueries = new LinkedHashMap<>(); - BenchmarkSchemaInfo basicSchema = 
BenchmarkSchemas.SCHEMA_MAP.get("basic"); + final Map basicQueries = new LinkedHashMap<>(); + final BenchmarkSchemaInfo basicSchema = BenchmarkSchemas.SCHEMA_MAP.get("basic"); - { // basic.A - QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval())); + final List queryTypes = ImmutableList.of("A", "B", "C", "D"); + for (final String eachType : queryTypes) { + basicQueries.put(eachType, makeQuery(eachType, basicSchema)); + } + + SCHEMA_QUERY_MAP.put("basic", basicQueries); + } + + private static SearchQueryBuilder makeQuery(final String name, final BenchmarkSchemaInfo basicSchema) + { + switch (name) { + case "A": + return basicA(basicSchema); + case "B": + return basicB(basicSchema); + case "C": + return basicC(basicSchema); + case "D": + return basicD(basicSchema); + default: + return null; + } + } + + private static SearchQueryBuilder basicA(final BenchmarkSchemaInfo basicSchema) + { + final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval())); - Druids.SearchQueryBuilder queryBuilderA = - Druids.newSearchQueryBuilder() - .dataSource("blah") - .granularity(QueryGranularities.ALL) - .intervals(intervalSpec) - .query("123"); + return Druids.newSearchQueryBuilder() + .dataSource("blah") + .granularity(QueryGranularities.ALL) + .intervals(intervalSpec) + .query("123"); + } - basicQueries.put("A", queryBuilderA); + private static SearchQueryBuilder basicB(final BenchmarkSchemaInfo basicSchema) + { + final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval())); + + final List dimUniformFilterVals = Lists.newArrayList(); + int resultNum = (int) (100000 * 0.1); + int step = 100000 / resultNum; + for (int i = 1; i < 100001 && dimUniformFilterVals.size() < resultNum; i += step) { + dimUniformFilterVals.add(String.valueOf(i)); } - { // basic.B - QuerySegmentSpec intervalSpec = new 
MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval())); + List dimHyperUniqueFilterVals = Lists.newArrayList(); + resultNum = (int) (100000 * 0.1); + step = 100000 / resultNum; + for (int i = 0; i < 100001 && dimHyperUniqueFilterVals.size() < resultNum; i += step) { + dimHyperUniqueFilterVals.add(String.valueOf(i)); + } - List dimUniformFilterVals = Lists.newArrayList(); - int resultNum = (int) (100000 * 0.1); - int step = 100000 / resultNum; - for (int i = 1; i < 100001 && dimUniformFilterVals.size() < resultNum; i += step) { - dimUniformFilterVals.add(String.valueOf(i)); + final List dimFilters = Lists.newArrayList(); + dimFilters.add(new InDimFilter("dimUniform", dimUniformFilterVals, null)); + dimFilters.add(new InDimFilter("dimHyperUnique", dimHyperUniqueFilterVals, null)); + + return Druids.newSearchQueryBuilder() + .dataSource("blah") + .granularity(QueryGranularities.ALL) + .intervals(intervalSpec) + .query("") + .dimensions(Lists.newArrayList("dimUniform", "dimHyperUnique")) + .filters(new AndDimFilter(dimFilters)); + } + + private static SearchQueryBuilder basicC(final BenchmarkSchemaInfo basicSchema) + { + final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval())); + + final List dimUniformFilterVals = Lists.newArrayList(); + final int resultNum = (int) (100000 * 0.1); + final int step = 100000 / resultNum; + for (int i = 1; i < 100001 && dimUniformFilterVals.size() < resultNum; i += step) { + dimUniformFilterVals.add(String.valueOf(i)); + } + + final String dimName = "dimUniform"; + final List dimFilters = Lists.newArrayList(); + dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, IdentityExtractionFn.getInstance())); + dimFilters.add(new SelectorDimFilter(dimName, "3", StrlenExtractionFn.instance())); + dimFilters.add(new BoundDimFilter(dimName, "100", "10000", true, true, true, new DimExtractionFn() + { + @Override + public byte[] getCacheKey() + { + return new 
byte[]{0xF}; } - List dimHyperUniqueFilterVals = Lists.newArrayList(); - resultNum = (int) (100000 * 0.1); - step = 100000 / resultNum; - for (int i = 0; i < 100001 && dimHyperUniqueFilterVals.size() < resultNum; i += step) { - dimHyperUniqueFilterVals.add(String.valueOf(i)); + @Override + public String apply(String value) + { + return String.valueOf(Long.parseLong(value) + 1); } - final List dimFilters = Lists.newArrayList(); - dimFilters.add(new InDimFilter("dimUniform", dimUniformFilterVals, null)); - dimFilters.add(new InDimFilter("dimHyperUnique", dimHyperUniqueFilterVals, null)); + @Override + public boolean preservesOrdering() + { + return false; + } - Druids.SearchQueryBuilder queryBuilderB = - Druids.newSearchQueryBuilder() - .dataSource("blah") - .granularity(QueryGranularities.ALL) - .intervals(intervalSpec) - .query("") - .dimensions(Lists.newArrayList("dimUniform", "dimHyperUnique")) - .filters(new AndDimFilter(dimFilters)); + @Override + public ExtractionType getExtractionType() + { + return ExtractionType.ONE_TO_ONE; + } + }, null)); + dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, new LowerExtractionFn(null))); + dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, new UpperExtractionFn(null))); + dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, new SubstringDimExtractionFn(1, 3))); + + return Druids.newSearchQueryBuilder() + .dataSource("blah") + .granularity(QueryGranularities.ALL) + .intervals(intervalSpec) + .query("") + .dimensions(Lists.newArrayList("dimUniform")) + .filters(new AndDimFilter(dimFilters)); + } + + private static SearchQueryBuilder basicD(final BenchmarkSchemaInfo basicSchema) + { + final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval())); - basicQueries.put("B", queryBuilderB); + final List dimUniformFilterVals = Lists.newArrayList(); + final int resultNum = (int) (100000 * 0.1); + final int step = 100000 / resultNum; + for (int i = 
1; i < 100001 && dimUniformFilterVals.size() < resultNum; i += step) { + dimUniformFilterVals.add(String.valueOf(i)); } - SCHEMA_QUERY_MAP.put("basic", basicQueries); + final String dimName = "dimUniform"; + final List dimFilters = Lists.newArrayList(); + dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, null)); + dimFilters.add(new SelectorDimFilter(dimName, "3", null)); + dimFilters.add(new BoundDimFilter(dimName, "100", "10000", true, true, true, null, null)); + dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, null)); + dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, null)); + dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, null)); + + return Druids.newSearchQueryBuilder() + .dataSource("blah") + .granularity(QueryGranularities.ALL) + .intervals(intervalSpec) + .query("") + .dimensions(Lists.newArrayList("dimUniform")) + .filters(new AndDimFilter(dimFilters)); } @Setup @@ -357,7 +467,10 @@ public void queryMultiQueryableIndex(Blackhole blackhole) throws Exception ); Sequence> queryResult = theRunner.run(query, Maps.newHashMap()); - List> results = Sequences.toList(queryResult, Lists.>newArrayList()); + List> results = Sequences.toList( + queryResult, + Lists.>newArrayList() + ); for (Result result : results) { List hits = result.getValue().getValue(); diff --git a/processing/src/main/java/io/druid/query/filter/Filter.java b/processing/src/main/java/io/druid/query/filter/Filter.java index 60f38269494c..0aabd4beeef3 100644 --- a/processing/src/main/java/io/druid/query/filter/Filter.java +++ b/processing/src/main/java/io/druid/query/filter/Filter.java @@ -20,6 +20,7 @@ package io.druid.query.filter; import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; /** @@ -30,18 +31,39 @@ public interface Filter * Get a bitmap index, indicating rows that match this filter. 
* * @param selector Object used to retrieve bitmap indexes + * * @return A bitmap indicating rows that match this filter. + * + * @see Filter#estimateSelectivity(BitmapIndexSelector) + */ + ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector); + + + /** + * Estimate the selectivity of this filter. + * This method can be used for cost-based query planning, as in {@link io.druid.query.search.search.AutoStrategy}. + * To avoid the significant performance degradation of calculating the exact value, + * implementations of this method aim for rapid selectivity estimation + * at a reasonable sacrifice of accuracy. + * As a result, the estimated selectivity may differ from the exact value. + * + * @param indexSelector Object used to retrieve bitmap indexes + * + * @return an estimated selectivity ranging from 0 (filter selects no rows) to 1 (filter selects all rows). + * + * @see Filter#getBitmapIndex(BitmapIndexSelector) */ - public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector); + double estimateSelectivity(BitmapIndexSelector indexSelector); /** * Get a ValueMatcher that applies this filter to row values. * * @param factory Object used to create ValueMatchers + * * @return ValueMatcher that applies this filter to row values. */ - public ValueMatcher makeMatcher(ColumnSelectorFactory factory); + ValueMatcher makeMatcher(ColumnSelectorFactory factory); /** @@ -49,7 +71,21 @@ public interface Filter * the information provided by the input BitmapIndexSelector. * * @param selector Object used to retrieve bitmap indexes - * @return true if this Filter can provide a bitmap index using the selector, false otherwise + * + * @return true if this Filter can provide a bitmap index using the selector, false otherwise. + */ + boolean supportsBitmapIndex(BitmapIndexSelector selector); + + + /** + * Indicates whether this filter supports selectivity estimation. 
+ * A filter supports selectivity estimation if it supports bitmap indexes and + * the dimension that the filter evaluates does not have multiple values. + * + * @param columnSelector Object used to check whether the dimension has multiple values. + * @param indexSelector Object used to retrieve bitmap indexes + * + * @return true if this Filter supports selectivity estimation, false otherwise. */ - public boolean supportsBitmapIndex(BitmapIndexSelector selector); + boolean supportsSelectivityEstimation(ColumnSelector columnSelector, BitmapIndexSelector indexSelector); } diff --git a/processing/src/main/java/io/druid/query/search/search/AutoStrategy.java b/processing/src/main/java/io/druid/query/search/search/AutoStrategy.java index 81dc05ca4cf9..f574c108bed5 100644 --- a/processing/src/main/java/io/druid/query/search/search/AutoStrategy.java +++ b/processing/src/main/java/io/druid/query/search/search/AutoStrategy.java @@ -20,7 +20,6 @@ package io.druid.query.search.search; import com.metamx.emitter.EmittingLogger; -import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.query.dimension.DimensionSpec; import io.druid.query.filter.BitmapIndexSelector; import io.druid.segment.ColumnSelectorBitmapIndexSelector; @@ -58,18 +57,12 @@ public List getExecutionPlan(SearchQuery query, Segment seg index ); - // Index-only plan is used only when any filter is not specified or every filter supports bitmap indexes. + // Index-only plan is used only when no filter is specified or the filter supports bitmap indexes. // // Note: if some filters support bitmap indexes but others are not, the current implementation always employs // the cursor-based plan. This can be more optimized. One possible optimization is generating a bitmap index - // from the non-bitmap-support filter, and then use it to compute the filtered result by intersecting bitmaps. 
- if (filter == null || filter.supportsBitmapIndex(selector)) { - final ImmutableBitmap timeFilteredBitmap = UseIndexesStrategy.makeTimeFilteredBitmap( - index, - segment, - filter, - interval - ); + // from the non-bitmap-support filters, and then use it to compute the filtered result by intersecting bitmaps. + if (filter == null || filter.supportsSelectivityEstimation(index, selector)) { final List dimsToSearch = getDimsToSearch( index.getAvailableDimensions(), query.getDimensions() @@ -84,15 +77,19 @@ public List getExecutionPlan(SearchQuery query, Segment seg // * (search predicate processing cost) final SearchQueryDecisionHelper helper = getDecisionHelper(index); final double useIndexStrategyCost = helper.getBitmapIntersectCost() * computeTotalCard(index, dimsToSearch); - final double cursorOnlyStrategyCost = - (timeFilteredBitmap == null ? index.getNumRows() : timeFilteredBitmap.size()) * dimsToSearch.size(); - log.debug("Use-index strategy cost: %f, cursor-only strategy cost: %f", - useIndexStrategyCost, cursorOnlyStrategyCost + final double cursorOnlyStrategyCost = (filter == null ? 1. 
: filter.estimateSelectivity(selector)) + * selector.getNumRows() + * dimsToSearch.size(); + + log.debug( + "Use-index strategy cost: %f, cursor-only strategy cost: %f", + useIndexStrategyCost, + cursorOnlyStrategyCost ); if (useIndexStrategyCost < cursorOnlyStrategyCost) { log.debug("Use-index execution strategy is selected, query id [%s]", query.getId()); - return UseIndexesStrategy.withTimeFilteredBitmap(query, timeFilteredBitmap).getExecutionPlan(query, segment); + return UseIndexesStrategy.of(query).getExecutionPlan(query, segment); } else { log.debug("Cursor-only execution strategy is selected, query id [%s]", query.getId()); return CursorOnlyStrategy.of(query).getExecutionPlan(query, segment); diff --git a/processing/src/main/java/io/druid/query/search/search/UseIndexesStrategy.java b/processing/src/main/java/io/druid/query/search/search/UseIndexesStrategy.java index fcf6f23016ec..e68d032224f7 100644 --- a/processing/src/main/java/io/druid/query/search/search/UseIndexesStrategy.java +++ b/processing/src/main/java/io/druid/query/search/search/UseIndexesStrategy.java @@ -44,7 +44,6 @@ import it.unimi.dsi.fastutil.objects.Object2IntRBTreeMap; import org.joda.time.Interval; -import javax.annotation.Nullable; import java.util.Arrays; import java.util.List; @@ -52,31 +51,16 @@ public class UseIndexesStrategy extends SearchStrategy { public static final String NAME = "useIndexes"; - private final ImmutableBitmap timeFilteredBitmap; - private final boolean needToMakeFilteredBitmap; - public static UseIndexesStrategy of(SearchQuery query) { - return new UseIndexesStrategy(query, true, null); - } - - public static UseIndexesStrategy withTimeFilteredBitmap( - SearchQuery query, - @Nullable ImmutableBitmap timeFilteredBitmap - ) - { - return new UseIndexesStrategy(query, false, timeFilteredBitmap); + return new UseIndexesStrategy(query); } private UseIndexesStrategy( - SearchQuery query, - boolean needToMakeFilteredBitmap, - @Nullable ImmutableBitmap timeFilteredBitmap 
+ SearchQuery query ) { super(query); - this.needToMakeFilteredBitmap = needToMakeFilteredBitmap; - this.timeFilteredBitmap = timeFilteredBitmap; } @Override @@ -105,9 +89,7 @@ public List getExecutionPlan(SearchQuery query, Segment seg // the cursor-based plan. This can be more optimized. One possible optimization is generating a bitmap index // from the non-bitmap-support filter, and then use it to compute the filtered result by intersecting bitmaps. if (filter == null || filter.supportsBitmapIndex(selector)) { - final ImmutableBitmap timeFilteredBitmap = this.needToMakeFilteredBitmap ? - makeTimeFilteredBitmap(index, segment, filter, interval) : - this.timeFilteredBitmap; + final ImmutableBitmap timeFilteredBitmap = makeTimeFilteredBitmap(index, segment, filter, interval); builder.add(new IndexOnlyExecutor(query, segment, timeFilteredBitmap, bitmapSuppDims)); } else { // Fall back to cursor-based execution strategy diff --git a/processing/src/main/java/io/druid/segment/IntIteratorUtils.java b/processing/src/main/java/io/druid/segment/IntIteratorUtils.java index 45e205a17eae..75189b7511d3 100644 --- a/processing/src/main/java/io/druid/segment/IntIteratorUtils.java +++ b/processing/src/main/java/io/druid/segment/IntIteratorUtils.java @@ -22,8 +22,11 @@ import com.metamx.common.IAE; import com.metamx.common.guava.MergeIterator; import it.unimi.dsi.fastutil.ints.AbstractIntIterator; +import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.ints.IntIterator; import it.unimi.dsi.fastutil.ints.IntIterators; +import it.unimi.dsi.fastutil.ints.IntList; +import it.unimi.dsi.fastutil.ints.IntLists; import it.unimi.dsi.fastutil.longs.LongHeaps; import java.util.List; @@ -193,5 +196,14 @@ public int skip(int n) } } + public static IntList toIntList(IntIterator iterator) + { + final IntList integers = new IntArrayList(); + while (iterator.hasNext()) { + integers.add(iterator.nextInt()); + } + return IntLists.unmodifiable(integers); + } + private 
IntIteratorUtils() {} } diff --git a/processing/src/main/java/io/druid/segment/IntListUtils.java b/processing/src/main/java/io/druid/segment/IntListUtils.java new file mode 100644 index 000000000000..f4289eeac72a --- /dev/null +++ b/processing/src/main/java/io/druid/segment/IntListUtils.java @@ -0,0 +1,60 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.segment; + +import com.google.common.base.Preconditions; +import it.unimi.dsi.fastutil.ints.AbstractIntList; +import it.unimi.dsi.fastutil.ints.IntList; + +public class IntListUtils +{ + private IntListUtils() {} + + public static IntList fromTo(int from, int to) + { + Preconditions.checkArgument(from <= to); + return new RangeIntList(from, to); + } + + private static final class RangeIntList extends AbstractIntList + { + private final int start; + private final int size; + + RangeIntList(int start, int end) + { + this.start = start; + this.size = end - start; + } + + @Override + public int getInt(int index) + { + Preconditions.checkElementIndex(index, size); + return start + index; + } + + @Override + public int size() + { + return size; + } + } +} diff --git a/processing/src/main/java/io/druid/segment/filter/AndFilter.java b/processing/src/main/java/io/druid/segment/filter/AndFilter.java index 2c9ad19343ae..dbe4c7f45f09 100644 --- a/processing/src/main/java/io/druid/segment/filter/AndFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/AndFilter.java @@ -28,6 +28,7 @@ import io.druid.query.filter.Filter; import io.druid.query.filter.RowOffsetMatcherFactory; import io.druid.query.filter.ValueMatcher; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; import java.util.ArrayList; @@ -148,6 +149,30 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) return true; } + @Override + public boolean supportsSelectivityEstimation( + final ColumnSelector columnSelector, final BitmapIndexSelector indexSelector + ) + { + for (Filter filter : filters) { + if (!filter.supportsSelectivityEstimation(columnSelector, indexSelector)) { + return false; + } + } + return true; + } + + @Override + public double estimateSelectivity(BitmapIndexSelector indexSelector) + { + // Estimate selectivity with attribute value independence assumption + double selectivity = 1.0; + for (final Filter filter : 
filters) { + selectivity *= filter.estimateSelectivity(indexSelector); + } + return selectivity; + } + @Override public String toString() { diff --git a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java index dac3383ee1d0..aeb735de7239 100644 --- a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java @@ -22,6 +22,7 @@ import com.google.common.base.Predicate; import com.google.common.base.Supplier; import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.java.util.common.Pair; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.BoundDimFilter; @@ -30,11 +31,13 @@ import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; import io.druid.query.ordering.StringComparators; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; +import io.druid.segment.IntListUtils; import io.druid.segment.column.BitmapIndex; +import it.unimi.dsi.fastutil.ints.IntList; import java.util.Comparator; -import java.util.Iterator; public class BoundFilter implements Filter { @@ -55,82 +58,53 @@ public BoundFilter(final BoundDimFilter boundDimFilter) @Override public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector) { - if (boundDimFilter.getOrdering().equals(StringComparators.LEXICOGRAPHIC) && extractionFn == null) { - // Optimization for lexicographic bounds with no extractionFn => binary search through the index - + if (supportShortCircuit()) { final BitmapIndex bitmapIndex = selector.getBitmapIndex(boundDimFilter.getDimension()); if (bitmapIndex == null || bitmapIndex.getCardinality() == 0) { return doesMatch(null) ? 
Filters.allTrue(selector) : Filters.allFalse(selector); } - // search for start, end indexes in the bitmaps; then include all bitmaps between those points - - final int startIndex; // inclusive - final int endIndex; // exclusive + return selector.getBitmapFactory().union(getBitmapIterator(boundDimFilter, bitmapIndex)); + } else { + return Filters.matchPredicate( + boundDimFilter.getDimension(), + selector, + getPredicateFactory().makeStringPredicate() + ); + } + } - if (!boundDimFilter.hasLowerBound()) { - startIndex = 0; - } else { - final int found = bitmapIndex.getIndex(boundDimFilter.getLower()); - if (found >= 0) { - startIndex = boundDimFilter.isLowerStrict() ? found + 1 : found; - } else { - startIndex = -(found + 1); - } - } + @Override + public double estimateSelectivity(BitmapIndexSelector indexSelector) + { + if (supportShortCircuit()) { + final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(boundDimFilter.getDimension()); - if (!boundDimFilter.hasUpperBound()) { - endIndex = bitmapIndex.getCardinality(); - } else { - final int found = bitmapIndex.getIndex(boundDimFilter.getUpper()); - if (found >= 0) { - endIndex = boundDimFilter.isUpperStrict() ? found : found + 1; - } else { - endIndex = -(found + 1); - } + if (bitmapIndex == null || bitmapIndex.getCardinality() == 0) { + return doesMatch(null) ? 1. 
: 0.; } - return selector.getBitmapFactory().union( - new Iterable() - { - @Override - public Iterator iterator() - { - return new Iterator() - { - int currIndex = startIndex; - - @Override - public boolean hasNext() - { - return currIndex < endIndex; - } - - @Override - public ImmutableBitmap next() - { - return bitmapIndex.getBitmap(currIndex++); - } - - @Override - public void remove() - { - throw new UnsupportedOperationException(); - } - }; - } - } + return Filters.estimatePredicateSelectivity( + bitmapIndex, + getBitmapIndexList(boundDimFilter, bitmapIndex), + indexSelector.getNumRows() ); } else { - return Filters.matchPredicate( + return Filters.estimatePredicateSelectivity( boundDimFilter.getDimension(), - selector, + indexSelector, getPredicateFactory().makeStringPredicate() ); } } + private boolean supportShortCircuit() + { + // Optimization for lexicographic bounds with no extractionFn => binary search through the index + return boundDimFilter.getOrdering().equals(StringComparators.LEXICOGRAPHIC) && extractionFn == null; + } + @Override public ValueMatcher makeMatcher(ColumnSelectorFactory factory) { @@ -143,6 +117,70 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) return selector.getBitmapIndex(boundDimFilter.getDimension()) != null; } + @Override + public boolean supportsSelectivityEstimation( + ColumnSelector columnSelector, BitmapIndexSelector indexSelector + ) + { + return Filters.supportsSelectivityEstimation(this, boundDimFilter.getDimension(), columnSelector, indexSelector); + } + + private static Pair getStartEndIndexes( + final BoundDimFilter boundDimFilter, + final BitmapIndex bitmapIndex + ) + { + final int startIndex; // inclusive + int endIndex; // exclusive + + if (!boundDimFilter.hasLowerBound()) { + startIndex = 0; + } else { + final int found = bitmapIndex.getIndex(boundDimFilter.getLower()); + if (found >= 0) { + startIndex = boundDimFilter.isLowerStrict() ? 
found + 1 : found; + } else { + startIndex = -(found + 1); + } + } + + if (!boundDimFilter.hasUpperBound()) { + endIndex = bitmapIndex.getCardinality(); + } else { + final int found = bitmapIndex.getIndex(boundDimFilter.getUpper()); + if (found >= 0) { + endIndex = boundDimFilter.isUpperStrict() ? found : found + 1; + } else { + endIndex = -(found + 1); + } + } + + endIndex = startIndex > endIndex ? startIndex : endIndex; + + return new Pair<>(startIndex, endIndex); + } + + private static Iterable getBitmapIterator( + final BoundDimFilter boundDimFilter, + final BitmapIndex bitmapIndex + ) + { + return Filters.bitmapsFromIndexes(getBitmapIndexList(boundDimFilter, bitmapIndex), bitmapIndex); + } + + private static IntList getBitmapIndexList( + final BoundDimFilter boundDimFilter, + final BitmapIndex bitmapIndex + ) + { + // search for start, end indexes in the bitmaps; then include all bitmaps between those points + final Pair indexes = getStartEndIndexes(boundDimFilter, bitmapIndex); + final int startIndex = indexes.lhs; + final int endIndex = indexes.rhs; + + return IntListUtils.fromTo(startIndex, endIndex); + } + private DruidPredicateFactory getPredicateFactory() { return new DruidPredicateFactory() diff --git a/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java b/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java index 6af811392835..0155f9f4cb9e 100644 --- a/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java @@ -28,6 +28,7 @@ import io.druid.query.filter.DruidPredicateFactory; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; /** @@ -104,6 +105,24 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) return selector.getBitmapIndex(dimension) != null; } + @Override + 
public boolean supportsSelectivityEstimation( + ColumnSelector columnSelector, BitmapIndexSelector indexSelector + ) + { + return Filters.supportsSelectivityEstimation(this, dimension, columnSelector, indexSelector); + } + + @Override + public double estimateSelectivity(BitmapIndexSelector indexSelector) + { + return Filters.estimatePredicateSelectivity( + dimension, + indexSelector, + predicateFactory.makeStringPredicate() + ); + } + @Override public String toString() { diff --git a/processing/src/main/java/io/druid/segment/filter/Filters.java b/processing/src/main/java/io/druid/segment/filter/Filters.java index d71d1ba8dec1..f4b1ccdabb63 100644 --- a/processing/src/main/java/io/druid/segment/filter/Filters.java +++ b/processing/src/main/java/io/druid/segment/filter/Filters.java @@ -19,6 +19,7 @@ package io.druid.segment.filter; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; @@ -40,13 +41,20 @@ import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcherColumnSelectorStrategy; import io.druid.query.filter.ValueMatcherColumnSelectorStrategyFactory; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; import io.druid.segment.DimensionHandlerUtils; +import io.druid.segment.IntIteratorUtils; import io.druid.segment.LongColumnSelector; import io.druid.segment.column.BitmapIndex; +import io.druid.segment.column.Column; import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ValueType; import io.druid.segment.data.Indexed; +import it.unimi.dsi.fastutil.ints.AbstractIntIterator; +import it.unimi.dsi.fastutil.ints.IntIterable; +import it.unimi.dsi.fastutil.ints.IntIterator; +import it.unimi.dsi.fastutil.ints.IntList; import java.util.ArrayList; import java.util.Iterator; @@ -99,7 +107,7 @@ public static Filter toFilter(DimFilter dimFilter) /** * Create a 
ValueMatcher that compares row values to the provided string. - * + * <p>
* An implementation of this method should be able to handle dimensions of various types. * * @param columnSelectorFactory Selector for columns. @@ -136,10 +144,10 @@ public static ValueMatcher makeValueMatcher( /** * Create a ValueMatcher that applies a predicate to row values. - * + * <p>
* The caller provides a predicate factory that can create a predicate for each value type supported by Druid. * See {@link DruidPredicateFactory} for more information. - * + * <p>
* When creating the ValueMatcher, the ValueMatcherFactory implementation should decide what type of predicate * to create from the predicate factory based on the ValueType of the specified dimension. * @@ -186,6 +194,48 @@ public static ImmutableBitmap allTrue(final BitmapIndexSelector selector) .complement(selector.getBitmapFactory().makeEmptyImmutableBitmap(), selector.getNumRows()); } + /** + * Transform an iterable of indexes of bitmaps to an iterable of bitmaps + * + * @param indexes indexes of bitmaps + * @param bitmapIndex an object to retrieve bitmaps using indexes + * + * @return an iterable of bitmaps + */ + static Iterable bitmapsFromIndexes(final IntIterable indexes, final BitmapIndex bitmapIndex) + { + // Do not use Iterables.transform() to avoid boxing/unboxing integers. + return new Iterable() + { + @Override + public Iterator iterator() + { + final IntIterator iterator = indexes.iterator(); + + return new Iterator() + { + @Override + public boolean hasNext() + { + return iterator.hasNext(); + } + + @Override + public ImmutableBitmap next() + { + return bitmapIndex.getBitmap(iterator.nextInt()); + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + }; + } + }; + } + /** * Return the union of bitmaps for all values matching a particular predicate. 
* @@ -194,6 +244,8 @@ public static ImmutableBitmap allTrue(final BitmapIndexSelector selector) * @param predicate predicate to use * * @return bitmap of matching rows + * + * @see #estimatePredicateSelectivity(String, BitmapIndexSelector, Predicate) */ public static ImmutableBitmap matchPredicate( final String dimension, @@ -208,73 +260,145 @@ public static ImmutableBitmap matchPredicate( // Missing dimension -> match all rows if the predicate matches null; match no rows otherwise final Indexed dimValues = selector.getDimensionValues(dimension); if (dimValues == null || dimValues.size() == 0) { - if (predicate.apply(null)) { - return selector.getBitmapFactory().complement( - selector.getBitmapFactory().makeEmptyImmutableBitmap(), - selector.getNumRows() - ); - } else { - return selector.getBitmapFactory().makeEmptyImmutableBitmap(); - } + return predicate.apply(null) ? allTrue(selector) : allFalse(selector); } // Apply predicate to all dimension values and union the matching bitmaps final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension); - return selector.getBitmapFactory().union( - new Iterable() + return selector.getBitmapFactory() + .union(makePredicateQualifyingBitmapIterable(bitmapIndex, predicate, dimValues)); + } + + /** + * Return an estimated selectivity for bitmaps of all values matching the given predicate. 
+ * + * @param dimension dimension to look at + * @param indexSelector bitmap selector + * @param predicate predicate to use + * + * @return estimated selectivity + * + * @see #matchPredicate(String, BitmapIndexSelector, Predicate) + */ + static double estimatePredicateSelectivity( + final String dimension, + final BitmapIndexSelector indexSelector, + final Predicate predicate + ) + { + Preconditions.checkNotNull(dimension, "dimension"); + Preconditions.checkNotNull(indexSelector, "selector"); + Preconditions.checkNotNull(predicate, "predicate"); + + // Missing dimension -> match all rows if the predicate matches null; match no rows otherwise + final Indexed dimValues = indexSelector.getDimensionValues(dimension); + if (dimValues == null || dimValues.size() == 0) { + return predicate.apply(null) ? 1. : 0.; + } + + // Apply predicate to all dimension values and union the matching bitmaps + final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension); + return estimatePredicateSelectivity( + bitmapIndex, + IntIteratorUtils.toIntList(makePredicateQualifyingIndexIterable(bitmapIndex, predicate, dimValues).iterator()), + indexSelector.getNumRows() + ); + } + + @VisibleForTesting + static double estimatePredicateSelectivity( + BitmapIndex bitmapIndex, + IntList bitmapIndexes, + long totalNumRows + ) + { + long numMatchedRows = 0; + for (int i = 0; i < bitmapIndexes.size(); i++) { + final ImmutableBitmap bitmap = bitmapIndex.getBitmap(bitmapIndexes.get(i)); + numMatchedRows += bitmap.size(); + } + + return Math.min(1., (double) numMatchedRows / totalNumRows); + } + + private static Iterable makePredicateQualifyingBitmapIterable( + final BitmapIndex bitmapIndex, + final Predicate predicate, + final Indexed dimValues + ) + { + return bitmapsFromIndexes(makePredicateQualifyingIndexIterable(bitmapIndex, predicate, dimValues), bitmapIndex); + } + + private static IntIterable makePredicateQualifyingIndexIterable( + final BitmapIndex bitmapIndex, + final Predicate 
predicate, + final Indexed dimValues + ) + { + return new IntIterable() + { + @Override + public IntIterator iterator() + { + return new AbstractIntIterator() { + private final int bitmapIndexCardinality = bitmapIndex.getCardinality(); + private int nextIndex = 0; + private int found = -1; + + { + found = findNextIndex(); + } + + private int findNextIndex() + { + while (nextIndex < bitmapIndexCardinality && !predicate.apply(dimValues.get(nextIndex))) { + nextIndex++; + } + + if (nextIndex < bitmapIndexCardinality) { + return nextIndex++; + } else { + return -1; + } + } + @Override - public Iterator iterator() + public boolean hasNext() { - return new Iterator() - { - private final int bitmapIndexCardinality = bitmapIndex.getCardinality(); - private int nextIndex = 0; - private ImmutableBitmap nextBitmap; - - { - findNextBitmap(); - } - - private void findNextBitmap() - { - while (nextIndex < bitmapIndexCardinality) { - if (predicate.apply(dimValues.get(nextIndex))) { - nextBitmap = bitmapIndex.getBitmap(nextIndex); - nextIndex++; - return; - } - nextIndex++; - } - nextBitmap = null; - } - - @Override - public boolean hasNext() - { - return nextBitmap != null; - } - - @Override - public ImmutableBitmap next() - { - ImmutableBitmap bitmap = nextBitmap; - if (bitmap == null) { - throw new NoSuchElementException(); - } - findNextBitmap(); - return bitmap; - } - - @Override - public void remove() - { - throw new UnsupportedOperationException(); - } - }; + return found != -1; } - } - ); + + @Override + public int nextInt() + { + int foundIndex = this.found; + if (foundIndex == -1) { + throw new NoSuchElementException(); + } + this.found = findNextIndex(); + return foundIndex; + } + }; + } + }; + } + + static boolean supportsSelectivityEstimation( + Filter filter, + String dimension, + ColumnSelector columnSelector, + BitmapIndexSelector indexSelector + ) + { + if (filter.supportsBitmapIndex(indexSelector)) { + final Column column = columnSelector.getColumn(dimension); + 
if (column != null) { + return !column.getCapabilities().hasMultipleValues(); + } + } + return false; } public static ValueMatcher getLongValueMatcher( @@ -372,7 +496,6 @@ private static Filter pushDownNot(Filter current) } - if (current instanceof OrFilter) { List children = Lists.newArrayList(); for (Filter child : ((OrFilter) current).getFilters()) { @@ -425,7 +548,8 @@ private static Filter convertToCNFInternal(Filter current) // CNF conversion functions were adapted from Apache Hive, see: // https://github.com/apache/hive/blob/branch-2.0/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java - private static Filter flatten(Filter root) { + private static Filter flatten(Filter root) + { if (root instanceof BooleanFilter) { List children = new ArrayList<>(); children.addAll(((BooleanFilter) root).getFilters()); @@ -436,7 +560,7 @@ private static Filter flatten(Filter root) { // do we need to flatten? if (child.getClass() == root.getClass() && !(child instanceof NotFilter)) { boolean first = true; - List grandKids = ((BooleanFilter)child).getFilters(); + List grandKids = ((BooleanFilter) child).getFilters(); for (Filter grandkid : grandKids) { // for the first grandkid replace the original parent if (first) { diff --git a/processing/src/main/java/io/druid/segment/filter/InFilter.java b/processing/src/main/java/io/druid/segment/filter/InFilter.java index 650b1b8b879a..c9a7ce902d1b 100644 --- a/processing/src/main/java/io/druid/segment/filter/InFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/InFilter.java @@ -19,11 +19,9 @@ package io.druid.segment.filter; -import com.google.common.base.Function; import com.google.common.base.Predicate; import com.google.common.base.Strings; import com.google.common.base.Supplier; -import com.google.common.collect.Iterables; import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; @@ -31,8 +29,15 @@ 
import io.druid.query.filter.DruidPredicateFactory; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; +import io.druid.segment.IntIteratorUtils; +import io.druid.segment.column.BitmapIndex; +import it.unimi.dsi.fastutil.ints.AbstractIntIterator; +import it.unimi.dsi.fastutil.ints.IntIterable; +import it.unimi.dsi.fastutil.ints.IntIterator; +import java.util.Iterator; import java.util.Set; /** @@ -61,18 +66,8 @@ public InFilter( public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector) { if (extractionFn == null) { - return selector.getBitmapFactory().union( - Iterables.transform( - values, new Function() - { - @Override - public ImmutableBitmap apply(String value) - { - return selector.getBitmapIndex(dimension, value); - } - } - ) - ); + final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension); + return selector.getBitmapFactory().union(getBitmapIterable(bitmapIndex)); } else { return Filters.matchPredicate( dimension, @@ -82,6 +77,57 @@ public ImmutableBitmap apply(String value) } } + @Override + public double estimateSelectivity(BitmapIndexSelector indexSelector) + { + if (extractionFn == null) { + final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension); + return Filters.estimatePredicateSelectivity( + bitmapIndex, + IntIteratorUtils.toIntList(getBitmapIndexIterable(bitmapIndex).iterator()), + indexSelector.getNumRows() + ); + } else { + return Filters.estimatePredicateSelectivity( + dimension, + indexSelector, + getPredicateFactory().makeStringPredicate() + ); + } + } + + private Iterable getBitmapIterable(final BitmapIndex bitmapIndex) + { + return Filters.bitmapsFromIndexes(getBitmapIndexIterable(bitmapIndex), bitmapIndex); + } + + private IntIterable getBitmapIndexIterable(final BitmapIndex bitmapIndex) + { + return new IntIterable() + { + @Override + public IntIterator iterator() + { + return new 
AbstractIntIterator() + { + Iterator iterator = values.iterator(); + + @Override + public boolean hasNext() + { + return iterator.hasNext(); + } + + @Override + public int nextInt() + { + return bitmapIndex.getIndex(iterator.next()); + } + }; + } + }; + } + @Override public ValueMatcher makeMatcher(ColumnSelectorFactory factory) { @@ -94,6 +140,14 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) return selector.getBitmapIndex(dimension) != null; } + @Override + public boolean supportsSelectivityEstimation( + ColumnSelector columnSelector, BitmapIndexSelector indexSelector + ) + { + return Filters.supportsSelectivityEstimation(this, dimension, columnSelector, indexSelector); + } + private DruidPredicateFactory getPredicateFactory() { return new DruidPredicateFactory() diff --git a/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java b/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java index df22ebaea40d..ab7561a6db9f 100644 --- a/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java @@ -25,6 +25,7 @@ import io.druid.query.filter.Filter; import io.druid.query.filter.JavaScriptDimFilter; import io.druid.query.filter.ValueMatcher; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; import org.mozilla.javascript.Context; @@ -47,22 +48,37 @@ public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector) { final Context cx = Context.enter(); try { - final Predicate contextualPredicate = new Predicate() - { - @Override - public boolean apply(String input) - { - return predicateFactory.applyInContext(cx, input); - } - }; + return Filters.matchPredicate(dimension, selector, makeStringPredicate(cx)); + } + finally { + Context.exit(); + } + } - return Filters.matchPredicate(dimension, selector, contextualPredicate); + @Override + public double estimateSelectivity(BitmapIndexSelector 
indexSelector) + { + final Context cx = Context.enter(); + try { + return Filters.estimatePredicateSelectivity(dimension, indexSelector, makeStringPredicate(cx)); } finally { Context.exit(); } } + private Predicate makeStringPredicate(final Context context) + { + return new Predicate() + { + @Override + public boolean apply(String input) + { + return predicateFactory.applyInContext(context, input); + } + }; + } + @Override public ValueMatcher makeMatcher(ColumnSelectorFactory factory) { @@ -75,4 +91,12 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) { return selector.getBitmapIndex(dimension) != null; } + + @Override + public boolean supportsSelectivityEstimation( + ColumnSelector columnSelector, BitmapIndexSelector indexSelector + ) + { + return Filters.supportsSelectivityEstimation(this, dimension, columnSelector, indexSelector); + } } diff --git a/processing/src/main/java/io/druid/segment/filter/LikeFilter.java b/processing/src/main/java/io/druid/segment/filter/LikeFilter.java index 8773da0c698a..b49088f49deb 100644 --- a/processing/src/main/java/io/druid/segment/filter/LikeFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/LikeFilter.java @@ -26,11 +26,17 @@ import io.druid.query.filter.Filter; import io.druid.query.filter.LikeDimFilter; import io.druid.query.filter.ValueMatcher; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; +import io.druid.segment.IntIteratorUtils; import io.druid.segment.column.BitmapIndex; import io.druid.segment.data.Indexed; +import it.unimi.dsi.fastutil.ints.AbstractIntIterator; +import it.unimi.dsi.fastutil.ints.IntIterable; +import it.unimi.dsi.fastutil.ints.IntIterator; +import it.unimi.dsi.fastutil.ints.IntList; -import java.util.Iterator; +import java.util.NoSuchElementException; public class LikeFilter implements Filter { @@ -52,10 +58,10 @@ public LikeFilter( @Override public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector) { - if 
(extractionFn == null && likeMatcher.getSuffixMatch() == LikeDimFilter.LikeMatcher.SuffixMatch.MATCH_EMPTY) { + if (emptyExtractFn() && emptySuffixMatch()) { // dimension equals prefix return selector.getBitmapIndex(dimension, likeMatcher.getPrefix()); - } else if (extractionFn == null && !likeMatcher.getPrefix().isEmpty()) { + } else if (emptyExtractFn() && nonEmptyPrefix()) { // dimension startsWith prefix and is accepted by likeMatcher.matchesSuffixOnly final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension); @@ -67,58 +73,9 @@ public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector) // search for start, end indexes in the bitmaps; then include all matching bitmaps between those points final Indexed dimValues = selector.getDimensionValues(dimension); - final String lower = Strings.nullToEmpty(likeMatcher.getPrefix()); - final String upper = Strings.nullToEmpty(likeMatcher.getPrefix()) + Character.MAX_VALUE; - final int startIndex; // inclusive - final int endIndex; // exclusive - - final int lowerFound = bitmapIndex.getIndex(lower); - startIndex = lowerFound >= 0 ? lowerFound : -(lowerFound + 1); - - final int upperFound = bitmapIndex.getIndex(upper); - endIndex = upperFound >= 0 ? upperFound + 1 : -(upperFound + 1); - // Union bitmaps for all matching dimension values in range. // Use lazy iterator to allow unioning bitmaps one by one and avoid materializing all of them at once. 
- return selector.getBitmapFactory().union( - new Iterable() - { - @Override - public Iterator iterator() - { - return new Iterator() - { - int currIndex = startIndex; - - @Override - public boolean hasNext() - { - return currIndex < endIndex; - } - - @Override - public ImmutableBitmap next() - { - while (currIndex < endIndex && !likeMatcher.matchesSuffixOnly(dimValues, currIndex)) { - currIndex++; - } - - if (currIndex == endIndex) { - return bitmapIndex.getBitmapFactory().makeEmptyImmutableBitmap(); - } - - return bitmapIndex.getBitmap(currIndex++); - } - - @Override - public void remove() - { - throw new UnsupportedOperationException(); - } - }; - } - } - ); + return selector.getBitmapFactory().union(getBitmapIterator(bitmapIndex, likeMatcher, dimValues)); } else { // fallback return Filters.matchPredicate( @@ -129,6 +86,56 @@ public void remove() } } + @Override + public double estimateSelectivity(BitmapIndexSelector indexSelector) + { + if (emptyExtractFn() && emptySuffixMatch()) { + // dimension equals prefix + return (double) indexSelector.getBitmapIndex(dimension, likeMatcher.getPrefix()).size() + / indexSelector.getNumRows(); + } else if (emptyExtractFn() && nonEmptyPrefix()) { + // dimension startsWith prefix and is accepted by likeMatcher.matchesSuffixOnly + final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension); + + if (bitmapIndex == null) { + // Treat this as a column full of nulls + return likeMatcher.matches(null) ? 1. : 0.; + } + + // search for start, end indexes in the bitmaps; then include all matching bitmaps between those points + final Indexed dimValues = indexSelector.getDimensionValues(dimension); + + // Use lazy iterator to allow getting bitmap size one by one and avoid materializing all of them at once. 
+ return Filters.estimatePredicateSelectivity( + bitmapIndex, + getBitmapIndexList(bitmapIndex, likeMatcher, dimValues), + indexSelector.getNumRows() + ); + } else { + // fallback + return Filters.estimatePredicateSelectivity( + dimension, + indexSelector, + likeMatcher.predicateFactory(extractionFn).makeStringPredicate() + ); + } + } + + private boolean emptyExtractFn() + { + return extractionFn == null; + } + + private boolean emptySuffixMatch() + { + return likeMatcher.getSuffixMatch() == LikeDimFilter.LikeMatcher.SuffixMatch.MATCH_EMPTY; + } + + private boolean nonEmptyPrefix() + { + return !likeMatcher.getPrefix().isEmpty(); + } + @Override public ValueMatcher makeMatcher(ColumnSelectorFactory factory) { @@ -140,4 +147,97 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) { return selector.getBitmapIndex(dimension) != null; } + + @Override + public boolean supportsSelectivityEstimation( + ColumnSelector columnSelector, BitmapIndexSelector indexSelector + ) + { + return Filters.supportsSelectivityEstimation(this, dimension, columnSelector, indexSelector); + } + + private static Iterable getBitmapIterator( + final BitmapIndex bitmapIndex, + final LikeDimFilter.LikeMatcher likeMatcher, + final Indexed dimValues + ) + { + return Filters.bitmapsFromIndexes(getBitmapIndexIterator(bitmapIndex, likeMatcher, dimValues), bitmapIndex); + } + + private static IntList getBitmapIndexList( + final BitmapIndex bitmapIndex, + final LikeDimFilter.LikeMatcher likeMatcher, + final Indexed dimValues + ) + { + return IntIteratorUtils.toIntList(getBitmapIndexIterator(bitmapIndex, likeMatcher, dimValues).iterator()); + } + + private static IntIterable getBitmapIndexIterator( + final BitmapIndex bitmapIndex, + final LikeDimFilter.LikeMatcher likeMatcher, + final Indexed dimValues + ) + { + final String lower = Strings.nullToEmpty(likeMatcher.getPrefix()); + final String upper = Strings.nullToEmpty(likeMatcher.getPrefix()) + Character.MAX_VALUE; + final int startIndex; 
// inclusive + final int endIndex; // exclusive + + final int lowerFound = bitmapIndex.getIndex(lower); + startIndex = lowerFound >= 0 ? lowerFound : -(lowerFound + 1); + + final int upperFound = bitmapIndex.getIndex(upper); + endIndex = upperFound >= 0 ? upperFound + 1 : -(upperFound + 1); + + return new IntIterable() + { + @Override + public IntIterator iterator() + { + return new AbstractIntIterator() + { + int currIndex = startIndex; + int found = -1; + + { + found = findNext(); + } + + private int findNext() + { + while (currIndex < endIndex && !likeMatcher.matchesSuffixOnly(dimValues, currIndex)) { + currIndex++; + } + + if (currIndex < endIndex) { + return currIndex++; + } else { + return -1; + } + } + + @Override + public boolean hasNext() + { + return found != -1; + } + + @Override + public int nextInt() + { + int cur = found; + + if (cur == -1) { + throw new NoSuchElementException(); + } + + found = findNext(); + return cur; + } + }; + } + }; + } } diff --git a/processing/src/main/java/io/druid/segment/filter/NotFilter.java b/processing/src/main/java/io/druid/segment/filter/NotFilter.java index acd51a32a8e8..cbba340b5c86 100644 --- a/processing/src/main/java/io/druid/segment/filter/NotFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/NotFilter.java @@ -23,6 +23,7 @@ import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; /** @@ -68,6 +69,20 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) return baseFilter.supportsBitmapIndex(selector); } + @Override + public boolean supportsSelectivityEstimation( + ColumnSelector columnSelector, BitmapIndexSelector indexSelector + ) + { + return baseFilter.supportsSelectivityEstimation(columnSelector, indexSelector); + } + + @Override + public double estimateSelectivity(BitmapIndexSelector indexSelector) + { + return 1. 
- baseFilter.estimateSelectivity(indexSelector); + } + public Filter getBaseFilter() { return baseFilter; diff --git a/processing/src/main/java/io/druid/segment/filter/OrFilter.java b/processing/src/main/java/io/druid/segment/filter/OrFilter.java index f870a5cb8abc..02fe68f937f4 100644 --- a/processing/src/main/java/io/druid/segment/filter/OrFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/OrFilter.java @@ -27,6 +27,7 @@ import io.druid.query.filter.Filter; import io.druid.query.filter.RowOffsetMatcherFactory; import io.druid.query.filter.ValueMatcher; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; import java.util.ArrayList; @@ -155,6 +156,30 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) return true; } + @Override + public boolean supportsSelectivityEstimation( + ColumnSelector columnSelector, BitmapIndexSelector indexSelector + ) + { + for (Filter filter : filters) { + if(!filter.supportsSelectivityEstimation(columnSelector, indexSelector)) { + return false; + } + } + return true; + } + + @Override + public double estimateSelectivity(BitmapIndexSelector indexSelector) + { + // Estimate selectivity with attribute value independence assumption + double selectivity = 0; + for (final Filter filter : filters) { + selectivity += filter.estimateSelectivity(indexSelector); + } + return Math.min(selectivity, 1.); + } + public String toString() { return String.format("(%s)", OR_JOINER.join(filters)); diff --git a/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java b/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java index 9c3654d282b8..987921f7ff0b 100644 --- a/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java @@ -23,6 +23,7 @@ import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; +import 
io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; /** @@ -59,6 +60,20 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) return selector.getBitmapIndex(dimension) != null; } + @Override + public boolean supportsSelectivityEstimation( + ColumnSelector columnSelector, BitmapIndexSelector indexSelector + ) + { + return Filters.supportsSelectivityEstimation(this, dimension, columnSelector, indexSelector); + } + + @Override + public double estimateSelectivity(BitmapIndexSelector indexSelector) + { + return (double) indexSelector.getBitmapIndex(dimension, value).size() / indexSelector.getNumRows(); + } + @Override public String toString() { diff --git a/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java b/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java index dd5d530971d2..a17831fa297f 100644 --- a/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java @@ -27,6 +27,7 @@ import io.druid.query.filter.DruidPredicateFactory; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; import io.druid.segment.incremental.SpatialDimensionRowTransformer; @@ -100,4 +101,19 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) { return selector.getBitmapIndex(dimension) != null; } + + @Override + public boolean supportsSelectivityEstimation( + ColumnSelector columnSelector, BitmapIndexSelector indexSelector + ) + { + return false; + } + + @Override + public double estimateSelectivity(BitmapIndexSelector indexSelector) + { + // selectivity estimation for multi-value columns is not implemented yet. 
+ throw new UnsupportedOperationException(); + } } diff --git a/processing/src/test/java/io/druid/segment/IntListUtilsTest.java b/processing/src/test/java/io/druid/segment/IntListUtilsTest.java new file mode 100644 index 000000000000..e1b992af4edd --- /dev/null +++ b/processing/src/test/java/io/druid/segment/IntListUtilsTest.java @@ -0,0 +1,51 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.segment; + +import it.unimi.dsi.fastutil.ints.IntList; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class IntListUtilsTest +{ + @Test(expected = IndexOutOfBoundsException.class) + public void testEmptyRangeIntList() + { + final IntList list = IntListUtils.fromTo(10, 10); + assertEquals(0, list.size()); + list.get(0); + } + + @Test(expected = IllegalArgumentException.class) + public void testRangeIntListWithSmallEndIndex() + { + IntListUtils.fromTo(10, 5); + } + + @Test + public void testRangeIntList() + { + final IntList list = IntListUtils.fromTo(20, 120); + for (int i = 0; i < 100; i++) { + assertEquals(i + 20, list.getInt(i)); + } + } +} diff --git a/processing/src/test/java/io/druid/segment/filter/BaseFilterTest.java b/processing/src/test/java/io/druid/segment/filter/BaseFilterTest.java index 97916b253c25..0c5cc996d1ce 100644 --- a/processing/src/test/java/io/druid/segment/filter/BaseFilterTest.java +++ b/processing/src/test/java/io/druid/segment/filter/BaseFilterTest.java @@ -43,6 +43,7 @@ import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; import io.druid.query.groupby.RowBasedColumnSelectorFactory; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; import io.druid.segment.Cursor; import io.druid.segment.DimensionSelector; @@ -385,6 +386,20 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) { return false; } + + @Override + public boolean supportsSelectivityEstimation( + ColumnSelector columnSelector, BitmapIndexSelector indexSelector + ) + { + return false; + } + + @Override + public double estimateSelectivity(BitmapIndexSelector indexSelector) + { + return 1.0; + } }; final Sequence cursors = makeCursorSequence(postFilteringFilter); diff --git a/processing/src/test/java/io/druid/segment/filter/FiltersTest.java b/processing/src/test/java/io/druid/segment/filter/FiltersTest.java new file mode 100644 index 
000000000000..d7dd5abcf7aa --- /dev/null +++ b/processing/src/test/java/io/druid/segment/filter/FiltersTest.java @@ -0,0 +1,109 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.segment.filter; + +import com.google.common.collect.Lists; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ConciseBitmapFactory; +import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.MutableBitmap; +import io.druid.segment.IntIteratorUtils; +import io.druid.segment.column.BitmapIndex; +import it.unimi.dsi.fastutil.ints.IntIterators; +import org.junit.Test; + +import java.util.List; + +import static org.junit.Assert.assertEquals; + +public class FiltersTest +{ + @Test + public void testEstimateSelectivityOfBitmapList() + { + final int bitmapNum = 100; + final List bitmaps = Lists.newArrayListWithCapacity(bitmapNum); + final BitmapIndex bitmapIndex = makeNonOverlappedBitmapIndexes(bitmapNum, bitmaps); + + final double estimated = Filters.estimatePredicateSelectivity( + bitmapIndex, + IntIteratorUtils.toIntList(IntIterators.fromTo(0, bitmapNum)), + 10000 + ); + final double expected = 0.1; + assertEquals(expected, estimated, 0.00001); + } 
+ + private static BitmapIndex getBitmapIndex(final List bitmapList) + { + return new BitmapIndex() + { + @Override + public int getCardinality() + { + return 10; + } + + @Override + public String getValue(int index) + { + throw new UnsupportedOperationException(); + } + + @Override + public boolean hasNulls() + { + return false; + } + + @Override + public BitmapFactory getBitmapFactory() + { + return new ConciseBitmapFactory(); + } + + @Override + public int getIndex(String value) + { + throw new UnsupportedOperationException(); + } + + @Override + public ImmutableBitmap getBitmap(int idx) + { + return bitmapList.get(idx); + } + }; + } + + private static BitmapIndex makeNonOverlappedBitmapIndexes(final int bitmapNum, final List bitmaps) + { + final BitmapIndex bitmapIndex = getBitmapIndex(bitmaps); + final BitmapFactory factory = bitmapIndex.getBitmapFactory(); + for (int i = 0; i < bitmapNum; i++) { + final MutableBitmap mutableBitmap = factory.makeEmptyMutableBitmap(); + for (int j = 0; j < 10; j++) { + mutableBitmap.add(i * 10 + j); + } + bitmaps.add(factory.makeImmutableBitmap(mutableBitmap)); + } + return bitmapIndex; + } +}