From f02fc66515ffe93b20419053c9db1ba0bae00dda Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Fri, 13 Jan 2017 20:07:47 +0900 Subject: [PATCH 1/9] Add filter selectivity estimation for auto search strategy --- .../benchmark/query/SearchBenchmark.java | 101 ++++++++++- .../java/io/druid/query/filter/Filter.java | 15 +- .../query/search/search/AutoStrategy.java | 19 +-- .../search/search/UseIndexesStrategy.java | 24 +-- .../io/druid/segment/filter/AndFilter.java | 11 ++ .../io/druid/segment/filter/BoundFilter.java | 157 ++++++++++++------ .../filter/DimensionPredicateFilter.java | 11 ++ .../java/io/druid/segment/filter/Filters.java | 151 +++++++++++------ .../io/druid/segment/filter/InFilter.java | 19 +++ .../segment/filter/JavaScriptFilter.java | 21 +++ .../io/druid/segment/filter/LikeFilter.java | 144 +++++++++++----- .../io/druid/segment/filter/NotFilter.java | 6 + .../io/druid/segment/filter/OrFilter.java | 11 ++ .../druid/segment/filter/SelectorFilter.java | 6 + .../druid/segment/filter/SpatialFilter.java | 10 ++ .../druid/segment/filter/BaseFilterTest.java | 6 + 16 files changed, 527 insertions(+), 185 deletions(-) diff --git a/benchmarks/src/main/java/io/druid/benchmark/query/SearchBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/query/SearchBenchmark.java index 161d40cc31b0..205261754bd8 100644 --- a/benchmarks/src/main/java/io/druid/benchmark/query/SearchBenchmark.java +++ b/benchmarks/src/main/java/io/druid/benchmark/query/SearchBenchmark.java @@ -46,9 +46,17 @@ import io.druid.query.QueryToolChest; import io.druid.query.Result; import io.druid.query.aggregation.hyperloglog.HyperUniquesSerde; +import io.druid.query.extraction.DimExtractionFn; +import io.druid.query.extraction.IdentityExtractionFn; +import io.druid.query.extraction.LowerExtractionFn; +import io.druid.query.extraction.StrlenExtractionFn; +import io.druid.query.extraction.SubstringDimExtractionFn; +import io.druid.query.extraction.UpperExtractionFn; import 
io.druid.query.filter.AndDimFilter; +import io.druid.query.filter.BoundDimFilter; import io.druid.query.filter.DimFilter; import io.druid.query.filter.InDimFilter; +import io.druid.query.filter.SelectorDimFilter; import io.druid.query.search.SearchQueryQueryToolChest; import io.druid.query.search.SearchQueryRunnerFactory; import io.druid.query.search.SearchResultValue; @@ -163,9 +171,9 @@ private void setupQueries() } { // basic.B - QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval())); + final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval())); - List dimUniformFilterVals = Lists.newArrayList(); + final List dimUniformFilterVals = Lists.newArrayList(); int resultNum = (int) (100000 * 0.1); int step = 100000 / resultNum; for (int i = 1; i < 100001 && dimUniformFilterVals.size() < resultNum; i += step) { @@ -183,7 +191,7 @@ private void setupQueries() dimFilters.add(new InDimFilter("dimUniform", dimUniformFilterVals, null)); dimFilters.add(new InDimFilter("dimHyperUnique", dimHyperUniqueFilterVals, null)); - Druids.SearchQueryBuilder queryBuilderB = + final Druids.SearchQueryBuilder queryBuilderB = Druids.newSearchQueryBuilder() .dataSource("blah") .granularity(QueryGranularities.ALL) @@ -195,6 +203,93 @@ private void setupQueries() basicQueries.put("B", queryBuilderB); } + { // basic.C + final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval())); + + final List dimUniformFilterVals = Lists.newArrayList(); + final int resultNum = (int) (100000 * 0.1); + final int step = 100000 / resultNum; + for (int i = 1; i < 100001 && dimUniformFilterVals.size() < resultNum; i += step) { + dimUniformFilterVals.add(String.valueOf(i)); + } + + final String dimName = "dimUniform"; + final List dimFilters = Lists.newArrayList(); + dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, 
IdentityExtractionFn.getInstance())); + dimFilters.add(new SelectorDimFilter(dimName, "3", StrlenExtractionFn.instance())); + dimFilters.add(new BoundDimFilter(dimName, "100", "10000", true, true, true, new DimExtractionFn() + { + @Override + public byte[] getCacheKey() + { + return new byte[] {0xF}; + } + + @Override + public String apply(String value) + { + return String.valueOf(Long.parseLong(value) + 1); + } + + @Override + public boolean preservesOrdering() + { + return false; + } + + @Override + public ExtractionType getExtractionType() + { + return ExtractionType.ONE_TO_ONE; + } + }, null)); + dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, new LowerExtractionFn(null))); + dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, new UpperExtractionFn(null))); + dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, new SubstringDimExtractionFn(1, 3))); + + final Druids.SearchQueryBuilder queryBuilderC = + Druids.newSearchQueryBuilder() + .dataSource("blah") + .granularity(QueryGranularities.ALL) + .intervals(intervalSpec) + .query("") + .dimensions(Lists.newArrayList("dimUniform")) + .filters(new AndDimFilter(dimFilters)); + + basicQueries.put("C", queryBuilderC); + } + + { // basic.D + final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval())); + + final List dimUniformFilterVals = Lists.newArrayList(); + final int resultNum = (int) (100000 * 0.1); + final int step = 100000 / resultNum; + for (int i = 1; i < 100001 && dimUniformFilterVals.size() < resultNum; i += step) { + dimUniformFilterVals.add(String.valueOf(i)); + } + + final String dimName = "dimUniform"; + final List dimFilters = Lists.newArrayList(); + dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, null)); + dimFilters.add(new SelectorDimFilter(dimName, "3", null)); + dimFilters.add(new BoundDimFilter(dimName, "100", "10000", true, true, true, null, null)); + dimFilters.add(new InDimFilter(dimName, 
dimUniformFilterVals, null)); + dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, null)); + dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, null)); + + final Druids.SearchQueryBuilder queryBuilderC = + Druids.newSearchQueryBuilder() + .dataSource("blah") + .granularity(QueryGranularities.ALL) + .intervals(intervalSpec) + .query("") + .dimensions(Lists.newArrayList("dimUniform")) + .filters(new AndDimFilter(dimFilters)); + + basicQueries.put("D", queryBuilderC); + } + SCHEMA_QUERY_MAP.put("basic", basicQueries); } diff --git a/processing/src/main/java/io/druid/query/filter/Filter.java b/processing/src/main/java/io/druid/query/filter/Filter.java index 60f38269494c..c6ee8178166c 100644 --- a/processing/src/main/java/io/druid/query/filter/Filter.java +++ b/processing/src/main/java/io/druid/query/filter/Filter.java @@ -32,7 +32,7 @@ public interface Filter * @param selector Object used to retrieve bitmap indexes * @return A bitmap indicating rows that match this filter. */ - public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector); + ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector); /** @@ -41,7 +41,7 @@ public interface Filter * @param factory Object used to create ValueMatchers * @return ValueMatcher that applies this filter to row values. */ - public ValueMatcher makeMatcher(ColumnSelectorFactory factory); + ValueMatcher makeMatcher(ColumnSelectorFactory factory); /** @@ -51,5 +51,14 @@ public interface Filter * @param selector Object used to retrieve bitmap indexes * @return true if this Filter can provide a bitmap index using the selector, false otherwise */ - public boolean supportsBitmapIndex(BitmapIndexSelector selector); + boolean supportsBitmapIndex(BitmapIndexSelector selector); + + /** + * Estimate selectivity of this filter. The estimated selectivity might be different from the exact value. 
+ * + * @param selector Object used to retrieve bitmap indexes + * @param totalNumRows total number of rows in a segment + * @return Selectivity ranging from 0 to 1. + */ + double estimateSelectivity(BitmapIndexSelector selector, long totalNumRows); } diff --git a/processing/src/main/java/io/druid/query/search/search/AutoStrategy.java b/processing/src/main/java/io/druid/query/search/search/AutoStrategy.java index 81dc05ca4cf9..27449bcd573d 100644 --- a/processing/src/main/java/io/druid/query/search/search/AutoStrategy.java +++ b/processing/src/main/java/io/druid/query/search/search/AutoStrategy.java @@ -20,7 +20,6 @@ package io.druid.query.search.search; import com.metamx.emitter.EmittingLogger; -import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.query.dimension.DimensionSpec; import io.druid.query.filter.BitmapIndexSelector; import io.druid.segment.ColumnSelectorBitmapIndexSelector; @@ -58,18 +57,12 @@ public List getExecutionPlan(SearchQuery query, Segment seg index ); - // Index-only plan is used only when any filter is not specified or every filter supports bitmap indexes. + // Index-only plan is used only when any filter is not specified or the filter supports bitmap indexes. // // Note: if some filters support bitmap indexes but others are not, the current implementation always employs // the cursor-based plan. This can be more optimized. One possible optimization is generating a bitmap index - // from the non-bitmap-support filter, and then use it to compute the filtered result by intersecting bitmaps. + // from the non-bitmap-support filters, and then use it to compute the filtered result by intersecting bitmaps. 
if (filter == null || filter.supportsBitmapIndex(selector)) { - final ImmutableBitmap timeFilteredBitmap = UseIndexesStrategy.makeTimeFilteredBitmap( - index, - segment, - filter, - interval - ); final List dimsToSearch = getDimsToSearch( index.getAvailableDimensions(), query.getDimensions() @@ -83,16 +76,18 @@ public List getExecutionPlan(SearchQuery query, Segment seg // c_cursor = (# of rows in a segment) * (filter selectivity) * (# of dimensions) // * (search predicate processing cost) final SearchQueryDecisionHelper helper = getDecisionHelper(index); + final long totalNumRows = index.getNumRows(); final double useIndexStrategyCost = helper.getBitmapIntersectCost() * computeTotalCard(index, dimsToSearch); - final double cursorOnlyStrategyCost = - (timeFilteredBitmap == null ? index.getNumRows() : timeFilteredBitmap.size()) * dimsToSearch.size(); + final double cursorOnlyStrategyCost = (filter == null ? 1. : filter.estimateSelectivity(selector, totalNumRows)) + * totalNumRows * dimsToSearch.size(); + log.debug("Use-index strategy cost: %f, cursor-only strategy cost: %f", useIndexStrategyCost, cursorOnlyStrategyCost ); if (useIndexStrategyCost < cursorOnlyStrategyCost) { log.debug("Use-index execution strategy is selected, query id [%s]", query.getId()); - return UseIndexesStrategy.withTimeFilteredBitmap(query, timeFilteredBitmap).getExecutionPlan(query, segment); + return UseIndexesStrategy.of(query).getExecutionPlan(query, segment); } else { log.debug("Cursor-only execution strategy is selected, query id [%s]", query.getId()); return CursorOnlyStrategy.of(query).getExecutionPlan(query, segment); diff --git a/processing/src/main/java/io/druid/query/search/search/UseIndexesStrategy.java b/processing/src/main/java/io/druid/query/search/search/UseIndexesStrategy.java index fcf6f23016ec..e68d032224f7 100644 --- a/processing/src/main/java/io/druid/query/search/search/UseIndexesStrategy.java +++ 
b/processing/src/main/java/io/druid/query/search/search/UseIndexesStrategy.java @@ -44,7 +44,6 @@ import it.unimi.dsi.fastutil.objects.Object2IntRBTreeMap; import org.joda.time.Interval; -import javax.annotation.Nullable; import java.util.Arrays; import java.util.List; @@ -52,31 +51,16 @@ public class UseIndexesStrategy extends SearchStrategy { public static final String NAME = "useIndexes"; - private final ImmutableBitmap timeFilteredBitmap; - private final boolean needToMakeFilteredBitmap; - public static UseIndexesStrategy of(SearchQuery query) { - return new UseIndexesStrategy(query, true, null); - } - - public static UseIndexesStrategy withTimeFilteredBitmap( - SearchQuery query, - @Nullable ImmutableBitmap timeFilteredBitmap - ) - { - return new UseIndexesStrategy(query, false, timeFilteredBitmap); + return new UseIndexesStrategy(query); } private UseIndexesStrategy( - SearchQuery query, - boolean needToMakeFilteredBitmap, - @Nullable ImmutableBitmap timeFilteredBitmap + SearchQuery query ) { super(query); - this.needToMakeFilteredBitmap = needToMakeFilteredBitmap; - this.timeFilteredBitmap = timeFilteredBitmap; } @Override @@ -105,9 +89,7 @@ public List getExecutionPlan(SearchQuery query, Segment seg // the cursor-based plan. This can be more optimized. One possible optimization is generating a bitmap index // from the non-bitmap-support filter, and then use it to compute the filtered result by intersecting bitmaps. if (filter == null || filter.supportsBitmapIndex(selector)) { - final ImmutableBitmap timeFilteredBitmap = this.needToMakeFilteredBitmap ? 
- makeTimeFilteredBitmap(index, segment, filter, interval) : - this.timeFilteredBitmap; + final ImmutableBitmap timeFilteredBitmap = makeTimeFilteredBitmap(index, segment, filter, interval); builder.add(new IndexOnlyExecutor(query, segment, timeFilteredBitmap, bitmapSuppDims)); } else { // Fall back to cursor-based execution strategy diff --git a/processing/src/main/java/io/druid/segment/filter/AndFilter.java b/processing/src/main/java/io/druid/segment/filter/AndFilter.java index 93ac4fc58c2a..fd29b4343636 100644 --- a/processing/src/main/java/io/druid/segment/filter/AndFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/AndFilter.java @@ -148,6 +148,17 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) return true; } + @Override + public double estimateSelectivity(BitmapIndexSelector selector, long totalNumRows) + { + // Estimate selectivity with attribute value independence assumption + double selectivity = 1.0; + for (final Filter filter : filters) { + selectivity *= filter.estimateSelectivity(selector, totalNumRows); + } + return selectivity; + } + @Override public String toString() { diff --git a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java index dac3383ee1d0..81590af3aa98 100644 --- a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java @@ -22,6 +22,7 @@ import com.google.common.base.Predicate; import com.google.common.base.Supplier; import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.java.util.common.Pair; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.BoundDimFilter; @@ -65,62 +66,12 @@ public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector) } // search for start, end indexes in the bitmaps; then include all bitmaps between those points - - 
final int startIndex; // inclusive - final int endIndex; // exclusive - - if (!boundDimFilter.hasLowerBound()) { - startIndex = 0; - } else { - final int found = bitmapIndex.getIndex(boundDimFilter.getLower()); - if (found >= 0) { - startIndex = boundDimFilter.isLowerStrict() ? found + 1 : found; - } else { - startIndex = -(found + 1); - } - } - - if (!boundDimFilter.hasUpperBound()) { - endIndex = bitmapIndex.getCardinality(); - } else { - final int found = bitmapIndex.getIndex(boundDimFilter.getUpper()); - if (found >= 0) { - endIndex = boundDimFilter.isUpperStrict() ? found : found + 1; - } else { - endIndex = -(found + 1); - } - } + final Pair indexes = getStartEndIndexes(bitmapIndex); + final int startIndex = indexes.lhs; + final int endIndex = indexes.rhs; return selector.getBitmapFactory().union( - new Iterable() - { - @Override - public Iterator iterator() - { - return new Iterator() - { - int currIndex = startIndex; - - @Override - public boolean hasNext() - { - return currIndex < endIndex; - } - - @Override - public ImmutableBitmap next() - { - return bitmapIndex.getBitmap(currIndex++); - } - - @Override - public void remove() - { - throw new UnsupportedOperationException(); - } - }; - } - } + getBitmapIterator(startIndex, endIndex, bitmapIndex) ); } else { return Filters.matchPredicate( @@ -143,6 +94,104 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) return selector.getBitmapIndex(boundDimFilter.getDimension()) != null; } + @Override + public double estimateSelectivity(BitmapIndexSelector selector, long totalNumRows) + { + if (selector.getBitmapIndex(boundDimFilter.getDimension()) != null) { + final BitmapIndex bitmapIndex = selector.getBitmapIndex(boundDimFilter.getDimension()); + + if (bitmapIndex == null || bitmapIndex.getCardinality() == 0) { + return doesMatch(null) ? 1. 
: 0.; + } + + // search for start, end indexes in the bitmaps; then include all bitmaps between those points + final Pair indexes = getStartEndIndexes(bitmapIndex); + final int startIndex = indexes.lhs; + final int endIndex = indexes.rhs; + + long matchRowNum = 0; + for (final ImmutableBitmap bitmap : getBitmapIterator(startIndex, endIndex, bitmapIndex)) { + matchRowNum += bitmap.size(); + } + + return (double) matchRowNum / totalNumRows; + } else { + return Filters.estimatePredicateSelectivity( + boundDimFilter.getDimension(), + selector, + getPredicateFactory().makeStringPredicate(), + totalNumRows + ); + } + } + + private Pair getStartEndIndexes(final BitmapIndex bitmapIndex) + { + final int startIndex; // inclusive + final int endIndex; // exclusive + + if (!boundDimFilter.hasLowerBound()) { + startIndex = 0; + } else { + final int found = bitmapIndex.getIndex(boundDimFilter.getLower()); + if (found >= 0) { + startIndex = boundDimFilter.isLowerStrict() ? found + 1 : found; + } else { + startIndex = -(found + 1); + } + } + + if (!boundDimFilter.hasUpperBound()) { + endIndex = bitmapIndex.getCardinality(); + } else { + final int found = bitmapIndex.getIndex(boundDimFilter.getUpper()); + if (found >= 0) { + endIndex = boundDimFilter.isUpperStrict() ? 
found : found + 1; + } else { + endIndex = -(found + 1); + } + } + + return new Pair<>(startIndex, endIndex); + } + + private static Iterable getBitmapIterator( + final int startIndex, + final int endIndex, + final BitmapIndex bitmapIndex + ) + { + return new Iterable() + { + @Override + public Iterator iterator() + { + return new Iterator() + { + int currIndex = startIndex; + + @Override + public boolean hasNext() + { + return currIndex < endIndex; + } + + @Override + public ImmutableBitmap next() + { + return bitmapIndex.getBitmap(currIndex++); + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + }; + } + }; + } + private DruidPredicateFactory getPredicateFactory() { return new DruidPredicateFactory() diff --git a/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java b/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java index 6af811392835..0878945e73ee 100644 --- a/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java @@ -104,6 +104,17 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) return selector.getBitmapIndex(dimension) != null; } + @Override + public double estimateSelectivity(BitmapIndexSelector selector, long totalNumRows) + { + return Filters.estimatePredicateSelectivity( + dimension, + selector, + predicateFactory.makeStringPredicate(), + totalNumRows + ); + } + @Override public String toString() { diff --git a/processing/src/main/java/io/druid/segment/filter/Filters.java b/processing/src/main/java/io/druid/segment/filter/Filters.java index aa94464b8470..0569d46a05d6 100644 --- a/processing/src/main/java/io/druid/segment/filter/Filters.java +++ b/processing/src/main/java/io/druid/segment/filter/Filters.java @@ -99,7 +99,7 @@ public static Filter toFilter(DimFilter dimFilter) /** * Create a ValueMatcher that compares row values to the 
provided string. - * + * <p>

* An implementation of this method should be able to handle dimensions of various types. * * @param columnSelectorFactory Selector for columns. @@ -136,10 +136,10 @@ public static ValueMatcher makeValueMatcher( /** * Create a ValueMatcher that applies a predicate to row values. - * + * <p>

* The caller provides a predicate factory that can create a predicate for each value type supported by Druid. * See {@link DruidPredicateFactory} for more information. - * + * <p>

* When creating the ValueMatcher, the ValueMatcherFactory implementation should decide what type of predicate * to create from the predicate factory based on the ValueType of the specified dimension. * @@ -220,61 +220,106 @@ public static ImmutableBitmap matchPredicate( // Apply predicate to all dimension values and union the matching bitmaps final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension); - return selector.getBitmapFactory().union( - new Iterable() + return selector.getBitmapFactory() + .union(createPredicateQualifyingBitmapIterator(bitmapIndex, predicate, dimValues)); + } + + static double estimatePredicateSelectivity( + final String dimension, + final BitmapIndexSelector selector, + final Predicate predicate, + final long totalNumRows + ) + { + Preconditions.checkNotNull(dimension, "dimension"); + Preconditions.checkNotNull(selector, "selector"); + Preconditions.checkNotNull(predicate, "predicate"); + + // Missing dimension -> match all rows if the predicate matches null; match no rows otherwise + final Indexed dimValues = selector.getDimensionValues(dimension); + if (dimValues == null || dimValues.size() == 0) { + if (predicate.apply(null)) { + return 1.; + } else { + return 0.; + } + } + + // Apply predicate to all dimension values and union the matching bitmaps + final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension); + final Iterator iterator = createPredicateQualifyingBitmapIterator( + bitmapIndex, + predicate, + dimValues + ).iterator(); + + long matchRowNum = 0; + while (iterator.hasNext()) { + final ImmutableBitmap next = iterator.next(); + matchRowNum += next.size(); + } + return (double) matchRowNum / totalNumRows; + } + + private static Iterable createPredicateQualifyingBitmapIterator( + final BitmapIndex bitmapIndex, + final Predicate predicate, + final Indexed dimValues + ) + { + return new Iterable() + { + @Override + public Iterator iterator() + { + return new Iterator() { - @Override - public Iterator iterator() + 
private final int bitmapIndexCardinality = bitmapIndex.getCardinality(); + private int nextIndex = 0; + private ImmutableBitmap nextBitmap; + { - return new Iterator() - { - private final int bitmapIndexCardinality = bitmapIndex.getCardinality(); - private int nextIndex = 0; - private ImmutableBitmap nextBitmap; - - { - findNextBitmap(); - } + findNextBitmap(); + } - private void findNextBitmap() - { - while (nextIndex < bitmapIndexCardinality) { - if (predicate.apply(dimValues.get(nextIndex))) { - nextBitmap = bitmapIndex.getBitmap(nextIndex); - nextIndex++; - return; - } - nextIndex++; - } - nextBitmap = null; + private void findNextBitmap() + { + while (nextIndex < bitmapIndexCardinality) { + if (predicate.apply(dimValues.get(nextIndex))) { + nextBitmap = bitmapIndex.getBitmap(nextIndex); + nextIndex++; + return; } + nextIndex++; + } + nextBitmap = null; + } - @Override - public boolean hasNext() - { - return nextBitmap != null; - } + @Override + public boolean hasNext() + { + return nextBitmap != null; + } - @Override - public ImmutableBitmap next() - { - ImmutableBitmap bitmap = nextBitmap; - if (bitmap == null) { - throw new NoSuchElementException(); - } - findNextBitmap(); - return bitmap; - } + @Override + public ImmutableBitmap next() + { + ImmutableBitmap bitmap = nextBitmap; + if (bitmap == null) { + throw new NoSuchElementException(); + } + findNextBitmap(); + return bitmap; + } - @Override - public void remove() - { - throw new UnsupportedOperationException(); - } - }; + @Override + public void remove() + { + throw new UnsupportedOperationException(); } - } - ); + }; + } + }; } public static ValueMatcher getLongValueMatcher( @@ -372,7 +417,6 @@ private static Filter pushDownNot(Filter current) } - if (current instanceof OrFilter) { List children = Lists.newArrayList(); for (Filter child : ((OrFilter) current).getFilters()) { @@ -425,7 +469,8 @@ private static Filter convertToCNFInternal(Filter current) // CNF conversion functions were adapted from 
Apache Hive, see: // https://github.com/apache/hive/blob/branch-2.0/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java - private static Filter flatten(Filter root) { + private static Filter flatten(Filter root) + { if (root instanceof BooleanFilter) { List children = new ArrayList<>(); children.addAll(((BooleanFilter) root).getFilters()); @@ -436,7 +481,7 @@ private static Filter flatten(Filter root) { // do we need to flatten? if (child.getClass() == root.getClass() && !(child instanceof NotFilter)) { boolean first = true; - List grandKids = ((BooleanFilter)child).getFilters(); + List grandKids = ((BooleanFilter) child).getFilters(); for (Filter grandkid : grandKids) { // for the first grandkid replace the original parent if (first) { diff --git a/processing/src/main/java/io/druid/segment/filter/InFilter.java b/processing/src/main/java/io/druid/segment/filter/InFilter.java index 650b1b8b879a..0f6743ea65fa 100644 --- a/processing/src/main/java/io/druid/segment/filter/InFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/InFilter.java @@ -94,6 +94,25 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) return selector.getBitmapIndex(dimension) != null; } + @Override + public double estimateSelectivity(BitmapIndexSelector selector, long totalNumRows) + { + if (extractionFn == null) { + long matchedNumRows = 0; + for (final String eachVal : values) { + matchedNumRows += selector.getBitmapIndex(dimension, eachVal).size(); + } + return (double) matchedNumRows / totalNumRows; + } else { + return Filters.estimatePredicateSelectivity( + dimension, + selector, + getPredicateFactory().makeStringPredicate(), + totalNumRows + ); + } + } + private DruidPredicateFactory getPredicateFactory() { return new DruidPredicateFactory() diff --git a/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java b/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java index df22ebaea40d..9d964552bbb8 100644 
--- a/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java @@ -75,4 +75,25 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) { return selector.getBitmapIndex(dimension) != null; } + + @Override + public double estimateSelectivity(BitmapIndexSelector selector, long totalNumRows) + { + final Context cx = Context.enter(); + try { + final Predicate contextualPredicate = new Predicate() + { + @Override + public boolean apply(String input) + { + return predicateFactory.applyInContext(cx, input); + } + }; + + return Filters.estimatePredicateSelectivity(dimension, selector, contextualPredicate, totalNumRows); + } + finally { + Context.exit(); + } + } } diff --git a/processing/src/main/java/io/druid/segment/filter/LikeFilter.java b/processing/src/main/java/io/druid/segment/filter/LikeFilter.java index 934b48929e16..c825fe518684 100644 --- a/processing/src/main/java/io/druid/segment/filter/LikeFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/LikeFilter.java @@ -80,45 +80,8 @@ public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector) // Union bitmaps for all matching dimension values in range. // Use lazy iterator to allow unioning bitmaps one by one and avoid materializing all of them at once. 
- return selector.getBitmapFactory().union( - new Iterable() - { - @Override - public Iterator iterator() - { - return new Iterator() - { - int currIndex = startIndex; - - @Override - public boolean hasNext() - { - return currIndex < endIndex; - } - - @Override - public ImmutableBitmap next() - { - while (currIndex < endIndex && !likeMatcher.matchesSuffixOnly(dimValues.get(currIndex))) { - currIndex++; - } - - if (currIndex == endIndex) { - return bitmapIndex.getBitmapFactory().makeEmptyImmutableBitmap(); - } - - return bitmapIndex.getBitmap(currIndex++); - } - - @Override - public void remove() - { - throw new UnsupportedOperationException(); - } - }; - } - } - ); + return selector.getBitmapFactory() + .union(getBitmapIterator(startIndex, endIndex, bitmapIndex, likeMatcher, dimValues)); } else { // fallback return Filters.matchPredicate( @@ -140,4 +103,107 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) { return selector.getBitmapIndex(dimension) != null; } + + @Override + public double estimateSelectivity(BitmapIndexSelector selector, long totalNumRows) + { + if (extractionFn == null && likeMatcher.getSuffixMatch() == LikeDimFilter.LikeMatcher.SuffixMatch.MATCH_EMPTY) { + // dimension equals prefix + return (double) selector.getBitmapIndex(dimension, likeMatcher.getPrefix()).size() / totalNumRows; + } else if (extractionFn == null && !likeMatcher.getPrefix().isEmpty()) { + // dimension startsWith prefix and is accepted by likeMatcher.matchesSuffixOnly + final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension); + + if (bitmapIndex == null) { + // Treat this as a column full of nulls + return likeMatcher.matches(null) ? 1. 
: 0.; + } + + // search for start, end indexes in the bitmaps; then include all matching bitmaps between those points + final Indexed dimValues = selector.getDimensionValues(dimension); + + final String lower = Strings.nullToEmpty(likeMatcher.getPrefix()); + final String upper = Strings.nullToEmpty(likeMatcher.getPrefix()) + Character.MAX_VALUE; + final int startIndex; // inclusive + final int endIndex; // exclusive + + final int lowerFound = bitmapIndex.getIndex(lower); + startIndex = lowerFound >= 0 ? lowerFound : -(lowerFound + 1); + + final int upperFound = bitmapIndex.getIndex(upper); + endIndex = upperFound >= 0 ? upperFound + 1 : -(upperFound + 1); + + // Use lazy iterator to allow getting bitmap size one by one and avoid materializing all of them at once. + final Iterator iterator = getBitmapIterator( + startIndex, + endIndex, + bitmapIndex, + likeMatcher, + dimValues + ).iterator(); + + long matchRowNum = 0; + while (iterator.hasNext()) { + final ImmutableBitmap bitmap = iterator.next(); + matchRowNum += bitmap.size(); + } + + return (double) matchRowNum / totalNumRows; + } else { + // fallback + return Filters.estimatePredicateSelectivity( + dimension, + selector, + likeMatcher.predicateFactory(extractionFn).makeStringPredicate(), + totalNumRows + ); + } + } + + private static Iterable getBitmapIterator( + final int startIndex, + final int endIndex, + final BitmapIndex bitmapIndex, + final LikeDimFilter.LikeMatcher likeMatcher, + final Indexed dimValues + ) + { + return new Iterable() + { + @Override + public Iterator iterator() + { + return new Iterator() + { + int currIndex = startIndex; + + @Override + public boolean hasNext() + { + return currIndex < endIndex; + } + + @Override + public ImmutableBitmap next() + { + while (currIndex < endIndex && !likeMatcher.matchesSuffixOnly(dimValues.get(currIndex))) { + currIndex++; + } + + if (currIndex == endIndex) { + return bitmapIndex.getBitmapFactory().makeEmptyImmutableBitmap(); + } + + return 
bitmapIndex.getBitmap(currIndex++); + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + }; + } + }; + } } diff --git a/processing/src/main/java/io/druid/segment/filter/NotFilter.java b/processing/src/main/java/io/druid/segment/filter/NotFilter.java index acd51a32a8e8..44eb180c1a5b 100644 --- a/processing/src/main/java/io/druid/segment/filter/NotFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/NotFilter.java @@ -68,6 +68,12 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) return baseFilter.supportsBitmapIndex(selector); } + @Override + public double estimateSelectivity(BitmapIndexSelector selector, long totalNumRows) + { + return 1. - baseFilter.estimateSelectivity(selector, totalNumRows); + } + public Filter getBaseFilter() { return baseFilter; diff --git a/processing/src/main/java/io/druid/segment/filter/OrFilter.java b/processing/src/main/java/io/druid/segment/filter/OrFilter.java index f870a5cb8abc..be173764587c 100644 --- a/processing/src/main/java/io/druid/segment/filter/OrFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/OrFilter.java @@ -155,6 +155,17 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) return true; } + @Override + public double estimateSelectivity(BitmapIndexSelector selector, long totalNumRows) + { + // Estimate selectivity with attribute value independence assumption + double selectivity = 0; + for (final Filter filter : filters) { + selectivity += filter.estimateSelectivity(selector, totalNumRows); + } + return Math.min(selectivity, 1.); + } + public String toString() { return String.format("(%s)", OR_JOINER.join(filters)); diff --git a/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java b/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java index 9c3654d282b8..b514ba594cad 100644 --- a/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java +++ 
b/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java @@ -59,6 +59,12 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) return selector.getBitmapIndex(dimension) != null; } + @Override + public double estimateSelectivity(BitmapIndexSelector selector, long totalNumRows) + { + return (double) selector.getBitmapIndex(dimension, value).size() / totalNumRows; + } + @Override public String toString() { diff --git a/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java b/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java index dd5d530971d2..41960b4f0c22 100644 --- a/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java @@ -100,4 +100,14 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) { return selector.getBitmapIndex(dimension) != null; } + + @Override + public double estimateSelectivity(BitmapIndexSelector selector, long totalNumRows) + { + long matchRowNum = 0; + for (final ImmutableBitmap bitmap : selector.getSpatialIndex(dimension).search(bound)) { + matchRowNum += bitmap.size(); + } + return (double) matchRowNum / totalNumRows; + } } diff --git a/processing/src/test/java/io/druid/segment/filter/BaseFilterTest.java b/processing/src/test/java/io/druid/segment/filter/BaseFilterTest.java index 97916b253c25..1aadf486064e 100644 --- a/processing/src/test/java/io/druid/segment/filter/BaseFilterTest.java +++ b/processing/src/test/java/io/druid/segment/filter/BaseFilterTest.java @@ -385,6 +385,12 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) { return false; } + + @Override + public double estimateSelectivity(BitmapIndexSelector selector, long totalNumRows) + { + return 1.0; + } }; final Sequence cursors = makeCursorSequence(postFilteringFilter); From 587ecfe30780a7a29154f2a47d2a4f2ea18ca20b Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Mon, 23 Jan 2017 18:14:07 +0900 Subject: [PATCH 
2/9] Addressed comments --- .../java/io/druid/query/filter/Filter.java | 33 +++-- .../query/search/search/AutoStrategy.java | 6 +- .../io/druid/segment/filter/AndFilter.java | 5 +- .../io/druid/segment/filter/BoundFilter.java | 85 ++++++------- .../filter/DimensionPredicateFilter.java | 9 +- .../java/io/druid/segment/filter/Filters.java | 119 +++++++++++++----- .../io/druid/segment/filter/InFilter.java | 67 +++++----- .../segment/filter/JavaScriptFilter.java | 55 ++++---- .../io/druid/segment/filter/LikeFilter.java | 117 ++++++++--------- .../io/druid/segment/filter/NotFilter.java | 5 +- .../io/druid/segment/filter/OrFilter.java | 5 +- .../druid/segment/filter/SelectorFilter.java | 5 +- .../druid/segment/filter/SpatialFilter.java | 14 ++- .../druid/segment/filter/BaseFilterTest.java | 3 +- 14 files changed, 298 insertions(+), 230 deletions(-) diff --git a/processing/src/main/java/io/druid/query/filter/Filter.java b/processing/src/main/java/io/druid/query/filter/Filter.java index c6ee8178166c..ffc1ce5a47e1 100644 --- a/processing/src/main/java/io/druid/query/filter/Filter.java +++ b/processing/src/main/java/io/druid/query/filter/Filter.java @@ -20,6 +20,7 @@ package io.druid.query.filter; import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; /** @@ -30,15 +31,37 @@ public interface Filter * Get a bitmap index, indicating rows that match this filter. * * @param selector Object used to retrieve bitmap indexes + * * @return A bitmap indicating rows that match this filter. + * + * @see Filter#estimateSelectivity(ColumnSelector, BitmapIndexSelector) */ ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector); + /** + * Estimate selectivity of this filter. + * This method can be used for cost-based query planning like in {@link io.druid.query.search.search.AutoStrategy}. 
+ * To avoid significant performance degradation for calculating the exact cost, + * implementation of this method targets to achieve rapid selectivity estimation + * with reasonable sacrifice of the accuracy. + * As a result, the estimated selectivity might be different from the exact value. + * + * @param columnSelector Column selector to retrieve column capabilities + * @param indexSelector Object used to retrieve bitmap indexes + * + * @return an estimated selectivity ranging from 0 (filter selects no rows) to 1 (filter selects all rows). + * + * @see Filter#getBitmapIndex(BitmapIndexSelector) + */ + double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector); + + /** * Get a ValueMatcher that applies this filter to row values. * * @param factory Object used to create ValueMatchers + * * @return ValueMatcher that applies this filter to row values. */ ValueMatcher makeMatcher(ColumnSelectorFactory factory); @@ -49,16 +72,8 @@ public interface Filter * the information provided by the input BitmapIndexSelector. * * @param selector Object used to retrieve bitmap indexes + * * @return true if this Filter can provide a bitmap index using the selector, false otherwise */ boolean supportsBitmapIndex(BitmapIndexSelector selector); - - /** - * Estimate selectivity of this filter. The estimated selectivity might be different from the exact value. - * - * @param selector Object used to retrieve bitmap indexes - * @param totalNumRows total number of rows in a segment - * @return Selectivity ranging from 0 to 1. 
- */ - double estimateSelectivity(BitmapIndexSelector selector, long totalNumRows); } diff --git a/processing/src/main/java/io/druid/query/search/search/AutoStrategy.java b/processing/src/main/java/io/druid/query/search/search/AutoStrategy.java index 27449bcd573d..e43038697256 100644 --- a/processing/src/main/java/io/druid/query/search/search/AutoStrategy.java +++ b/processing/src/main/java/io/druid/query/search/search/AutoStrategy.java @@ -76,10 +76,10 @@ public List getExecutionPlan(SearchQuery query, Segment seg // c_cursor = (# of rows in a segment) * (filter selectivity) * (# of dimensions) // * (search predicate processing cost) final SearchQueryDecisionHelper helper = getDecisionHelper(index); - final long totalNumRows = index.getNumRows(); final double useIndexStrategyCost = helper.getBitmapIntersectCost() * computeTotalCard(index, dimsToSearch); - final double cursorOnlyStrategyCost = (filter == null ? 1. : filter.estimateSelectivity(selector, totalNumRows)) - * totalNumRows * dimsToSearch.size(); + final double cursorOnlyStrategyCost = (filter == null ? 1. 
: filter.estimateSelectivity(index, selector)) + * selector.getNumRows() + * dimsToSearch.size(); log.debug("Use-index strategy cost: %f, cursor-only strategy cost: %f", useIndexStrategyCost, cursorOnlyStrategyCost diff --git a/processing/src/main/java/io/druid/segment/filter/AndFilter.java b/processing/src/main/java/io/druid/segment/filter/AndFilter.java index fd29b4343636..4c5ae63e0136 100644 --- a/processing/src/main/java/io/druid/segment/filter/AndFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/AndFilter.java @@ -28,6 +28,7 @@ import io.druid.query.filter.Filter; import io.druid.query.filter.RowOffsetMatcherFactory; import io.druid.query.filter.ValueMatcher; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; import java.util.ArrayList; @@ -149,12 +150,12 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) } @Override - public double estimateSelectivity(BitmapIndexSelector selector, long totalNumRows) + public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) { // Estimate selectivity with attribute value independence assumption double selectivity = 1.0; for (final Filter filter : filters) { - selectivity *= filter.estimateSelectivity(selector, totalNumRows); + selectivity *= filter.estimateSelectivity(columnSelector, indexSelector); } return selectivity; } diff --git a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java index 81590af3aa98..a892fd01d77d 100644 --- a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java @@ -31,6 +31,7 @@ import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; import io.druid.query.ordering.StringComparators; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; import 
io.druid.segment.column.BitmapIndex; @@ -56,22 +57,14 @@ public BoundFilter(final BoundDimFilter boundDimFilter) @Override public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector) { - if (boundDimFilter.getOrdering().equals(StringComparators.LEXICOGRAPHIC) && extractionFn == null) { - // Optimization for lexicographic bounds with no extractionFn => binary search through the index - + if (supportShortCircuit()) { final BitmapIndex bitmapIndex = selector.getBitmapIndex(boundDimFilter.getDimension()); if (bitmapIndex == null || bitmapIndex.getCardinality() == 0) { return doesMatch(null) ? Filters.allTrue(selector) : Filters.allFalse(selector); } - // search for start, end indexes in the bitmaps; then include all bitmaps between those points - final Pair indexes = getStartEndIndexes(bitmapIndex); - final int startIndex = indexes.lhs; - final int endIndex = indexes.rhs; - - return selector.getBitmapFactory().union( - getBitmapIterator(startIndex, endIndex, bitmapIndex) + return selector.getBitmapFactory().union(getBitmapIterator(boundDimFilter, bitmapIndex) ); } else { return Filters.matchPredicate( @@ -83,49 +76,53 @@ public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector) } @Override - public ValueMatcher makeMatcher(ColumnSelectorFactory factory) + public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) { - return Filters.makeValueMatcher(factory, boundDimFilter.getDimension(), getPredicateFactory()); - } - - @Override - public boolean supportsBitmapIndex(BitmapIndexSelector selector) - { - return selector.getBitmapIndex(boundDimFilter.getDimension()) != null; - } - - @Override - public double estimateSelectivity(BitmapIndexSelector selector, long totalNumRows) - { - if (selector.getBitmapIndex(boundDimFilter.getDimension()) != null) { - final BitmapIndex bitmapIndex = selector.getBitmapIndex(boundDimFilter.getDimension()); + if (supportShortCircuit()) { + final BitmapIndex bitmapIndex = 
indexSelector.getBitmapIndex(boundDimFilter.getDimension()); if (bitmapIndex == null || bitmapIndex.getCardinality() == 0) { return doesMatch(null) ? 1. : 0.; } - // search for start, end indexes in the bitmaps; then include all bitmaps between those points - final Pair indexes = getStartEndIndexes(bitmapIndex); - final int startIndex = indexes.lhs; - final int endIndex = indexes.rhs; - - long matchRowNum = 0; - for (final ImmutableBitmap bitmap : getBitmapIterator(startIndex, endIndex, bitmapIndex)) { - matchRowNum += bitmap.size(); - } - - return (double) matchRowNum / totalNumRows; + return Filters.estimatePredicateSelectivity( + columnSelector, + boundDimFilter.getDimension(), + getBitmapIterator(boundDimFilter, bitmapIndex), + indexSelector.getNumRows() + ); } else { return Filters.estimatePredicateSelectivity( + columnSelector, boundDimFilter.getDimension(), - selector, - getPredicateFactory().makeStringPredicate(), - totalNumRows + indexSelector, + getPredicateFactory().makeStringPredicate() ); } } - private Pair getStartEndIndexes(final BitmapIndex bitmapIndex) + private boolean supportShortCircuit() + { + // Optimization for lexicographic bounds with no extractionFn => binary search through the index + return boundDimFilter.getOrdering().equals(StringComparators.LEXICOGRAPHIC) && extractionFn == null; + } + + @Override + public ValueMatcher makeMatcher(ColumnSelectorFactory factory) + { + return Filters.makeValueMatcher(factory, boundDimFilter.getDimension(), getPredicateFactory()); + } + + @Override + public boolean supportsBitmapIndex(BitmapIndexSelector selector) + { + return selector.getBitmapIndex(boundDimFilter.getDimension()) != null; + } + + private static Pair getStartEndIndexes( + final BoundDimFilter boundDimFilter, + final BitmapIndex bitmapIndex + ) { final int startIndex; // inclusive final int endIndex; // exclusive @@ -156,11 +153,15 @@ private Pair getStartEndIndexes(final BitmapIndex bitmapIndex) } private static Iterable 
getBitmapIterator( - final int startIndex, - final int endIndex, + final BoundDimFilter boundDimFilter, final BitmapIndex bitmapIndex ) { + // search for start, end indexes in the bitmaps; then include all bitmaps between those points + final Pair indexes = getStartEndIndexes(boundDimFilter, bitmapIndex); + final int startIndex = indexes.lhs; + final int endIndex = indexes.rhs; + return new Iterable() { @Override diff --git a/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java b/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java index 0878945e73ee..030e175c63be 100644 --- a/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java @@ -28,6 +28,7 @@ import io.druid.query.filter.DruidPredicateFactory; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; /** @@ -105,13 +106,13 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) } @Override - public double estimateSelectivity(BitmapIndexSelector selector, long totalNumRows) + public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) { return Filters.estimatePredicateSelectivity( + columnSelector, dimension, - selector, - predicateFactory.makeStringPredicate(), - totalNumRows + indexSelector, + predicateFactory.makeStringPredicate() ); } diff --git a/processing/src/main/java/io/druid/segment/filter/Filters.java b/processing/src/main/java/io/druid/segment/filter/Filters.java index 0569d46a05d6..ff404b3708fb 100644 --- a/processing/src/main/java/io/druid/segment/filter/Filters.java +++ b/processing/src/main/java/io/druid/segment/filter/Filters.java @@ -40,6 +40,7 @@ import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcherColumnSelectorStrategy; import 
io.druid.query.filter.ValueMatcherColumnSelectorStrategyFactory; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; import io.druid.segment.DimensionHandlerUtils; import io.druid.segment.LongColumnSelector; @@ -52,6 +53,7 @@ import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; +import java.util.Random; /** */ @@ -59,6 +61,8 @@ public class Filters { public static final List FILTERABLE_TYPES = ImmutableList.of(ValueType.STRING, ValueType.LONG); private static final String CTX_KEY_USE_FILTER_CNF = "useFilterCNF"; + private static final Random random = new Random(System.currentTimeMillis()); + private static final int SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION = 100; /** * Convert a list of DimFilters to a list of Filters. @@ -194,6 +198,8 @@ public static ImmutableBitmap allTrue(final BitmapIndexSelector selector) * @param predicate predicate to use * * @return bitmap of matching rows + * + * @see Filters#estimatePredicateSelectivity(ColumnSelector, String, BitmapIndexSelector, Predicate) */ public static ImmutableBitmap matchPredicate( final String dimension, @@ -208,60 +214,109 @@ public static ImmutableBitmap matchPredicate( // Missing dimension -> match all rows if the predicate matches null; match no rows otherwise final Indexed dimValues = selector.getDimensionValues(dimension); if (dimValues == null || dimValues.size() == 0) { - if (predicate.apply(null)) { - return selector.getBitmapFactory().complement( - selector.getBitmapFactory().makeEmptyImmutableBitmap(), - selector.getNumRows() - ); - } else { - return selector.getBitmapFactory().makeEmptyImmutableBitmap(); - } + return predicate.apply(null) ? 
allTrue(selector) : allFalse(selector); } // Apply predicate to all dimension values and union the matching bitmaps final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension); return selector.getBitmapFactory() - .union(createPredicateQualifyingBitmapIterator(bitmapIndex, predicate, dimValues)); + .union(makePredicateQualifyingBitmapIterable(bitmapIndex, predicate, dimValues)); } + /** + * Return an estimated selectivity for bitmaps of all values matching the given predicate. + * + * @param columnSelector column selector + * @param dimension dimension to look at + * @param indexSelector bitmap selector + * @param predicate predicate to use + * + * @return estimated selectivity + * + * @see Filters#matchPredicate(String, BitmapIndexSelector, Predicate) + */ static double estimatePredicateSelectivity( + final ColumnSelector columnSelector, final String dimension, - final BitmapIndexSelector selector, - final Predicate predicate, - final long totalNumRows + final BitmapIndexSelector indexSelector, + final Predicate predicate ) { Preconditions.checkNotNull(dimension, "dimension"); - Preconditions.checkNotNull(selector, "selector"); + Preconditions.checkNotNull(indexSelector, "selector"); Preconditions.checkNotNull(predicate, "predicate"); // Missing dimension -> match all rows if the predicate matches null; match no rows otherwise - final Indexed dimValues = selector.getDimensionValues(dimension); + final Indexed dimValues = indexSelector.getDimensionValues(dimension); if (dimValues == null || dimValues.size() == 0) { - if (predicate.apply(null)) { - return 1.; - } else { - return 0.; - } + return predicate.apply(null) ? 1. 
: 0.; } // Apply predicate to all dimension values and union the matching bitmaps - final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension); - final Iterator iterator = createPredicateQualifyingBitmapIterator( - bitmapIndex, - predicate, - dimValues - ).iterator(); - - long matchRowNum = 0; - while (iterator.hasNext()) { - final ImmutableBitmap next = iterator.next(); - matchRowNum += next.size(); + final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension); + return estimatePredicateSelectivity( + columnSelector, + dimension, + makePredicateQualifyingBitmapIterable( + bitmapIndex, + predicate, + dimValues + ), + indexSelector.getNumRows() + ); + } + + static double estimatePredicateSelectivity( + ColumnSelector columnSelector, + String dimension, + Iterable bitmaps, + long totalNumRows + ) + { + final ColumnCapabilities columnCapabilities = columnSelector.getColumn(dimension).getCapabilities(); + return estimatePredicateSelectivity( + bitmaps, + totalNumRows, + columnCapabilities == null || columnCapabilities.hasMultipleValues() + ); + } + + static double estimatePredicateSelectivity( + Iterable bitmaps, + long totalNumRows, + boolean isMultiValueDimension + ) + { + long numMatchedRows = 0; + final List bitmapList = Lists.newArrayList(bitmaps); + for (ImmutableBitmap bitmap : bitmapList) { + numMatchedRows += bitmap.size(); + } + + // assume multi-value column if columnCapabilities is null + if (isMultiValueDimension) { + final double estimated = numMatchedRows * Filters.computeNonOverlapRatioFromBitmapSamples(bitmapList) + / totalNumRows; + return Math.min(1., estimated); + } else { + return (double) numMatchedRows / totalNumRows; + } + } + + private static double computeNonOverlapRatioFromBitmapSamples(List bitmaps) + { + final int sampleNum = Math.min(SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION, bitmaps.size()); + double nonOverlapRatioSum = 0.; + for (int i = 0; i < sampleNum; i++) { + final ImmutableBitmap b1 = 
bitmaps.get(random.nextInt(bitmaps.size())); + final ImmutableBitmap b2 = bitmaps.get(random.nextInt(bitmaps.size())); + + nonOverlapRatioSum += b1.union(b2).size() / (b1.size() + b2.size()); } - return (double) matchRowNum / totalNumRows; + return nonOverlapRatioSum / sampleNum; } - private static Iterable createPredicateQualifyingBitmapIterator( + private static Iterable makePredicateQualifyingBitmapIterable( final BitmapIndex bitmapIndex, final Predicate predicate, final Indexed dimValues diff --git a/processing/src/main/java/io/druid/segment/filter/InFilter.java b/processing/src/main/java/io/druid/segment/filter/InFilter.java index 0f6743ea65fa..dab73a337245 100644 --- a/processing/src/main/java/io/druid/segment/filter/InFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/InFilter.java @@ -31,6 +31,7 @@ import io.druid.query.filter.DruidPredicateFactory; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; import java.util.Set; @@ -61,18 +62,7 @@ public InFilter( public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector) { if (extractionFn == null) { - return selector.getBitmapFactory().union( - Iterables.transform( - values, new Function() - { - @Override - public ImmutableBitmap apply(String value) - { - return selector.getBitmapIndex(dimension, value); - } - } - ) - ); + return selector.getBitmapFactory().union(getBitmapIterable(selector)); } else { return Filters.matchPredicate( dimension, @@ -83,34 +73,49 @@ public ImmutableBitmap apply(String value) } @Override - public ValueMatcher makeMatcher(ColumnSelectorFactory factory) + public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) { - return Filters.makeValueMatcher(factory, dimension, getPredicateFactory()); + if (extractionFn == null) { + return Filters.estimatePredicateSelectivity( + columnSelector, + dimension, + 
getBitmapIterable(indexSelector), + indexSelector.getNumRows() + ); + } else { + return Filters.estimatePredicateSelectivity( + columnSelector, + dimension, + indexSelector, + getPredicateFactory().makeStringPredicate() + ); + } + } + + private Iterable getBitmapIterable(final BitmapIndexSelector selector) + { + return Iterables.transform( + values, new Function() + { + @Override + public ImmutableBitmap apply(String value) + { + return selector.getBitmapIndex(dimension, value); + } + } + ); } @Override - public boolean supportsBitmapIndex(BitmapIndexSelector selector) + public ValueMatcher makeMatcher(ColumnSelectorFactory factory) { - return selector.getBitmapIndex(dimension) != null; + return Filters.makeValueMatcher(factory, dimension, getPredicateFactory()); } @Override - public double estimateSelectivity(BitmapIndexSelector selector, long totalNumRows) + public boolean supportsBitmapIndex(BitmapIndexSelector selector) { - if (extractionFn == null) { - long matchedNumRows = 0; - for (final String eachVal : values) { - matchedNumRows += selector.getBitmapIndex(dimension, eachVal).size(); - } - return (double) matchedNumRows / totalNumRows; - } else { - return Filters.estimatePredicateSelectivity( - dimension, - selector, - getPredicateFactory().makeStringPredicate(), - totalNumRows - ); - } + return selector.getBitmapIndex(dimension) != null; } private DruidPredicateFactory getPredicateFactory() diff --git a/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java b/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java index 9d964552bbb8..3aa24f5363c6 100644 --- a/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java @@ -25,6 +25,7 @@ import io.druid.query.filter.Filter; import io.druid.query.filter.JavaScriptDimFilter; import io.druid.query.filter.ValueMatcher; +import io.druid.segment.ColumnSelector; import 
io.druid.segment.ColumnSelectorFactory; import org.mozilla.javascript.Context; @@ -47,22 +48,37 @@ public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector) { final Context cx = Context.enter(); try { - final Predicate contextualPredicate = new Predicate() - { - @Override - public boolean apply(String input) - { - return predicateFactory.applyInContext(cx, input); - } - }; + return Filters.matchPredicate(dimension, selector, makeStringPredicate(cx)); + } + finally { + Context.exit(); + } + } - return Filters.matchPredicate(dimension, selector, contextualPredicate); + @Override + public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) + { + final Context cx = Context.enter(); + try { + return Filters.estimatePredicateSelectivity(columnSelector, dimension, indexSelector, makeStringPredicate(cx)); } finally { Context.exit(); } } + private Predicate makeStringPredicate(final Context context) + { + return new Predicate() + { + @Override + public boolean apply(String input) + { + return predicateFactory.applyInContext(context, input); + } + }; + } + @Override public ValueMatcher makeMatcher(ColumnSelectorFactory factory) { @@ -75,25 +91,4 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) { return selector.getBitmapIndex(dimension) != null; } - - @Override - public double estimateSelectivity(BitmapIndexSelector selector, long totalNumRows) - { - final Context cx = Context.enter(); - try { - final Predicate contextualPredicate = new Predicate() - { - @Override - public boolean apply(String input) - { - return predicateFactory.applyInContext(cx, input); - } - }; - - return Filters.estimatePredicateSelectivity(dimension, selector, contextualPredicate, totalNumRows); - } - finally { - Context.exit(); - } - } } diff --git a/processing/src/main/java/io/druid/segment/filter/LikeFilter.java b/processing/src/main/java/io/druid/segment/filter/LikeFilter.java index c825fe518684..01bb24260241 100644 --- 
a/processing/src/main/java/io/druid/segment/filter/LikeFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/LikeFilter.java @@ -26,6 +26,7 @@ import io.druid.query.filter.Filter; import io.druid.query.filter.LikeDimFilter; import io.druid.query.filter.ValueMatcher; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; import io.druid.segment.column.BitmapIndex; import io.druid.segment.data.Indexed; @@ -52,10 +53,10 @@ public LikeFilter( @Override public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector) { - if (extractionFn == null && likeMatcher.getSuffixMatch() == LikeDimFilter.LikeMatcher.SuffixMatch.MATCH_EMPTY) { + if (directPrefixMatchable()) { // dimension equals prefix return selector.getBitmapIndex(dimension, likeMatcher.getPrefix()); - } else if (extractionFn == null && !likeMatcher.getPrefix().isEmpty()) { + } else if (directPrefixAndSuffixMatchable()) { // dimension startsWith prefix and is accepted by likeMatcher.matchesSuffixOnly final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension); @@ -67,21 +68,9 @@ public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector) // search for start, end indexes in the bitmaps; then include all matching bitmaps between those points final Indexed dimValues = selector.getDimensionValues(dimension); - final String lower = Strings.nullToEmpty(likeMatcher.getPrefix()); - final String upper = Strings.nullToEmpty(likeMatcher.getPrefix()) + Character.MAX_VALUE; - final int startIndex; // inclusive - final int endIndex; // exclusive - - final int lowerFound = bitmapIndex.getIndex(lower); - startIndex = lowerFound >= 0 ? lowerFound : -(lowerFound + 1); - - final int upperFound = bitmapIndex.getIndex(upper); - endIndex = upperFound >= 0 ? upperFound + 1 : -(upperFound + 1); - // Union bitmaps for all matching dimension values in range. // Use lazy iterator to allow unioning bitmaps one by one and avoid materializing all of them at once. 
- return selector.getBitmapFactory() - .union(getBitmapIterator(startIndex, endIndex, bitmapIndex, likeMatcher, dimValues)); + return selector.getBitmapFactory().union(getBitmapIterator(bitmapIndex, likeMatcher, dimValues)); } else { // fallback return Filters.matchPredicate( @@ -93,26 +82,15 @@ public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector) } @Override - public ValueMatcher makeMatcher(ColumnSelectorFactory factory) - { - return Filters.makeValueMatcher(factory, dimension, likeMatcher.predicateFactory(extractionFn)); - } - - @Override - public boolean supportsBitmapIndex(BitmapIndexSelector selector) - { - return selector.getBitmapIndex(dimension) != null; - } - - @Override - public double estimateSelectivity(BitmapIndexSelector selector, long totalNumRows) + public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) { - if (extractionFn == null && likeMatcher.getSuffixMatch() == LikeDimFilter.LikeMatcher.SuffixMatch.MATCH_EMPTY) { + if (directPrefixMatchable()) { // dimension equals prefix - return (double) selector.getBitmapIndex(dimension, likeMatcher.getPrefix()).size() / totalNumRows; - } else if (extractionFn == null && !likeMatcher.getPrefix().isEmpty()) { + return (double) indexSelector.getBitmapIndex(dimension, likeMatcher.getPrefix()).size() + / indexSelector.getNumRows(); + } else if (directPrefixAndSuffixMatchable()) { // dimension startsWith prefix and is accepted by likeMatcher.matchesSuffixOnly - final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension); + final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension); if (bitmapIndex == null) { // Treat this as a column full of nulls @@ -120,54 +98,65 @@ public double estimateSelectivity(BitmapIndexSelector selector, long totalNumRow } // search for start, end indexes in the bitmaps; then include all matching bitmaps between those points - final Indexed dimValues = selector.getDimensionValues(dimension); - - final 
String lower = Strings.nullToEmpty(likeMatcher.getPrefix()); - final String upper = Strings.nullToEmpty(likeMatcher.getPrefix()) + Character.MAX_VALUE; - final int startIndex; // inclusive - final int endIndex; // exclusive - - final int lowerFound = bitmapIndex.getIndex(lower); - startIndex = lowerFound >= 0 ? lowerFound : -(lowerFound + 1); - - final int upperFound = bitmapIndex.getIndex(upper); - endIndex = upperFound >= 0 ? upperFound + 1 : -(upperFound + 1); + final Indexed dimValues = indexSelector.getDimensionValues(dimension); // Use lazy iterator to allow getting bitmap size one by one and avoid materializing all of them at once. - final Iterator iterator = getBitmapIterator( - startIndex, - endIndex, - bitmapIndex, - likeMatcher, - dimValues - ).iterator(); - - long matchRowNum = 0; - while (iterator.hasNext()) { - final ImmutableBitmap bitmap = iterator.next(); - matchRowNum += bitmap.size(); - } - - return (double) matchRowNum / totalNumRows; + return Filters.estimatePredicateSelectivity( + columnSelector, + dimension, + getBitmapIterator(bitmapIndex, likeMatcher, dimValues), + indexSelector.getNumRows() + ); } else { // fallback return Filters.estimatePredicateSelectivity( + columnSelector, dimension, - selector, - likeMatcher.predicateFactory(extractionFn).makeStringPredicate(), - totalNumRows + indexSelector, + likeMatcher.predicateFactory(extractionFn).makeStringPredicate() ); } } + private boolean directPrefixMatchable() + { + return extractionFn == null && likeMatcher.getSuffixMatch() == LikeDimFilter.LikeMatcher.SuffixMatch.MATCH_EMPTY; + } + + private boolean directPrefixAndSuffixMatchable() + { + return extractionFn == null && !likeMatcher.getPrefix().isEmpty(); + } + + @Override + public ValueMatcher makeMatcher(ColumnSelectorFactory factory) + { + return Filters.makeValueMatcher(factory, dimension, likeMatcher.predicateFactory(extractionFn)); + } + + @Override + public boolean supportsBitmapIndex(BitmapIndexSelector selector) + { + return 
selector.getBitmapIndex(dimension) != null; + } + private static Iterable getBitmapIterator( - final int startIndex, - final int endIndex, final BitmapIndex bitmapIndex, final LikeDimFilter.LikeMatcher likeMatcher, final Indexed dimValues ) { + final String lower = Strings.nullToEmpty(likeMatcher.getPrefix()); + final String upper = Strings.nullToEmpty(likeMatcher.getPrefix()) + Character.MAX_VALUE; + final int startIndex; // inclusive + final int endIndex; // exclusive + + final int lowerFound = bitmapIndex.getIndex(lower); + startIndex = lowerFound >= 0 ? lowerFound : -(lowerFound + 1); + + final int upperFound = bitmapIndex.getIndex(upper); + endIndex = upperFound >= 0 ? upperFound + 1 : -(upperFound + 1); + return new Iterable() { @Override diff --git a/processing/src/main/java/io/druid/segment/filter/NotFilter.java b/processing/src/main/java/io/druid/segment/filter/NotFilter.java index 44eb180c1a5b..8358a5cee9cc 100644 --- a/processing/src/main/java/io/druid/segment/filter/NotFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/NotFilter.java @@ -23,6 +23,7 @@ import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; /** @@ -69,9 +70,9 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) } @Override - public double estimateSelectivity(BitmapIndexSelector selector, long totalNumRows) + public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) { - return 1. - baseFilter.estimateSelectivity(selector, totalNumRows); + return 1. 
- baseFilter.estimateSelectivity(columnSelector, indexSelector); } public Filter getBaseFilter() diff --git a/processing/src/main/java/io/druid/segment/filter/OrFilter.java b/processing/src/main/java/io/druid/segment/filter/OrFilter.java index be173764587c..98daed553178 100644 --- a/processing/src/main/java/io/druid/segment/filter/OrFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/OrFilter.java @@ -27,6 +27,7 @@ import io.druid.query.filter.Filter; import io.druid.query.filter.RowOffsetMatcherFactory; import io.druid.query.filter.ValueMatcher; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; import java.util.ArrayList; @@ -156,12 +157,12 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) } @Override - public double estimateSelectivity(BitmapIndexSelector selector, long totalNumRows) + public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) { // Estimate selectivity with attribute value independence assumption double selectivity = 0; for (final Filter filter : filters) { - selectivity += filter.estimateSelectivity(selector, totalNumRows); + selectivity += filter.estimateSelectivity(columnSelector, indexSelector); } return Math.min(selectivity, 1.); } diff --git a/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java b/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java index b514ba594cad..ed76732c58a1 100644 --- a/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java @@ -23,6 +23,7 @@ import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; /** @@ -60,9 +61,9 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) } @Override - public double 
estimateSelectivity(BitmapIndexSelector selector, long totalNumRows) + public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) { - return (double) selector.getBitmapIndex(dimension, value).size() / totalNumRows; + return (double) indexSelector.getBitmapIndex(dimension, value).size() / indexSelector.getNumRows(); } @Override diff --git a/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java b/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java index 41960b4f0c22..b2de0ece2eb9 100644 --- a/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java @@ -27,6 +27,7 @@ import io.druid.query.filter.DruidPredicateFactory; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; import io.druid.segment.incremental.SpatialDimensionRowTransformer; @@ -102,12 +103,13 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) } @Override - public double estimateSelectivity(BitmapIndexSelector selector, long totalNumRows) + public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) { - long matchRowNum = 0; - for (final ImmutableBitmap bitmap : selector.getSpatialIndex(dimension).search(bound)) { - matchRowNum += bitmap.size(); - } - return (double) matchRowNum / totalNumRows; + // handle rtree overlap + return Filters.estimatePredicateSelectivity( + indexSelector.getSpatialIndex(dimension).search(bound), + indexSelector.getNumRows(), + true + ); } } diff --git a/processing/src/test/java/io/druid/segment/filter/BaseFilterTest.java b/processing/src/test/java/io/druid/segment/filter/BaseFilterTest.java index 1aadf486064e..0d6d7aae11d4 100644 --- a/processing/src/test/java/io/druid/segment/filter/BaseFilterTest.java +++ 
b/processing/src/test/java/io/druid/segment/filter/BaseFilterTest.java @@ -43,6 +43,7 @@ import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; import io.druid.query.groupby.RowBasedColumnSelectorFactory; +import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; import io.druid.segment.Cursor; import io.druid.segment.DimensionSelector; @@ -387,7 +388,7 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) } @Override - public double estimateSelectivity(BitmapIndexSelector selector, long totalNumRows) + public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) { return 1.0; } From 7dc4e4f9dfb0a74421a8a84a278319dca1562d59 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Tue, 24 Jan 2017 20:31:30 +0900 Subject: [PATCH 3/9] Lazy bitmap materialization for bitmap sampling and java docs --- .../benchmark/query/SearchBenchmark.java | 252 ++++++++++-------- .../io/druid/segment/filter/BoundFilter.java | 21 +- .../java/io/druid/segment/filter/Filters.java | 238 ++++++++++++++--- .../io/druid/segment/filter/InFilter.java | 22 +- .../io/druid/segment/filter/LikeFilter.java | 73 +++-- .../druid/segment/filter/SpatialFilter.java | 2 +- 6 files changed, 424 insertions(+), 184 deletions(-) diff --git a/benchmarks/src/main/java/io/druid/benchmark/query/SearchBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/query/SearchBenchmark.java index 205261754bd8..c295702b94aa 100644 --- a/benchmarks/src/main/java/io/druid/benchmark/query/SearchBenchmark.java +++ b/benchmarks/src/main/java/io/druid/benchmark/query/SearchBenchmark.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Suppliers; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.hash.Hashing; @@ -39,6 +40,7 @@ import 
io.druid.java.util.common.guava.Sequences; import io.druid.java.util.common.logger.Logger; import io.druid.query.Druids; +import io.druid.query.Druids.SearchQueryBuilder; import io.druid.query.FinalizeResultsQueryRunner; import io.druid.query.Query; import io.druid.query.QueryRunner; @@ -154,143 +156,156 @@ public int columnCacheSizeBytes() private void setupQueries() { // queries for the basic schema - Map basicQueries = new LinkedHashMap<>(); - BenchmarkSchemaInfo basicSchema = BenchmarkSchemas.SCHEMA_MAP.get("basic"); + final Map basicQueries = new LinkedHashMap<>(); + final BenchmarkSchemaInfo basicSchema = BenchmarkSchemas.SCHEMA_MAP.get("basic"); - { // basic.A - QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval())); - - Druids.SearchQueryBuilder queryBuilderA = - Druids.newSearchQueryBuilder() - .dataSource("blah") - .granularity(QueryGranularities.ALL) - .intervals(intervalSpec) - .query("123"); - - basicQueries.put("A", queryBuilderA); + final List queryTypes = ImmutableList.of("A", "B", "C", "D"); + for (final String eachType : queryTypes) { + basicQueries.put(eachType, makeQuery(eachType, basicSchema)); } - { // basic.B - final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval())); + SCHEMA_QUERY_MAP.put("basic", basicQueries); + } - final List dimUniformFilterVals = Lists.newArrayList(); - int resultNum = (int) (100000 * 0.1); - int step = 100000 / resultNum; - for (int i = 1; i < 100001 && dimUniformFilterVals.size() < resultNum; i += step) { - dimUniformFilterVals.add(String.valueOf(i)); - } + private static SearchQueryBuilder makeQuery(final String name, final BenchmarkSchemaInfo basicSchema) + { + switch (name) { + case "A": + return basicA(basicSchema); + case "B": + return basicB(basicSchema); + case "C": + return basicC(basicSchema); + case "D": + return basicD(basicSchema); + default: + return null; + } + } - List 
dimHyperUniqueFilterVals = Lists.newArrayList(); - resultNum = (int) (100000 * 0.1); - step = 100000 / resultNum; - for (int i = 0; i < 100001 && dimHyperUniqueFilterVals.size() < resultNum; i += step) { - dimHyperUniqueFilterVals.add(String.valueOf(i)); - } + private static SearchQueryBuilder basicA(final BenchmarkSchemaInfo basicSchema) + { + final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval())); - final List dimFilters = Lists.newArrayList(); - dimFilters.add(new InDimFilter("dimUniform", dimUniformFilterVals, null)); - dimFilters.add(new InDimFilter("dimHyperUnique", dimHyperUniqueFilterVals, null)); + return Druids.newSearchQueryBuilder() + .dataSource("blah") + .granularity(QueryGranularities.ALL) + .intervals(intervalSpec) + .query("123"); + } - final Druids.SearchQueryBuilder queryBuilderB = - Druids.newSearchQueryBuilder() - .dataSource("blah") - .granularity(QueryGranularities.ALL) - .intervals(intervalSpec) - .query("") - .dimensions(Lists.newArrayList("dimUniform", "dimHyperUnique")) - .filters(new AndDimFilter(dimFilters)); + private static SearchQueryBuilder basicB(final BenchmarkSchemaInfo basicSchema) + { + final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval())); - basicQueries.put("B", queryBuilderB); + final List dimUniformFilterVals = Lists.newArrayList(); + int resultNum = (int) (100000 * 0.1); + int step = 100000 / resultNum; + for (int i = 1; i < 100001 && dimUniformFilterVals.size() < resultNum; i += step) { + dimUniformFilterVals.add(String.valueOf(i)); } - { // basic.C - final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval())); + List dimHyperUniqueFilterVals = Lists.newArrayList(); + resultNum = (int) (100000 * 0.1); + step = 100000 / resultNum; + for (int i = 0; i < 100001 && dimHyperUniqueFilterVals.size() < resultNum; i += step) { + 
dimHyperUniqueFilterVals.add(String.valueOf(i)); + } - final List dimUniformFilterVals = Lists.newArrayList(); - final int resultNum = (int) (100000 * 0.1); - final int step = 100000 / resultNum; - for (int i = 1; i < 100001 && dimUniformFilterVals.size() < resultNum; i += step) { - dimUniformFilterVals.add(String.valueOf(i)); - } + final List dimFilters = Lists.newArrayList(); + dimFilters.add(new InDimFilter("dimUniform", dimUniformFilterVals, null)); + dimFilters.add(new InDimFilter("dimHyperUnique", dimHyperUniqueFilterVals, null)); + + return Druids.newSearchQueryBuilder() + .dataSource("blah") + .granularity(QueryGranularities.ALL) + .intervals(intervalSpec) + .query("") + .dimensions(Lists.newArrayList("dimUniform", "dimHyperUnique")) + .filters(new AndDimFilter(dimFilters)); + } - final String dimName = "dimUniform"; - final List dimFilters = Lists.newArrayList(); - dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, IdentityExtractionFn.getInstance())); - dimFilters.add(new SelectorDimFilter(dimName, "3", StrlenExtractionFn.instance())); - dimFilters.add(new BoundDimFilter(dimName, "100", "10000", true, true, true, new DimExtractionFn() - { - @Override - public byte[] getCacheKey() - { - return new byte[] {0xF}; - } + private static SearchQueryBuilder basicC(final BenchmarkSchemaInfo basicSchema) + { + final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval())); - @Override - public String apply(String value) - { - return String.valueOf(Long.parseLong(value) + 1); - } + final List dimUniformFilterVals = Lists.newArrayList(); + final int resultNum = (int) (100000 * 0.1); + final int step = 100000 / resultNum; + for (int i = 1; i < 100001 && dimUniformFilterVals.size() < resultNum; i += step) { + dimUniformFilterVals.add(String.valueOf(i)); + } - @Override - public boolean preservesOrdering() - { - return false; - } + final String dimName = "dimUniform"; + final List dimFilters = 
Lists.newArrayList(); + dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, IdentityExtractionFn.getInstance())); + dimFilters.add(new SelectorDimFilter(dimName, "3", StrlenExtractionFn.instance())); + dimFilters.add(new BoundDimFilter(dimName, "100", "10000", true, true, true, new DimExtractionFn() + { + @Override + public byte[] getCacheKey() + { + return new byte[]{0xF}; + } - @Override - public ExtractionType getExtractionType() - { - return ExtractionType.ONE_TO_ONE; - } - }, null)); - dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, new LowerExtractionFn(null))); - dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, new UpperExtractionFn(null))); - dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, new SubstringDimExtractionFn(1, 3))); - - final Druids.SearchQueryBuilder queryBuilderC = - Druids.newSearchQueryBuilder() - .dataSource("blah") - .granularity(QueryGranularities.ALL) - .intervals(intervalSpec) - .query("") - .dimensions(Lists.newArrayList("dimUniform")) - .filters(new AndDimFilter(dimFilters)); - - basicQueries.put("C", queryBuilderC); - } + @Override + public String apply(String value) + { + return String.valueOf(Long.parseLong(value) + 1); + } - { // basic.D - final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval())); + @Override + public boolean preservesOrdering() + { + return false; + } - final List dimUniformFilterVals = Lists.newArrayList(); - final int resultNum = (int) (100000 * 0.1); - final int step = 100000 / resultNum; - for (int i = 1; i < 100001 && dimUniformFilterVals.size() < resultNum; i += step) { - dimUniformFilterVals.add(String.valueOf(i)); + @Override + public ExtractionType getExtractionType() + { + return ExtractionType.ONE_TO_ONE; } + }, null)); + dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, new LowerExtractionFn(null))); + dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, new 
UpperExtractionFn(null))); + dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, new SubstringDimExtractionFn(1, 3))); + + return Druids.newSearchQueryBuilder() + .dataSource("blah") + .granularity(QueryGranularities.ALL) + .intervals(intervalSpec) + .query("") + .dimensions(Lists.newArrayList("dimUniform")) + .filters(new AndDimFilter(dimFilters)); + } - final String dimName = "dimUniform"; - final List dimFilters = Lists.newArrayList(); - dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, null)); - dimFilters.add(new SelectorDimFilter(dimName, "3", null)); - dimFilters.add(new BoundDimFilter(dimName, "100", "10000", true, true, true, null, null)); - dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, null)); - dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, null)); - dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, null)); - - final Druids.SearchQueryBuilder queryBuilderC = - Druids.newSearchQueryBuilder() - .dataSource("blah") - .granularity(QueryGranularities.ALL) - .intervals(intervalSpec) - .query("") - .dimensions(Lists.newArrayList("dimUniform")) - .filters(new AndDimFilter(dimFilters)); - - basicQueries.put("D", queryBuilderC); + private static SearchQueryBuilder basicD(final BenchmarkSchemaInfo basicSchema) + { + final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval())); + + final List dimUniformFilterVals = Lists.newArrayList(); + final int resultNum = (int) (100000 * 0.1); + final int step = 100000 / resultNum; + for (int i = 1; i < 100001 && dimUniformFilterVals.size() < resultNum; i += step) { + dimUniformFilterVals.add(String.valueOf(i)); } - SCHEMA_QUERY_MAP.put("basic", basicQueries); + final String dimName = "dimUniform"; + final List dimFilters = Lists.newArrayList(); + dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, null)); + dimFilters.add(new SelectorDimFilter(dimName, "3", null)); + dimFilters.add(new 
BoundDimFilter(dimName, "100", "10000", true, true, true, null, null)); + dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, null)); + dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, null)); + dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, null)); + + return Druids.newSearchQueryBuilder() + .dataSource("blah") + .granularity(QueryGranularities.ALL) + .intervals(intervalSpec) + .query("") + .dimensions(Lists.newArrayList("dimUniform")) + .filters(new AndDimFilter(dimFilters)); } @Setup @@ -452,7 +467,10 @@ public void queryMultiQueryableIndex(Blackhole blackhole) throws Exception ); Sequence> queryResult = theRunner.run(query, Maps.newHashMap()); - List> results = Sequences.toList(queryResult, Lists.>newArrayList()); + List> results = Sequences.toList( + queryResult, + Lists.>newArrayList() + ); for (Result result : results) { List hits = result.getValue().getValue(); diff --git a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java index a892fd01d77d..f149dbe7e753 100644 --- a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java @@ -86,9 +86,10 @@ public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSele } return Filters.estimatePredicateSelectivity( + bitmapIndex, columnSelector, boundDimFilter.getDimension(), - getBitmapIterator(boundDimFilter, bitmapIndex), + getBitmapIndexIterator(boundDimFilter, bitmapIndex), indexSelector.getNumRows() ); } else { @@ -156,18 +157,26 @@ private static Iterable getBitmapIterator( final BoundDimFilter boundDimFilter, final BitmapIndex bitmapIndex ) + { + return Filters.bitmapsFromIndexes(getBitmapIndexIterator(boundDimFilter, bitmapIndex), bitmapIndex); + } + + private static Iterable getBitmapIndexIterator( + final BoundDimFilter boundDimFilter, + final BitmapIndex bitmapIndex + ) { // search for start, 
end indexes in the bitmaps; then include all bitmaps between those points final Pair indexes = getStartEndIndexes(boundDimFilter, bitmapIndex); final int startIndex = indexes.lhs; final int endIndex = indexes.rhs; - return new Iterable() + return new Iterable() { @Override - public Iterator iterator() + public Iterator iterator() { - return new Iterator() + return new Iterator() { int currIndex = startIndex; @@ -178,9 +187,9 @@ public boolean hasNext() } @Override - public ImmutableBitmap next() + public Integer next() { - return bitmapIndex.getBitmap(currIndex++); + return currIndex++; } @Override diff --git a/processing/src/main/java/io/druid/segment/filter/Filters.java b/processing/src/main/java/io/druid/segment/filter/Filters.java index ff404b3708fb..8eedbff352c5 100644 --- a/processing/src/main/java/io/druid/segment/filter/Filters.java +++ b/processing/src/main/java/io/druid/segment/filter/Filters.java @@ -24,6 +24,7 @@ import com.google.common.base.Predicate; import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.common.guava.GuavaUtils; @@ -53,7 +54,7 @@ import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; -import java.util.Random; +import java.util.concurrent.ThreadLocalRandom; /** */ @@ -61,7 +62,6 @@ public class Filters { public static final List FILTERABLE_TYPES = ImmutableList.of(ValueType.STRING, ValueType.LONG); private static final String CTX_KEY_USE_FILTER_CNF = "useFilterCNF"; - private static final Random random = new Random(System.currentTimeMillis()); private static final int SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION = 100; /** @@ -190,6 +190,29 @@ public static ImmutableBitmap allTrue(final BitmapIndexSelector selector) .complement(selector.getBitmapFactory().makeEmptyImmutableBitmap(), selector.getNumRows()); } + /** + * 
Transform an iterable of indexes of bitmaps to an iterable of bitmaps + * + * @param indexes indexes of bitmaps + * @param bitmapIndex an object to retrieve bitmaps using indexes + * + * @return an iterable of bitmaps + */ + static Iterable bitmapsFromIndexes(final Iterable indexes, final BitmapIndex bitmapIndex) + { + return Iterables.transform( + indexes, + new Function() + { + @Override + public ImmutableBitmap apply(Integer index) + { + return bitmapIndex.getBitmap(index); + } + } + ); + } + /** * Return the union of bitmaps for all values matching a particular predicate. * @@ -199,7 +222,7 @@ public static ImmutableBitmap allTrue(final BitmapIndexSelector selector) * * @return bitmap of matching rows * - * @see Filters#estimatePredicateSelectivity(ColumnSelector, String, BitmapIndexSelector, Predicate) + * @see #estimatePredicateSelectivity(ColumnSelector, String, BitmapIndexSelector, Predicate) */ public static ImmutableBitmap matchPredicate( final String dimension, @@ -233,7 +256,7 @@ public static ImmutableBitmap matchPredicate( * * @return estimated selectivity * - * @see Filters#matchPredicate(String, BitmapIndexSelector, Predicate) + * @see #matchPredicate(String, BitmapIndexSelector, Predicate) */ static double estimatePredicateSelectivity( final ColumnSelector columnSelector, @@ -255,9 +278,10 @@ static double estimatePredicateSelectivity( // Apply predicate to all dimension values and union the matching bitmaps final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension); return estimatePredicateSelectivity( + bitmapIndex, columnSelector, dimension, - makePredicateQualifyingBitmapIterable( + makePredicateQualifyingIndexIterable( bitmapIndex, predicate, dimValues @@ -267,55 +291,160 @@ static double estimatePredicateSelectivity( } static double estimatePredicateSelectivity( + BitmapIndex bitmapIndex, ColumnSelector columnSelector, String dimension, - Iterable bitmaps, + Iterable bitmapIndexes, long totalNumRows ) { final ColumnCapabilities 
columnCapabilities = columnSelector.getColumn(dimension).getCapabilities(); - return estimatePredicateSelectivity( - bitmaps, + return estimateSelectivityOfBitmapList( + bitmapIndex, + bitmapIndexes, totalNumRows, + // assume multi-value column if columnCapabilities is null columnCapabilities == null || columnCapabilities.hasMultipleValues() ); } - static double estimatePredicateSelectivity( + private static double estimateSelectivityOfBitmapList( + BitmapIndex bitmapIndex, + Iterable bitmapIndexeIterable, + long totalNumRows, + boolean isMultiValueDimension + ) + { + final List bitmapIndexes = ImmutableList.copyOf(bitmapIndexeIterable); + long numMatchedRows = 0; + for (Integer index : bitmapIndexes) { + final ImmutableBitmap bitmap = bitmapIndex.getBitmap(index); + numMatchedRows += bitmap.size(); + } + + if (isMultiValueDimension) { + final double estimated = numMatchedRows * computeNonOverlapRatioFromRandomBitmapSamples( + bitmapIndex, + bitmapIndexes + ) / totalNumRows; + return Math.min(1., estimated); + } else { + return (double) numMatchedRows / totalNumRows; + } + } + + static double estimateSelectivityOfBitmapTree( Iterable bitmaps, long totalNumRows, boolean isMultiValueDimension ) { long numMatchedRows = 0; - final List bitmapList = Lists.newArrayList(bitmaps); - for (ImmutableBitmap bitmap : bitmapList) { + for (ImmutableBitmap bitmap : bitmaps) { numMatchedRows += bitmap.size(); } - // assume multi-value column if columnCapabilities is null if (isMultiValueDimension) { - final double estimated = numMatchedRows * Filters.computeNonOverlapRatioFromBitmapSamples(bitmapList) - / totalNumRows; + final double estimated = numMatchedRows * computeNonOverlapRatioFromFirstNBitmapSamples( + bitmaps + ) / totalNumRows; return Math.min(1., estimated); } else { return (double) numMatchedRows / totalNumRows; } } - private static double computeNonOverlapRatioFromBitmapSamples(List bitmaps) + /** + * This method is to estimate how many bits of bitmaps are not 
overlapped on average. + * Since a multi-value dimension can have one or more values, one or more bitmaps for that dimension can be set for the same row. + * As a result, to get the exact size of unioned bitmaps, which is widely useful for query planning like + * filter selectivity estimation, expensive union operations of bitmaps are inevitable. + * To avoid such overhead, this method can be used to compute the approximate unioned size based on random sampling. + *

+ * The non-overlap ratio can be computed like below. + *

+ * nonOverlapRatio(b1, b2) = size(union(b1, b2)) / (size(b1) + size(b2)) + *

+ * Given bitmaps, this method calculates the non-overlap ratios of N bitmap samples, + * and then returns the average of them. + * + * @param bitmapIndex bitmap index to retrieve bitmaps + * @param bitmapIndexes a list of indexes of bitmaps + * + * @return approximated average non-overlap ratio of bitmaps + * + * @see #estimatePredicateSelectivity(ColumnSelector, String, BitmapIndexSelector, Predicate) + */ + private static double computeNonOverlapRatioFromRandomBitmapSamples( + BitmapIndex bitmapIndex, + List bitmapIndexes + ) { - final int sampleNum = Math.min(SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION, bitmaps.size()); + Preconditions.checkArgument(bitmapIndexes.size() > 0, "empty index list"); + double nonOverlapRatioSum = 0.; + int sampleNum = Math.min(bitmapIndexes.size(), SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION); for (int i = 0; i < sampleNum; i++) { - final ImmutableBitmap b1 = bitmaps.get(random.nextInt(bitmaps.size())); - final ImmutableBitmap b2 = bitmaps.get(random.nextInt(bitmaps.size())); + final ImmutableBitmap b1 = bitmapIndex.getBitmap( + bitmapIndexes.get(ThreadLocalRandom.current().nextInt(bitmapIndexes.size()))); + final ImmutableBitmap b2 = bitmapIndex.getBitmap( + bitmapIndexes.get(ThreadLocalRandom.current().nextInt(bitmapIndexes.size()))); nonOverlapRatioSum += b1.union(b2).size() / (b1.size() + b2.size()); } return nonOverlapRatioSum / sampleNum; } + /** + * This method is to estimate how many bits of bitmaps are not overlapped in average. + * Since a multi-value dimension can have one or more values, one or more bitmaps for that dimension can be set for the same row. + * As a result, to get the exact size of unioned bitmaps, which is widely useful for query planning like + * filter selectivity estimation, expensive union operations of bitmaps are inevitable. + * To avoid such overhead, this method can be used to compute the approximate unioned size based on sampling. + *

+ * The non-overlap ratio can be computed like below. + *

+ * nonOverlapRatio(b1, b2) = size(union(b1, b2)) / (size(b1) + size(b2)) + *

+ * Given bitmaps, this method calculates the non-overlap ratios of the first N bitmap samples, + * and then returns the average of them. + * + * @param bitmaps An iterable of bitmaps + * + * @return approximated average non-overlap ratio of bitmaps + * + * @see #estimatePredicateSelectivity(ColumnSelector, String, BitmapIndexSelector, Predicate) + */ + static double computeNonOverlapRatioFromFirstNBitmapSamples( + Iterable bitmaps + ) + { + final Iterator iterator = bitmaps.iterator(); + Preconditions.checkArgument(iterator.hasNext(), "empty iterator"); + + double nonOverlapRatioSum = 0.; + int sampleNum; + ImmutableBitmap b1 = iterator.next(), b2; + + if (iterator.hasNext()) { + b2 = iterator.next(); + + for (sampleNum = 1; sampleNum <= SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION; sampleNum++) { + nonOverlapRatioSum += b1.union(b2).size() / (b1.size() + b2.size()); + + if (iterator.hasNext()) { + b1 = b2; + b2 = iterator.next(); + } else { + break; + } + } + return nonOverlapRatioSum / sampleNum; + } else { + return 1.; + } + } + private static Iterable makePredicateQualifyingBitmapIterable( final BitmapIndex bitmapIndex, final Predicate predicate, @@ -328,43 +457,84 @@ private static Iterable makePredicateQualifyingBitmapIterable( public Iterator iterator() { return new Iterator() + { + final Iterator indexIterator = makePredicateQualifyingIndexIterable( + bitmapIndex, + predicate, + dimValues + ).iterator(); + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + + @Override + public boolean hasNext() + { + return indexIterator.hasNext(); + } + + @Override + public ImmutableBitmap next() + { + return bitmapIndex.getBitmap(indexIterator.next()); + } + }; + } + }; + } + + private static Iterable makePredicateQualifyingIndexIterable( + final BitmapIndex bitmapIndex, + final Predicate predicate, + final Indexed dimValues + ) + { + return new Iterable() + { + @Override + public Iterator iterator() + { + return new Iterator() { private 
final int bitmapIndexCardinality = bitmapIndex.getCardinality(); private int nextIndex = 0; - private ImmutableBitmap nextBitmap; + private Integer found = null; { - findNextBitmap(); + found = findNextBitmap(); } - private void findNextBitmap() + private Integer findNextBitmap() { - while (nextIndex < bitmapIndexCardinality) { - if (predicate.apply(dimValues.get(nextIndex))) { - nextBitmap = bitmapIndex.getBitmap(nextIndex); - nextIndex++; - return; - } + while (nextIndex < bitmapIndexCardinality && !predicate.apply(dimValues.get(nextIndex))) { nextIndex++; } - nextBitmap = null; + + if (nextIndex < bitmapIndexCardinality) { + return nextIndex++; + } else { + return null; + } } @Override public boolean hasNext() { - return nextBitmap != null; + return found != null; } @Override - public ImmutableBitmap next() + public Integer next() { - ImmutableBitmap bitmap = nextBitmap; - if (bitmap == null) { + Integer found = this.found; + if (found == null) { throw new NoSuchElementException(); } - findNextBitmap(); - return bitmap; + this.found = findNextBitmap(); + return found; } @Override diff --git a/processing/src/main/java/io/druid/segment/filter/InFilter.java b/processing/src/main/java/io/druid/segment/filter/InFilter.java index dab73a337245..6f1bd9e5cd7f 100644 --- a/processing/src/main/java/io/druid/segment/filter/InFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/InFilter.java @@ -33,6 +33,7 @@ import io.druid.query.filter.ValueMatcher; import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; +import io.druid.segment.column.BitmapIndex; import java.util.Set; @@ -62,7 +63,8 @@ public InFilter( public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector) { if (extractionFn == null) { - return selector.getBitmapFactory().union(getBitmapIterable(selector)); + final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension); + return selector.getBitmapFactory().union(getBitmapIterable(bitmapIndex)); } else 
{ return Filters.matchPredicate( dimension, @@ -76,10 +78,12 @@ public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector) public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) { if (extractionFn == null) { + final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension); return Filters.estimatePredicateSelectivity( + bitmapIndex, columnSelector, dimension, - getBitmapIterable(indexSelector), + getBitmapIndexIterable(bitmapIndex), indexSelector.getNumRows() ); } else { @@ -92,15 +96,21 @@ public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSele } } - private Iterable getBitmapIterable(final BitmapIndexSelector selector) + private Iterable getBitmapIterable(final BitmapIndex bitmapIndex) + { + return Filters.bitmapsFromIndexes(getBitmapIndexIterable(bitmapIndex), bitmapIndex); + } + + private Iterable getBitmapIndexIterable(final BitmapIndex bitmapIndex) { return Iterables.transform( - values, new Function() + values, + new Function() { @Override - public ImmutableBitmap apply(String value) + public Integer apply(String value) { - return selector.getBitmapIndex(dimension, value); + return bitmapIndex.getIndex(value); } } ); diff --git a/processing/src/main/java/io/druid/segment/filter/LikeFilter.java b/processing/src/main/java/io/druid/segment/filter/LikeFilter.java index 01bb24260241..1e1e2d442e37 100644 --- a/processing/src/main/java/io/druid/segment/filter/LikeFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/LikeFilter.java @@ -32,6 +32,7 @@ import io.druid.segment.data.Indexed; import java.util.Iterator; +import java.util.NoSuchElementException; public class LikeFilter implements Filter { @@ -53,10 +54,10 @@ public LikeFilter( @Override public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector) { - if (directPrefixMatchable()) { + if (emptyExtractFn() && emptySuffixMatch()) { // dimension equals prefix return selector.getBitmapIndex(dimension, 
likeMatcher.getPrefix()); - } else if (directPrefixAndSuffixMatchable()) { + } else if (emptyExtractFn() && nonEmptyPrefix()) { // dimension startsWith prefix and is accepted by likeMatcher.matchesSuffixOnly final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension); @@ -84,11 +85,11 @@ public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector) @Override public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) { - if (directPrefixMatchable()) { + if (emptyExtractFn() && emptySuffixMatch()) { // dimension equals prefix return (double) indexSelector.getBitmapIndex(dimension, likeMatcher.getPrefix()).size() / indexSelector.getNumRows(); - } else if (directPrefixAndSuffixMatchable()) { + } else if (emptyExtractFn() && nonEmptyPrefix()) { // dimension startsWith prefix and is accepted by likeMatcher.matchesSuffixOnly final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension); @@ -102,9 +103,10 @@ public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSele // Use lazy iterator to allow getting bitmap size one by one and avoid materializing all of them at once. 
return Filters.estimatePredicateSelectivity( + bitmapIndex, columnSelector, dimension, - getBitmapIterator(bitmapIndex, likeMatcher, dimValues), + getBitmapIndexIterator(bitmapIndex, likeMatcher, dimValues), indexSelector.getNumRows() ); } else { @@ -118,14 +120,19 @@ public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSele } } - private boolean directPrefixMatchable() + private boolean emptyExtractFn() { - return extractionFn == null && likeMatcher.getSuffixMatch() == LikeDimFilter.LikeMatcher.SuffixMatch.MATCH_EMPTY; + return extractionFn == null; } - private boolean directPrefixAndSuffixMatchable() + private boolean emptySuffixMatch() { - return extractionFn == null && !likeMatcher.getPrefix().isEmpty(); + return likeMatcher.getSuffixMatch() == LikeDimFilter.LikeMatcher.SuffixMatch.MATCH_EMPTY; + } + + private boolean nonEmptyPrefix() + { + return !likeMatcher.getPrefix().isEmpty(); } @Override @@ -145,6 +152,15 @@ private static Iterable getBitmapIterator( final LikeDimFilter.LikeMatcher likeMatcher, final Indexed dimValues ) + { + return Filters.bitmapsFromIndexes(getBitmapIndexIterator(bitmapIndex, likeMatcher, dimValues), bitmapIndex); + } + + private static Iterable getBitmapIndexIterator( + final BitmapIndex bitmapIndex, + final LikeDimFilter.LikeMatcher likeMatcher, + final Indexed dimValues + ) { final String lower = Strings.nullToEmpty(likeMatcher.getPrefix()); final String upper = Strings.nullToEmpty(likeMatcher.getPrefix()) + Character.MAX_VALUE; @@ -157,33 +173,50 @@ private static Iterable getBitmapIterator( final int upperFound = bitmapIndex.getIndex(upper); endIndex = upperFound >= 0 ? 
upperFound + 1 : -(upperFound + 1); - return new Iterable() + return new Iterable() { @Override - public Iterator iterator() + public Iterator iterator() { - return new Iterator() + return new Iterator() { int currIndex = startIndex; + Integer found; - @Override - public boolean hasNext() { - return currIndex < endIndex; + found = findNext(); } - @Override - public ImmutableBitmap next() + private Integer findNext() { while (currIndex < endIndex && !likeMatcher.matchesSuffixOnly(dimValues.get(currIndex))) { currIndex++; } - if (currIndex == endIndex) { - return bitmapIndex.getBitmapFactory().makeEmptyImmutableBitmap(); + if (currIndex < endIndex) { + return currIndex++; + } else { + return null; + } + } + + @Override + public boolean hasNext() + { + return found != null; + } + + @Override + public Integer next() + { + Integer cur = found; + + if (cur == null) { + throw new NoSuchElementException(); } - return bitmapIndex.getBitmap(currIndex++); + found = findNext(); + return cur; } @Override diff --git a/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java b/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java index b2de0ece2eb9..4898d306a808 100644 --- a/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java @@ -106,7 +106,7 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) { // handle rtree overlap - return Filters.estimatePredicateSelectivity( + return Filters.estimateSelectivityOfBitmapTree( indexSelector.getSpatialIndex(dimension).search(bound), indexSelector.getNumRows(), true From c121c62c05912c2d2af690f780f2686952e6188d Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Wed, 25 Jan 2017 17:52:36 +0900 Subject: [PATCH 4/9] Addressed comments. - Fix wrong non-overlap ratio computation and added unit tests. 
- Change Iterable to IntIterable - Remove unnecessary Iterable --- .../io/druid/segment/IntIteratorUtils.java | 12 + .../io/druid/segment/filter/BoundFilter.java | 45 ++- .../java/io/druid/segment/filter/Filters.java | 259 +++++++++--------- .../io/druid/segment/filter/InFilter.java | 37 ++- .../io/druid/segment/filter/LikeFilter.java | 45 +-- .../io/druid/segment/filter/FiltersTest.java | 214 +++++++++++++++ 6 files changed, 425 insertions(+), 187 deletions(-) create mode 100644 processing/src/test/java/io/druid/segment/filter/FiltersTest.java diff --git a/processing/src/main/java/io/druid/segment/IntIteratorUtils.java b/processing/src/main/java/io/druid/segment/IntIteratorUtils.java index 45e205a17eae..75189b7511d3 100644 --- a/processing/src/main/java/io/druid/segment/IntIteratorUtils.java +++ b/processing/src/main/java/io/druid/segment/IntIteratorUtils.java @@ -22,8 +22,11 @@ import com.metamx.common.IAE; import com.metamx.common.guava.MergeIterator; import it.unimi.dsi.fastutil.ints.AbstractIntIterator; +import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.ints.IntIterator; import it.unimi.dsi.fastutil.ints.IntIterators; +import it.unimi.dsi.fastutil.ints.IntList; +import it.unimi.dsi.fastutil.ints.IntLists; import it.unimi.dsi.fastutil.longs.LongHeaps; import java.util.List; @@ -193,5 +196,14 @@ public int skip(int n) } } + public static IntList toIntList(IntIterator iterator) + { + final IntList integers = new IntArrayList(); + while (iterator.hasNext()) { + integers.add(iterator.nextInt()); + } + return IntLists.unmodifiable(integers); + } + private IntIteratorUtils() {} } diff --git a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java index f149dbe7e753..1b4882feb698 100644 --- a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java @@ -33,10 +33,14 @@ import 
io.druid.query.ordering.StringComparators; import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; +import io.druid.segment.IntIteratorUtils; import io.druid.segment.column.BitmapIndex; +import it.unimi.dsi.fastutil.ints.IntIterable; +import it.unimi.dsi.fastutil.ints.IntIterator; +import it.unimi.dsi.fastutil.ints.IntIterators; +import it.unimi.dsi.fastutil.ints.IntList; import java.util.Comparator; -import java.util.Iterator; public class BoundFilter implements Filter { @@ -89,7 +93,7 @@ public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSele bitmapIndex, columnSelector, boundDimFilter.getDimension(), - getBitmapIndexIterator(boundDimFilter, bitmapIndex), + getBitmapIndexList(boundDimFilter, bitmapIndex), indexSelector.getNumRows() ); } else { @@ -161,7 +165,15 @@ private static Iterable getBitmapIterator( return Filters.bitmapsFromIndexes(getBitmapIndexIterator(boundDimFilter, bitmapIndex), bitmapIndex); } - private static Iterable getBitmapIndexIterator( + private static IntList getBitmapIndexList( + final BoundDimFilter boundDimFilter, + final BitmapIndex bitmapIndex + ) + { + return IntIteratorUtils.toIntList(getBitmapIndexIterator(boundDimFilter, bitmapIndex).iterator()); + } + + private static IntIterable getBitmapIndexIterator( final BoundDimFilter boundDimFilter, final BitmapIndex bitmapIndex ) @@ -171,33 +183,12 @@ private static Iterable getBitmapIndexIterator( final int startIndex = indexes.lhs; final int endIndex = indexes.rhs; - return new Iterable() + return new IntIterable() { @Override - public Iterator iterator() + public IntIterator iterator() { - return new Iterator() - { - int currIndex = startIndex; - - @Override - public boolean hasNext() - { - return currIndex < endIndex; - } - - @Override - public Integer next() - { - return currIndex++; - } - - @Override - public void remove() - { - throw new UnsupportedOperationException(); - } - }; + return IntIterators.fromTo(startIndex, 
endIndex); } }; } diff --git a/processing/src/main/java/io/druid/segment/filter/Filters.java b/processing/src/main/java/io/druid/segment/filter/Filters.java index 8eedbff352c5..d70dcce6d406 100644 --- a/processing/src/main/java/io/druid/segment/filter/Filters.java +++ b/processing/src/main/java/io/druid/segment/filter/Filters.java @@ -24,7 +24,6 @@ import com.google.common.base.Predicate; import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; -import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.common.guava.GuavaUtils; @@ -44,11 +43,16 @@ import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; import io.druid.segment.DimensionHandlerUtils; +import io.druid.segment.IntIteratorUtils; import io.druid.segment.LongColumnSelector; import io.druid.segment.column.BitmapIndex; import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ValueType; import io.druid.segment.data.Indexed; +import it.unimi.dsi.fastutil.ints.AbstractIntIterator; +import it.unimi.dsi.fastutil.ints.IntIterable; +import it.unimi.dsi.fastutil.ints.IntIterator; +import it.unimi.dsi.fastutil.ints.IntList; import java.util.ArrayList; import java.util.Iterator; @@ -62,7 +66,7 @@ public class Filters { public static final List FILTERABLE_TYPES = ImmutableList.of(ValueType.STRING, ValueType.LONG); private static final String CTX_KEY_USE_FILTER_CNF = "useFilterCNF"; - private static final int SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION = 100; + static final int SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION = 100; /** * Convert a list of DimFilters to a list of Filters. 
@@ -193,24 +197,43 @@ public static ImmutableBitmap allTrue(final BitmapIndexSelector selector) /** * Transform an iterable of indexes of bitmaps to an iterable of bitmaps * - * @param indexes indexes of bitmaps + * @param indexes indexes of bitmaps * @param bitmapIndex an object to retrieve bitmaps using indexes * * @return an iterable of bitmaps */ - static Iterable bitmapsFromIndexes(final Iterable indexes, final BitmapIndex bitmapIndex) + static Iterable bitmapsFromIndexes(final IntIterable indexes, final BitmapIndex bitmapIndex) { - return Iterables.transform( - indexes, - new Function() + // Do not use Iterables.transform() to avoid boxing/unboxing integers. + return new Iterable() + { + @Override + public Iterator iterator() + { + final IntIterator iterator = indexes.iterator(); + + return new Iterator() { @Override - public ImmutableBitmap apply(Integer index) + public boolean hasNext() { - return bitmapIndex.getBitmap(index); + return iterator.hasNext(); } - } - ); + + @Override + public ImmutableBitmap next() + { + return bitmapIndex.getBitmap(iterator.nextInt()); + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + }; + } + }; } /** @@ -281,11 +304,7 @@ static double estimatePredicateSelectivity( bitmapIndex, columnSelector, dimension, - makePredicateQualifyingIndexIterable( - bitmapIndex, - predicate, - dimValues - ), + IntIteratorUtils.toIntList(makePredicateQualifyingIndexIterable(bitmapIndex, predicate, dimValues).iterator()), indexSelector.getNumRows() ); } @@ -294,7 +313,7 @@ static double estimatePredicateSelectivity( BitmapIndex bitmapIndex, ColumnSelector columnSelector, String dimension, - Iterable bitmapIndexes, + IntList bitmapIndexes, long totalNumRows ) { @@ -308,29 +327,31 @@ static double estimatePredicateSelectivity( ); } - private static double estimateSelectivityOfBitmapList( + static double estimateSelectivityOfBitmapList( BitmapIndex bitmapIndex, - Iterable bitmapIndexeIterable, + IntList 
bitmapIndexes, long totalNumRows, boolean isMultiValueDimension ) { - final List bitmapIndexes = ImmutableList.copyOf(bitmapIndexeIterable); - long numMatchedRows = 0; - for (Integer index : bitmapIndexes) { - final ImmutableBitmap bitmap = bitmapIndex.getBitmap(index); - numMatchedRows += bitmap.size(); + double numMatchedRows = bitmapIndexes.size() > 0 ? bitmapIndex.getBitmap(bitmapIndexes.get(0)).size() : 0; + final double nonOverlapRatio = isMultiValueDimension && bitmapIndexes.size() > 1 + ? bitmapIndexes.size() > SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION * 5 + ? computeNonOverlapRatioFromRandomBitmapSamples(bitmapIndex, bitmapIndexes) + : computeNonOverlapRatioFromFirstNBitmapSamples( + bitmapsFromIndexes( + bitmapIndexes, + bitmapIndex + ) + ) + : 1.; + + for (int i = 1; i < bitmapIndexes.size(); i++) { + final ImmutableBitmap bitmap = bitmapIndex.getBitmap(bitmapIndexes.get(i)); + numMatchedRows += bitmap.size() * nonOverlapRatio; } - if (isMultiValueDimension) { - final double estimated = numMatchedRows * computeNonOverlapRatioFromRandomBitmapSamples( - bitmapIndex, - bitmapIndexes - ) / totalNumRows; - return Math.min(1., estimated); - } else { - return (double) numMatchedRows / totalNumRows; - } + return Math.min(1., numMatchedRows / totalNumRows); } static double estimateSelectivityOfBitmapTree( @@ -339,19 +360,21 @@ static double estimateSelectivityOfBitmapTree( boolean isMultiValueDimension ) { - long numMatchedRows = 0; - for (ImmutableBitmap bitmap : bitmaps) { - numMatchedRows += bitmap.size(); + final Iterator iterator = bitmaps.iterator(); + double numMatchedRows = 0; + if (iterator.hasNext()) { + numMatchedRows = iterator.next().size(); + final double nonOverlapRatio = isMultiValueDimension && iterator.hasNext() + ? 
computeNonOverlapRatioFromFirstNBitmapSamples(bitmaps) + : 1.; + + while (iterator.hasNext()) { + final ImmutableBitmap bitmap = iterator.next(); + numMatchedRows += bitmap.size() * nonOverlapRatio; + } } - if (isMultiValueDimension) { - final double estimated = numMatchedRows * computeNonOverlapRatioFromFirstNBitmapSamples( - bitmaps - ) / totalNumRows; - return Math.min(1., estimated); - } else { - return (double) numMatchedRows / totalNumRows; - } + return Math.min(1., numMatchedRows / totalNumRows); } /** @@ -363,7 +386,12 @@ static double estimateSelectivityOfBitmapTree( *

* The non-overlap ratio can be computed like below. *

- * nonOverlapRatio(b1, b2) = size(union(b1, b2)) / (size(b1) + size(b2)) + * overlapSize = size(b1) + size(b2) - size(union(b1, b2)) + * nonOverlapRatio(b2) = (size(b2) - overlapSize) / size(b2) + *

+ * The approximate unioned size is + *

+ * unionedSize = size(b1) + size(b2) * nonOverlapRatio(b2) *

* Given bitmaps, this method calculates the non-overlap ratios of N bitmap samples, * and then returns the average of them. @@ -371,26 +399,34 @@ static double estimateSelectivityOfBitmapTree( * @param bitmapIndex bitmap index to retrieve bitmaps * @param bitmapIndexes a list of indexes of bitmaps * - * @return approximated average non-overlap ratio of bitmaps + * @return approximated average non-overlap ratio of bitmaps. * * @see #estimatePredicateSelectivity(ColumnSelector, String, BitmapIndexSelector, Predicate) */ - private static double computeNonOverlapRatioFromRandomBitmapSamples( + static double computeNonOverlapRatioFromRandomBitmapSamples( BitmapIndex bitmapIndex, - List bitmapIndexes + IntList bitmapIndexes ) { - Preconditions.checkArgument(bitmapIndexes.size() > 0, "empty index list"); + Preconditions.checkArgument(bitmapIndexes.size() > 1, "require at least two elements"); + final int sampleNum = Math.min(bitmapIndexes.size(), SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION); + final ThreadLocalRandom random = ThreadLocalRandom.current(); + ImmutableBitmap unioned = bitmapIndex.getBitmap(bitmapIndexes.get(random.nextInt(bitmapIndexes.size()))); + int unionedSize = unioned.size(); double nonOverlapRatioSum = 0.; - int sampleNum = Math.min(bitmapIndexes.size(), SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION); + for (int i = 0; i < sampleNum; i++) { - final ImmutableBitmap b1 = bitmapIndex.getBitmap( - bitmapIndexes.get(ThreadLocalRandom.current().nextInt(bitmapIndexes.size()))); - final ImmutableBitmap b2 = bitmapIndex.getBitmap( - bitmapIndexes.get(ThreadLocalRandom.current().nextInt(bitmapIndexes.size()))); + final ImmutableBitmap b = bitmapIndex.getBitmap(bitmapIndexes.get(random.nextInt(bitmapIndexes.size()))); + final int bSize = b.size(); + final int preUnionedSize = unionedSize; - nonOverlapRatioSum += b1.union(b2).size() / (b1.size() + b2.size()); + unioned = unioned.union(b); + unionedSize = unioned.size(); + + final int overlapSize = (preUnionedSize + bSize) - 
unionedSize; + final double nonOverlapRatio = (double) (bSize - overlapSize) / bSize; + nonOverlapRatioSum += nonOverlapRatio; } return nonOverlapRatioSum / sampleNum; } @@ -404,45 +440,46 @@ private static double computeNonOverlapRatioFromRandomBitmapSamples( *

* The non-overlap ratio can be computed like below. *

- * nonOverlapRatio(b1, b2) = size(union(b1, b2)) / (size(b1) + size(b2)) + * overlapSize = size(b1) + size(b2) - size(union(b1, b2)) + * nonOverlapRatio(b2) = (size(b2) - overlapSize) / size(b2) + *

+ * The approximate unioned size is + *

+ * unionedSize = size(b1) + size(b2) * nonOverlapRatio(b2) *

* Given bitmaps, this method calculates the non-overlap ratios of the first N bitmap samples, * and then returns the average of them. * - * @param bitmaps An iterable of bitmaps + * @param bitmaps An iterable of bitmaps * - * @return approximated average non-overlap ratio of bitmaps + * @return approximated average non-overlap ratio of bitmaps. * * @see #estimatePredicateSelectivity(ColumnSelector, String, BitmapIndexSelector, Predicate) */ - static double computeNonOverlapRatioFromFirstNBitmapSamples( - Iterable bitmaps - ) + static double computeNonOverlapRatioFromFirstNBitmapSamples(Iterable bitmaps) { final Iterator iterator = bitmaps.iterator(); Preconditions.checkArgument(iterator.hasNext(), "empty iterator"); + ImmutableBitmap unioned = iterator.next(); + Preconditions.checkArgument(iterator.hasNext(), "require at least two elements"); double nonOverlapRatioSum = 0.; - int sampleNum; - ImmutableBitmap b1 = iterator.next(), b2; + int sampleNum = 0; + int unionedSize = unioned.size(); - if (iterator.hasNext()) { - b2 = iterator.next(); + while (iterator.hasNext() && sampleNum++ < SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION) { + final ImmutableBitmap b = iterator.next(); + final int bSize = b.size(); + final int preUnionedSize = unionedSize; - for (sampleNum = 1; sampleNum <= SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION; sampleNum++) { - nonOverlapRatioSum += b1.union(b2).size() / (b1.size() + b2.size()); + unioned = unioned.union(b); + unionedSize = unioned.size(); - if (iterator.hasNext()) { - b1 = b2; - b2 = iterator.next(); - } else { - break; - } - } - return nonOverlapRatioSum / sampleNum; - } else { - return 1.; + final int overlapSize = (preUnionedSize + bSize) - unionedSize; + final double nonOverlapRatio = (double) (bSize - overlapSize) / bSize; + nonOverlapRatioSum += nonOverlapRatio; } + return nonOverlapRatioSum / sampleNum; } private static Iterable makePredicateQualifyingBitmapIterable( @@ -451,63 +488,31 @@ private static Iterable 
makePredicateQualifyingBitmapIterable( final Indexed dimValues ) { - return new Iterable() - { - @Override - public Iterator iterator() - { - return new Iterator() - { - final Iterator indexIterator = makePredicateQualifyingIndexIterable( - bitmapIndex, - predicate, - dimValues - ).iterator(); - - @Override - public void remove() - { - throw new UnsupportedOperationException(); - } - - @Override - public boolean hasNext() - { - return indexIterator.hasNext(); - } - - @Override - public ImmutableBitmap next() - { - return bitmapIndex.getBitmap(indexIterator.next()); - } - }; - } - }; + return bitmapsFromIndexes(makePredicateQualifyingIndexIterable(bitmapIndex, predicate, dimValues), bitmapIndex); } - private static Iterable makePredicateQualifyingIndexIterable( + private static IntIterable makePredicateQualifyingIndexIterable( final BitmapIndex bitmapIndex, final Predicate predicate, final Indexed dimValues ) { - return new Iterable() + return new IntIterable() { @Override - public Iterator iterator() + public IntIterator iterator() { - return new Iterator() + return new AbstractIntIterator() { private final int bitmapIndexCardinality = bitmapIndex.getCardinality(); private int nextIndex = 0; - private Integer found = null; + private int found = -1; { - found = findNextBitmap(); + found = findNextIndex(); } - private Integer findNextBitmap() + private int findNextIndex() { while (nextIndex < bitmapIndexCardinality && !predicate.apply(dimValues.get(nextIndex))) { nextIndex++; @@ -516,31 +521,25 @@ private Integer findNextBitmap() if (nextIndex < bitmapIndexCardinality) { return nextIndex++; } else { - return null; + return -1; } } @Override public boolean hasNext() { - return found != null; + return found != -1; } @Override - public Integer next() + public int nextInt() { - Integer found = this.found; - if (found == null) { + int foundIndex = this.found; + if (foundIndex == -1) { throw new NoSuchElementException(); } - this.found = findNextBitmap(); - return found; - 
} - - @Override - public void remove() - { - throw new UnsupportedOperationException(); + this.found = findNextIndex(); + return foundIndex; } }; } diff --git a/processing/src/main/java/io/druid/segment/filter/InFilter.java b/processing/src/main/java/io/druid/segment/filter/InFilter.java index 6f1bd9e5cd7f..053b55e2ac7e 100644 --- a/processing/src/main/java/io/druid/segment/filter/InFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/InFilter.java @@ -19,11 +19,9 @@ package io.druid.segment.filter; -import com.google.common.base.Function; import com.google.common.base.Predicate; import com.google.common.base.Strings; import com.google.common.base.Supplier; -import com.google.common.collect.Iterables; import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; @@ -33,8 +31,13 @@ import io.druid.query.filter.ValueMatcher; import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; +import io.druid.segment.IntIteratorUtils; import io.druid.segment.column.BitmapIndex; +import it.unimi.dsi.fastutil.ints.AbstractIntIterator; +import it.unimi.dsi.fastutil.ints.IntIterable; +import it.unimi.dsi.fastutil.ints.IntIterator; +import java.util.Iterator; import java.util.Set; /** @@ -83,7 +86,7 @@ public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSele bitmapIndex, columnSelector, dimension, - getBitmapIndexIterable(bitmapIndex), + IntIteratorUtils.toIntList(getBitmapIndexIterable(bitmapIndex).iterator()), indexSelector.getNumRows() ); } else { @@ -101,19 +104,31 @@ private Iterable getBitmapIterable(final BitmapIndex bitmapInde return Filters.bitmapsFromIndexes(getBitmapIndexIterable(bitmapIndex), bitmapIndex); } - private Iterable getBitmapIndexIterable(final BitmapIndex bitmapIndex) + private IntIterable getBitmapIndexIterable(final BitmapIndex bitmapIndex) { - return Iterables.transform( - values, - new Function() + 
return new IntIterable() + { + @Override + public IntIterator iterator() + { + return new AbstractIntIterator() { + Iterator iterator = values.iterator(); + @Override - public Integer apply(String value) + public boolean hasNext() { - return bitmapIndex.getIndex(value); + return iterator.hasNext(); } - } - ); + + @Override + public int nextInt() + { + return bitmapIndex.getIndex(iterator.next()); + } + }; + } + }; } @Override diff --git a/processing/src/main/java/io/druid/segment/filter/LikeFilter.java b/processing/src/main/java/io/druid/segment/filter/LikeFilter.java index 1e1e2d442e37..85a2d3e7d0fb 100644 --- a/processing/src/main/java/io/druid/segment/filter/LikeFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/LikeFilter.java @@ -28,10 +28,14 @@ import io.druid.query.filter.ValueMatcher; import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; +import io.druid.segment.IntIteratorUtils; import io.druid.segment.column.BitmapIndex; import io.druid.segment.data.Indexed; +import it.unimi.dsi.fastutil.ints.AbstractIntIterator; +import it.unimi.dsi.fastutil.ints.IntIterable; +import it.unimi.dsi.fastutil.ints.IntIterator; +import it.unimi.dsi.fastutil.ints.IntList; -import java.util.Iterator; import java.util.NoSuchElementException; public class LikeFilter implements Filter @@ -106,7 +110,7 @@ public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSele bitmapIndex, columnSelector, dimension, - getBitmapIndexIterator(bitmapIndex, likeMatcher, dimValues), + getBitmapIndexList(bitmapIndex, likeMatcher, dimValues), indexSelector.getNumRows() ); } else { @@ -156,7 +160,16 @@ private static Iterable getBitmapIterator( return Filters.bitmapsFromIndexes(getBitmapIndexIterator(bitmapIndex, likeMatcher, dimValues), bitmapIndex); } - private static Iterable getBitmapIndexIterator( + private static IntList getBitmapIndexList( + final BitmapIndex bitmapIndex, + final LikeDimFilter.LikeMatcher likeMatcher, + final 
Indexed dimValues + ) + { + return IntIteratorUtils.toIntList(getBitmapIndexIterator(bitmapIndex, likeMatcher, dimValues).iterator()); + } + + private static IntIterable getBitmapIndexIterator( final BitmapIndex bitmapIndex, final LikeDimFilter.LikeMatcher likeMatcher, final Indexed dimValues @@ -173,21 +186,21 @@ private static Iterable getBitmapIndexIterator( final int upperFound = bitmapIndex.getIndex(upper); endIndex = upperFound >= 0 ? upperFound + 1 : -(upperFound + 1); - return new Iterable() + return new IntIterable() { @Override - public Iterator iterator() + public IntIterator iterator() { - return new Iterator() + return new AbstractIntIterator() { int currIndex = startIndex; - Integer found; + int found = -1; { found = findNext(); } - private Integer findNext() + private int findNext() { while (currIndex < endIndex && !likeMatcher.matchesSuffixOnly(dimValues.get(currIndex))) { currIndex++; @@ -196,34 +209,28 @@ private Integer findNext() if (currIndex < endIndex) { return currIndex++; } else { - return null; + return -1; } } @Override public boolean hasNext() { - return found != null; + return found != -1; } @Override - public Integer next() + public int nextInt() { - Integer cur = found; + int cur = found; - if (cur == null) { + if (cur == -1) { throw new NoSuchElementException(); } found = findNext(); return cur; } - - @Override - public void remove() - { - throw new UnsupportedOperationException(); - } }; } }; diff --git a/processing/src/test/java/io/druid/segment/filter/FiltersTest.java b/processing/src/test/java/io/druid/segment/filter/FiltersTest.java new file mode 100644 index 000000000000..9f0de459779b --- /dev/null +++ b/processing/src/test/java/io/druid/segment/filter/FiltersTest.java @@ -0,0 +1,214 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.segment.filter; + +import com.google.common.collect.Lists; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ConciseBitmapFactory; +import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.MutableBitmap; +import io.druid.segment.IntIteratorUtils; +import io.druid.segment.column.BitmapIndex; +import it.unimi.dsi.fastutil.ints.IntIterators; +import org.junit.Test; + +import java.util.List; + +import static org.junit.Assert.assertEquals; + +public class FiltersTest +{ + @Test + public void testComputeNonOverlapRatioFromRandomBitmapSamplesWithFullyOverlappedBitmaps() + { + final int bitmapNum = 10; + final List bitmaps = Lists.newArrayListWithCapacity(bitmapNum); + final BitmapIndex bitmapIndex = makeFullyOverlappedBitmapIndexes(bitmapNum, bitmaps); + + final double estimated = Filters.computeNonOverlapRatioFromRandomBitmapSamples( + bitmapIndex, + IntIteratorUtils.toIntList(IntIterators.fromTo(0, bitmapNum)) + ); + final double expected = 0.0; + assertEquals(expected, estimated, 0.00001); + } + + @Test(expected = IllegalArgumentException.class) + public void testComputeNonOverlapRatioFromRandomBitmapSamplesWithEmptyBitmaps() + { + final List bitmaps = Lists.newArrayList(); + final BitmapIndex bitmapIndex = getBitmapIndex(bitmaps); + Filters.computeNonOverlapRatioFromRandomBitmapSamples( + 
bitmapIndex, + IntIteratorUtils.toIntList(IntIterators.EMPTY_ITERATOR) + ); + } + + @Test + public void testComputeNonOverlapRatioFromFirstNBitmapSamplesWithNonOverlapBitmaps() throws Exception + { + final int bitmapNum = 10; + final List bitmaps = Lists.newArrayListWithCapacity(bitmapNum); + makeNonOverlappedBitmapIndexes(bitmapNum, bitmaps); + + final double estimated = Filters.computeNonOverlapRatioFromFirstNBitmapSamples(bitmaps); + final double expected = 1.0; + assertEquals(expected, estimated, 0.00001); + } + + @Test(expected = IllegalArgumentException.class) + public void testComputeNonOverlapRatioFromFirstNBitmapSamplesWithEmptyBitmaps() + { + final List bitmaps = Lists.newArrayList(); + Filters.computeNonOverlapRatioFromFirstNBitmapSamples(bitmaps); + } + + @Test + public void testComputeNonOverlapRatioFromFirstNBitmapSamples() throws Exception + { + final int bitmapNum = Filters.SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION; + final List bitmaps = Lists.newArrayListWithCapacity(bitmapNum); + makePartiallyOverlappedBitmapIndexes(bitmapNum, bitmaps); + + final double estimated = Filters.computeNonOverlapRatioFromFirstNBitmapSamples(bitmaps); + final double expected = 0.2; + assertEquals(expected, estimated, 0.00001); + } + + @Test + public void testEstimateSelectivityOfBitmapList() + { + final int bitmapNum = Filters.SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION; + final List bitmaps = Lists.newArrayListWithCapacity(bitmapNum); + final BitmapIndex bitmapIndex = makePartiallyOverlappedBitmapIndexes(bitmapNum, bitmaps); + + final double estimated = Filters.estimateSelectivityOfBitmapList( + bitmapIndex, + IntIteratorUtils.toIntList(IntIterators.fromTo(0, bitmapNum)), + 1000, + true + ); + final double expected = 0.208; // total # of bits is 208 = 10 + 99 * 2 + assertEquals(expected, estimated, 0.00001); + } + + @Test + public void testEstimateSelectivityOfBitmapTree() + { + final int bitmapNum = Filters.SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION; + final List bitmaps = 
Lists.newArrayListWithCapacity(bitmapNum); + makePartiallyOverlappedBitmapIndexes(bitmapNum, bitmaps); + + final double estimated = Filters.estimateSelectivityOfBitmapTree( + bitmaps, + 1000, + true + ); + final double expected = 0.208; // total # of bits is 208 = 10 + 99 * 2 + assertEquals(expected, estimated, 0.00001); + } + + private static BitmapIndex getBitmapIndex(final List bitmapList) + { + return new BitmapIndex() + { + @Override + public int getCardinality() + { + return 10; + } + + @Override + public String getValue(int index) + { + throw new UnsupportedOperationException(); + } + + @Override + public boolean hasNulls() + { + return false; + } + + @Override + public BitmapFactory getBitmapFactory() + { + return new ConciseBitmapFactory(); + } + + @Override + public int getIndex(String value) + { + throw new UnsupportedOperationException(); + } + + @Override + public ImmutableBitmap getBitmap(int idx) + { + return bitmapList.get(idx); + } + }; + } + + private static BitmapIndex makeFullyOverlappedBitmapIndexes(final int bitmapNum, final List bitmaps) + { + final BitmapIndex bitmapIndex = getBitmapIndex(bitmaps); + final BitmapFactory factory = bitmapIndex.getBitmapFactory(); + for (int i = 0; i < bitmapNum; i++) { + final MutableBitmap mutableBitmap = factory.makeEmptyMutableBitmap(); + for (int j = 0; j < 10; j++) { + mutableBitmap.add(j * 10); + } + bitmaps.add(factory.makeImmutableBitmap(mutableBitmap)); + } + return bitmapIndex; + } + + private static BitmapIndex makeNonOverlappedBitmapIndexes(final int bitmapNum, final List bitmaps) + { + final BitmapIndex bitmapIndex = getBitmapIndex(bitmaps); + final BitmapFactory factory = bitmapIndex.getBitmapFactory(); + int index = 0; + for (int i = 0; i < bitmapNum; i++) { + final MutableBitmap mutableBitmap = factory.makeEmptyMutableBitmap(); + for (int j = 0; j < 10; j++) { + mutableBitmap.add(index++); + } + bitmaps.add(factory.makeImmutableBitmap(mutableBitmap)); + } + return bitmapIndex; + } + + private 
static BitmapIndex makePartiallyOverlappedBitmapIndexes(int bitmapNum, List bitmaps) + { + final BitmapIndex bitmapIndex = getBitmapIndex(bitmaps); + final BitmapFactory factory = bitmapIndex.getBitmapFactory(); + int startIndex = 0; + for (int i = 0; i < bitmapNum; i++) { + final MutableBitmap mutableBitmap = factory.makeEmptyMutableBitmap(); + for (int j = 0; j < 10; j++) { + mutableBitmap.add(startIndex + j); + } + startIndex += 2; // 80% of bitmaps are overlapped + bitmaps.add(factory.makeImmutableBitmap(mutableBitmap)); + } + return bitmapIndex; + } +} From 010fe7b59b7a400a0477462861c46392bce40959 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 26 Jan 2017 11:36:19 +0900 Subject: [PATCH 5/9] Addressed comments - Split a long ternary operation into if-else blocks - Add IntListUtils.fromTo() --- .../java/io/druid/segment/IntListUtils.java | 59 +++++++++++++++++++ .../io/druid/segment/filter/BoundFilter.java | 27 ++------- .../java/io/druid/segment/filter/Filters.java | 27 +++++---- 3 files changed, 80 insertions(+), 33 deletions(-) create mode 100644 processing/src/main/java/io/druid/segment/IntListUtils.java diff --git a/processing/src/main/java/io/druid/segment/IntListUtils.java b/processing/src/main/java/io/druid/segment/IntListUtils.java new file mode 100644 index 000000000000..557fcde65889 --- /dev/null +++ b/processing/src/main/java/io/druid/segment/IntListUtils.java @@ -0,0 +1,59 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.segment; + +import com.google.common.base.Preconditions; +import it.unimi.dsi.fastutil.ints.AbstractIntList; +import it.unimi.dsi.fastutil.ints.IntList; + +public class IntListUtils +{ + private IntListUtils() {} + + public static IntList fromTo(int from, int to) + { + return new RangeIntList(from, to); + } + + static final class RangeIntList extends AbstractIntList + { + private final int start; + private final int size; + + public RangeIntList(int start, int end) + { + this.start = start; + this.size = end - start; + } + + @Override + public int getInt(int index) + { + Preconditions.checkElementIndex(index, size); + return start + index; + } + + @Override + public int size() + { + return size; + } + } +} diff --git a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java index 1b4882feb698..9fb157483590 100644 --- a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java @@ -33,11 +33,8 @@ import io.druid.query.ordering.StringComparators; import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; -import io.druid.segment.IntIteratorUtils; +import io.druid.segment.IntListUtils; import io.druid.segment.column.BitmapIndex; -import it.unimi.dsi.fastutil.ints.IntIterable; -import it.unimi.dsi.fastutil.ints.IntIterator; -import it.unimi.dsi.fastutil.ints.IntIterators; import it.unimi.dsi.fastutil.ints.IntList; import java.util.Comparator; @@ 
-68,8 +65,7 @@ public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector) return doesMatch(null) ? Filters.allTrue(selector) : Filters.allFalse(selector); } - return selector.getBitmapFactory().union(getBitmapIterator(boundDimFilter, bitmapIndex) - ); + return selector.getBitmapFactory().union(getBitmapIterator(boundDimFilter, bitmapIndex)); } else { return Filters.matchPredicate( boundDimFilter.getDimension(), @@ -162,35 +158,20 @@ private static Iterable getBitmapIterator( final BitmapIndex bitmapIndex ) { - return Filters.bitmapsFromIndexes(getBitmapIndexIterator(boundDimFilter, bitmapIndex), bitmapIndex); + return Filters.bitmapsFromIndexes(getBitmapIndexList(boundDimFilter, bitmapIndex), bitmapIndex); } private static IntList getBitmapIndexList( final BoundDimFilter boundDimFilter, final BitmapIndex bitmapIndex ) - { - return IntIteratorUtils.toIntList(getBitmapIndexIterator(boundDimFilter, bitmapIndex).iterator()); - } - - private static IntIterable getBitmapIndexIterator( - final BoundDimFilter boundDimFilter, - final BitmapIndex bitmapIndex - ) { // search for start, end indexes in the bitmaps; then include all bitmaps between those points final Pair indexes = getStartEndIndexes(boundDimFilter, bitmapIndex); final int startIndex = indexes.lhs; final int endIndex = indexes.rhs; - return new IntIterable() - { - @Override - public IntIterator iterator() - { - return IntIterators.fromTo(startIndex, endIndex); - } - }; + return IntListUtils.fromTo(startIndex, endIndex); } private DruidPredicateFactory getPredicateFactory() diff --git a/processing/src/main/java/io/druid/segment/filter/Filters.java b/processing/src/main/java/io/druid/segment/filter/Filters.java index b8a27fed4b60..19880e56ce5f 100644 --- a/processing/src/main/java/io/druid/segment/filter/Filters.java +++ b/processing/src/main/java/io/druid/segment/filter/Filters.java @@ -19,6 +19,7 @@ package io.druid.segment.filter; +import com.google.common.annotations.VisibleForTesting; import 
com.google.common.base.Function; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; @@ -327,6 +328,7 @@ static double estimatePredicateSelectivity( ); } + @VisibleForTesting static double estimateSelectivityOfBitmapList( BitmapIndex bitmapIndex, IntList bitmapIndexes, @@ -335,16 +337,19 @@ static double estimateSelectivityOfBitmapList( ) { double numMatchedRows = bitmapIndexes.size() > 0 ? bitmapIndex.getBitmap(bitmapIndexes.get(0)).size() : 0; - final double nonOverlapRatio = isMultiValueDimension && bitmapIndexes.size() > 1 - ? bitmapIndexes.size() > SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION * 5 - ? computeNonOverlapRatioFromRandomBitmapSamples(bitmapIndex, bitmapIndexes) - : computeNonOverlapRatioFromFirstNBitmapSamples( - bitmapsFromIndexes( - bitmapIndexes, - bitmapIndex - ) - ) - : 1.; + final double nonOverlapRatio; + if (isMultiValueDimension && bitmapIndexes.size() > 1) { + nonOverlapRatio = bitmapIndexes.size() > SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION * 5 + ? 
computeNonOverlapRatioFromRandomBitmapSamples(bitmapIndex, bitmapIndexes) + : computeNonOverlapRatioFromFirstNBitmapSamples( + bitmapsFromIndexes( + bitmapIndexes, + bitmapIndex + ) + ); + } else { + nonOverlapRatio = 1.; + } for (int i = 1; i < bitmapIndexes.size(); i++) { final ImmutableBitmap bitmap = bitmapIndex.getBitmap(bitmapIndexes.get(i)); @@ -403,6 +408,7 @@ static double estimateSelectivityOfBitmapTree( * * @see #estimatePredicateSelectivity(ColumnSelector, String, BitmapIndexSelector, Predicate) */ + @VisibleForTesting static double computeNonOverlapRatioFromRandomBitmapSamples( BitmapIndex bitmapIndex, IntList bitmapIndexes @@ -456,6 +462,7 @@ static double computeNonOverlapRatioFromRandomBitmapSamples( * * @see #estimatePredicateSelectivity(ColumnSelector, String, BitmapIndexSelector, Predicate) */ + @VisibleForTesting static double computeNonOverlapRatioFromFirstNBitmapSamples(Iterable bitmaps) { final Iterator iterator = bitmaps.iterator(); From 606abffc6459fc0dbaa7c139f589be96634dd460 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 26 Jan 2017 15:18:52 +0900 Subject: [PATCH 6/9] Fix test failure and add a test for RangeIntList --- .../java/io/druid/segment/IntListUtils.java | 4 +- .../io/druid/segment/IntListUtilsTest.java | 53 +++++++++++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) create mode 100644 processing/src/test/java/io/druid/segment/IntListUtilsTest.java diff --git a/processing/src/main/java/io/druid/segment/IntListUtils.java b/processing/src/main/java/io/druid/segment/IntListUtils.java index 557fcde65889..2b7802c1ae62 100644 --- a/processing/src/main/java/io/druid/segment/IntListUtils.java +++ b/processing/src/main/java/io/druid/segment/IntListUtils.java @@ -32,7 +32,7 @@ public static IntList fromTo(int from, int to) return new RangeIntList(from, to); } - static final class RangeIntList extends AbstractIntList + private static final class RangeIntList extends AbstractIntList { private final int start; private final 
int size; @@ -40,7 +40,7 @@ static final class RangeIntList extends AbstractIntList public RangeIntList(int start, int end) { this.start = start; - this.size = end - start; + this.size = Math.max(end - start, 0); } @Override diff --git a/processing/src/test/java/io/druid/segment/IntListUtilsTest.java b/processing/src/test/java/io/druid/segment/IntListUtilsTest.java new file mode 100644 index 000000000000..77529d8b68ee --- /dev/null +++ b/processing/src/test/java/io/druid/segment/IntListUtilsTest.java @@ -0,0 +1,53 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.segment; + +import it.unimi.dsi.fastutil.ints.IntList; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class IntListUtilsTest +{ + @Test(expected = IndexOutOfBoundsException.class) + public void testEmptyRangeIntList() + { + final IntList list = IntListUtils.fromTo(10, 10); + assertEquals(0, list.size()); + list.get(0); + } + + @Test(expected = IndexOutOfBoundsException.class) + public void testRangeIntListWithSmallEndIndex() + { + final IntList list = IntListUtils.fromTo(10, 5); + assertEquals(0, list.size()); + list.get(0); + } + + @Test + public void testRangeIntList() + { + final IntList list = IntListUtils.fromTo(20, 120); + for (int i = 0; i < 100; i++) { + assertEquals(i + 20, list.getInt(i)); + } + } +} \ No newline at end of file From 71eb14fb6425f0bad2baa1305d0a3f777bdbe3c0 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 26 Jan 2017 15:23:50 +0900 Subject: [PATCH 7/9] fix code style --- processing/src/test/java/io/druid/segment/IntListUtilsTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processing/src/test/java/io/druid/segment/IntListUtilsTest.java b/processing/src/test/java/io/druid/segment/IntListUtilsTest.java index 77529d8b68ee..b75712eca966 100644 --- a/processing/src/test/java/io/druid/segment/IntListUtilsTest.java +++ b/processing/src/test/java/io/druid/segment/IntListUtilsTest.java @@ -50,4 +50,4 @@ public void testRangeIntList() assertEquals(i + 20, list.getInt(i)); } } -} \ No newline at end of file +} From fcd6f07ac7675218c991dddbfe973bfff3b667c9 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Sat, 28 Jan 2017 01:40:46 +0900 Subject: [PATCH 8/9] Disabled selectivity estimation for multi-valued dimensions --- .../java/io/druid/query/filter/Filter.java | 22 +- .../query/search/search/AutoStrategy.java | 10 +- .../java/io/druid/segment/IntListUtils.java | 4 +- .../io/druid/segment/filter/AndFilter.java | 17 +- .../io/druid/segment/filter/BoundFilter.java | 
13 +- .../filter/DimensionPredicateFilter.java | 11 +- .../java/io/druid/segment/filter/Filters.java | 198 ++---------------- .../io/druid/segment/filter/InFilter.java | 13 +- .../segment/filter/JavaScriptFilter.java | 12 +- .../io/druid/segment/filter/LikeFilter.java | 13 +- .../io/druid/segment/filter/NotFilter.java | 12 +- .../io/druid/segment/filter/OrFilter.java | 17 +- .../druid/segment/filter/SelectorFilter.java | 10 +- .../druid/segment/filter/SpatialFilter.java | 18 +- .../druid/segment/filter/BaseFilterTest.java | 10 +- .../io/druid/segment/filter/FiltersTest.java | 117 +---------- 16 files changed, 170 insertions(+), 327 deletions(-) diff --git a/processing/src/main/java/io/druid/query/filter/Filter.java b/processing/src/main/java/io/druid/query/filter/Filter.java index ffc1ce5a47e1..0aabd4beeef3 100644 --- a/processing/src/main/java/io/druid/query/filter/Filter.java +++ b/processing/src/main/java/io/druid/query/filter/Filter.java @@ -34,7 +34,7 @@ public interface Filter * * @return A bitmap indicating rows that match this filter. * - * @see Filter#estimateSelectivity(ColumnSelector, BitmapIndexSelector) + * @see Filter#estimateSelectivity(BitmapIndexSelector) */ ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector); @@ -47,14 +47,13 @@ public interface Filter * with reasonable sacrifice of the accuracy. * As a result, the estimated selectivity might be different from the exact value. * - * @param columnSelector Column selector to retrieve column capabilities - * @param indexSelector Object used to retrieve bitmap indexes + * @param indexSelector Object used to retrieve bitmap indexes * * @return an estimated selectivity ranging from 0 (filter selects no rows) to 1 (filter selects all rows). 
* * @see Filter#getBitmapIndex(BitmapIndexSelector) */ - double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector); + double estimateSelectivity(BitmapIndexSelector indexSelector); /** @@ -73,7 +72,20 @@ public interface Filter * * @param selector Object used to retrieve bitmap indexes * - * @return true if this Filter can provide a bitmap index using the selector, false otherwise + * @return true if this Filter can provide a bitmap index using the selector, false otherwise. */ boolean supportsBitmapIndex(BitmapIndexSelector selector); + + + /** + * Indicates whether this filter supports selectivity estimation. + * A filter supports selectivity estimation if it supports bitmap index and + * the dimension which the filter evaluates does not have multi values. + * + * @param columnSelector Object to check the dimension has multi values. + * @param indexSelector Object used to retrieve bitmap indexes + * + * @return true if this Filter supports selectivity estimation, false otherwise. + */ + boolean supportsSelectivityEstimation(ColumnSelector columnSelector, BitmapIndexSelector indexSelector); } diff --git a/processing/src/main/java/io/druid/query/search/search/AutoStrategy.java b/processing/src/main/java/io/druid/query/search/search/AutoStrategy.java index e43038697256..f574c108bed5 100644 --- a/processing/src/main/java/io/druid/query/search/search/AutoStrategy.java +++ b/processing/src/main/java/io/druid/query/search/search/AutoStrategy.java @@ -62,7 +62,7 @@ public List getExecutionPlan(SearchQuery query, Segment seg // Note: if some filters support bitmap indexes but others are not, the current implementation always employs // the cursor-based plan. This can be more optimized. One possible optimization is generating a bitmap index // from the non-bitmap-support filters, and then use it to compute the filtered result by intersecting bitmaps. 
- if (filter == null || filter.supportsBitmapIndex(selector)) { + if (filter == null || filter.supportsSelectivityEstimation(index, selector)) { final List dimsToSearch = getDimsToSearch( index.getAvailableDimensions(), query.getDimensions() @@ -77,12 +77,14 @@ public List getExecutionPlan(SearchQuery query, Segment seg // * (search predicate processing cost) final SearchQueryDecisionHelper helper = getDecisionHelper(index); final double useIndexStrategyCost = helper.getBitmapIntersectCost() * computeTotalCard(index, dimsToSearch); - final double cursorOnlyStrategyCost = (filter == null ? 1. : filter.estimateSelectivity(index, selector)) + final double cursorOnlyStrategyCost = (filter == null ? 1. : filter.estimateSelectivity(selector)) * selector.getNumRows() * dimsToSearch.size(); - log.debug("Use-index strategy cost: %f, cursor-only strategy cost: %f", - useIndexStrategyCost, cursorOnlyStrategyCost + log.debug( + "Use-index strategy cost: %f, cursor-only strategy cost: %f", + useIndexStrategyCost, + cursorOnlyStrategyCost ); if (useIndexStrategyCost < cursorOnlyStrategyCost) { diff --git a/processing/src/main/java/io/druid/segment/IntListUtils.java b/processing/src/main/java/io/druid/segment/IntListUtils.java index 2b7802c1ae62..ee21c569643a 100644 --- a/processing/src/main/java/io/druid/segment/IntListUtils.java +++ b/processing/src/main/java/io/druid/segment/IntListUtils.java @@ -29,6 +29,8 @@ private IntListUtils() {} public static IntList fromTo(int from, int to) { + // TODO: check `from` is always smaller than or equal to `to`. + // It's currently disabled because BoundFilter.getStartEndIndexes() sometimes violates this condition. 
return new RangeIntList(from, to); } @@ -37,7 +39,7 @@ private static final class RangeIntList extends AbstractIntList private final int start; private final int size; - public RangeIntList(int start, int end) + RangeIntList(int start, int end) { this.start = start; this.size = Math.max(end - start, 0); diff --git a/processing/src/main/java/io/druid/segment/filter/AndFilter.java b/processing/src/main/java/io/druid/segment/filter/AndFilter.java index 0ac0e1aef940..dbe4c7f45f09 100644 --- a/processing/src/main/java/io/druid/segment/filter/AndFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/AndFilter.java @@ -150,12 +150,25 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) } @Override - public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) + public boolean supportsSelectivityEstimation( + final ColumnSelector columnSelector, final BitmapIndexSelector indexSelector + ) + { + for (Filter filter : filters) { + if (!filter.supportsSelectivityEstimation(columnSelector, indexSelector)) { + return false; + } + } + return true; + } + + @Override + public double estimateSelectivity(BitmapIndexSelector indexSelector) { // Estimate selectivity with attribute value independence assumption double selectivity = 1.0; for (final Filter filter : filters) { - selectivity *= filter.estimateSelectivity(columnSelector, indexSelector); + selectivity *= filter.estimateSelectivity(indexSelector); } return selectivity; } diff --git a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java index 9fb157483590..4a961a20fac2 100644 --- a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java @@ -76,7 +76,7 @@ public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector) } @Override - public double estimateSelectivity(ColumnSelector 
columnSelector, BitmapIndexSelector indexSelector) + public double estimateSelectivity(BitmapIndexSelector indexSelector) { if (supportShortCircuit()) { final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(boundDimFilter.getDimension()); @@ -87,14 +87,11 @@ public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSele return Filters.estimatePredicateSelectivity( bitmapIndex, - columnSelector, - boundDimFilter.getDimension(), getBitmapIndexList(boundDimFilter, bitmapIndex), indexSelector.getNumRows() ); } else { return Filters.estimatePredicateSelectivity( - columnSelector, boundDimFilter.getDimension(), indexSelector, getPredicateFactory().makeStringPredicate() @@ -120,6 +117,14 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) return selector.getBitmapIndex(boundDimFilter.getDimension()) != null; } + @Override + public boolean supportsSelectivityEstimation( + ColumnSelector columnSelector, BitmapIndexSelector indexSelector + ) + { + return Filters.supportsSelectivityEstimation(this, boundDimFilter.getDimension(), columnSelector, indexSelector); + } + private static Pair getStartEndIndexes( final BoundDimFilter boundDimFilter, final BitmapIndex bitmapIndex diff --git a/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java b/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java index 030e175c63be..0155f9f4cb9e 100644 --- a/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java @@ -106,10 +106,17 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) } @Override - public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) + public boolean supportsSelectivityEstimation( + ColumnSelector columnSelector, BitmapIndexSelector indexSelector + ) + { + return Filters.supportsSelectivityEstimation(this, dimension, 
columnSelector, indexSelector); + } + + @Override + public double estimateSelectivity(BitmapIndexSelector indexSelector) { return Filters.estimatePredicateSelectivity( - columnSelector, dimension, indexSelector, predicateFactory.makeStringPredicate() diff --git a/processing/src/main/java/io/druid/segment/filter/Filters.java b/processing/src/main/java/io/druid/segment/filter/Filters.java index 19880e56ce5f..f4b1ccdabb63 100644 --- a/processing/src/main/java/io/druid/segment/filter/Filters.java +++ b/processing/src/main/java/io/druid/segment/filter/Filters.java @@ -47,6 +47,7 @@ import io.druid.segment.IntIteratorUtils; import io.druid.segment.LongColumnSelector; import io.druid.segment.column.BitmapIndex; +import io.druid.segment.column.Column; import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ValueType; import io.druid.segment.data.Indexed; @@ -59,7 +60,6 @@ import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; -import java.util.concurrent.ThreadLocalRandom; /** */ @@ -67,7 +67,6 @@ public class Filters { public static final List FILTERABLE_TYPES = ImmutableList.of(ValueType.STRING, ValueType.LONG); private static final String CTX_KEY_USE_FILTER_CNF = "useFilterCNF"; - static final int SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION = 100; /** * Convert a list of DimFilters to a list of Filters. @@ -246,7 +245,7 @@ public void remove() * * @return bitmap of matching rows * - * @see #estimatePredicateSelectivity(ColumnSelector, String, BitmapIndexSelector, Predicate) + * @see #estimatePredicateSelectivity(String, BitmapIndexSelector, Predicate) */ public static ImmutableBitmap matchPredicate( final String dimension, @@ -273,7 +272,6 @@ public static ImmutableBitmap matchPredicate( /** * Return an estimated selectivity for bitmaps of all values matching the given predicate. 
* - * @param columnSelector column selector * @param dimension dimension to look at * @param indexSelector bitmap selector * @param predicate predicate to use @@ -283,7 +281,6 @@ public static ImmutableBitmap matchPredicate( * @see #matchPredicate(String, BitmapIndexSelector, Predicate) */ static double estimatePredicateSelectivity( - final ColumnSelector columnSelector, final String dimension, final BitmapIndexSelector indexSelector, final Predicate predicate @@ -303,190 +300,25 @@ static double estimatePredicateSelectivity( final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension); return estimatePredicateSelectivity( bitmapIndex, - columnSelector, - dimension, IntIteratorUtils.toIntList(makePredicateQualifyingIndexIterable(bitmapIndex, predicate, dimValues).iterator()), indexSelector.getNumRows() ); } + @VisibleForTesting static double estimatePredicateSelectivity( BitmapIndex bitmapIndex, - ColumnSelector columnSelector, - String dimension, IntList bitmapIndexes, long totalNumRows ) { - final ColumnCapabilities columnCapabilities = columnSelector.getColumn(dimension).getCapabilities(); - return estimateSelectivityOfBitmapList( - bitmapIndex, - bitmapIndexes, - totalNumRows, - // assume multi-value column if columnCapabilities is null - columnCapabilities == null || columnCapabilities.hasMultipleValues() - ); - } - - @VisibleForTesting - static double estimateSelectivityOfBitmapList( - BitmapIndex bitmapIndex, - IntList bitmapIndexes, - long totalNumRows, - boolean isMultiValueDimension - ) - { - double numMatchedRows = bitmapIndexes.size() > 0 ? bitmapIndex.getBitmap(bitmapIndexes.get(0)).size() : 0; - final double nonOverlapRatio; - if (isMultiValueDimension && bitmapIndexes.size() > 1) { - nonOverlapRatio = bitmapIndexes.size() > SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION * 5 - ? 
computeNonOverlapRatioFromRandomBitmapSamples(bitmapIndex, bitmapIndexes) - : computeNonOverlapRatioFromFirstNBitmapSamples( - bitmapsFromIndexes( - bitmapIndexes, - bitmapIndex - ) - ); - } else { - nonOverlapRatio = 1.; - } - - for (int i = 1; i < bitmapIndexes.size(); i++) { + long numMatchedRows = 0; + for (int i = 0; i < bitmapIndexes.size(); i++) { final ImmutableBitmap bitmap = bitmapIndex.getBitmap(bitmapIndexes.get(i)); - numMatchedRows += bitmap.size() * nonOverlapRatio; - } - - return Math.min(1., numMatchedRows / totalNumRows); - } - - static double estimateSelectivityOfBitmapTree( - Iterable bitmaps, - long totalNumRows, - boolean isMultiValueDimension - ) - { - final Iterator iterator = bitmaps.iterator(); - double numMatchedRows = 0; - if (iterator.hasNext()) { - numMatchedRows = iterator.next().size(); - final double nonOverlapRatio = isMultiValueDimension && iterator.hasNext() - ? computeNonOverlapRatioFromFirstNBitmapSamples(bitmaps) - : 1.; - - while (iterator.hasNext()) { - final ImmutableBitmap bitmap = iterator.next(); - numMatchedRows += bitmap.size() * nonOverlapRatio; - } - } - - return Math.min(1., numMatchedRows / totalNumRows); - } - - /** - * This method is to estimate how many bits of bitmaps are not overlapped in average. - * Since a multi-value dimension can have one or more values, one or more bitmaps for that dimension can be set for the same row. - * As a result, to get the exact size of unioned bitmaps, which is widely useful for query planning like - * filter selectivity estimation, expensive union operations of bitmaps are inevitable. - * To avoid such overhead, this method can be used to compute the approximate unioned size based on random sampling. - *

- * The non-overlap ratio can be computed like below. - *

- * overlapSize = size(b1) + size(b2) - size(union(b1, b2)) - * nonOverlapRatio(b2) = (size(b2) - overlapSize) / size(b2) - *

- * The approximate unioned size is - *

- * unionedSize = size(b1) + size(b2) * nonOverlapRatio(b2) - *

- * Given bitmaps, this method calculates the non-overlap ratios of N bitmap samples, - * and then returns the average of them. - * - * @param bitmapIndex bitmap index to retrieve bitmaps - * @param bitmapIndexes a list of indexes of bitmaps - * - * @return approximated average non-overlap ratio of bitmaps. - * - * @see #estimatePredicateSelectivity(ColumnSelector, String, BitmapIndexSelector, Predicate) - */ - @VisibleForTesting - static double computeNonOverlapRatioFromRandomBitmapSamples( - BitmapIndex bitmapIndex, - IntList bitmapIndexes - ) - { - Preconditions.checkArgument(bitmapIndexes.size() > 1, "require at least two elements"); - - final int sampleNum = Math.min(bitmapIndexes.size(), SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION); - final ThreadLocalRandom random = ThreadLocalRandom.current(); - ImmutableBitmap unioned = bitmapIndex.getBitmap(bitmapIndexes.get(random.nextInt(bitmapIndexes.size()))); - int unionedSize = unioned.size(); - double nonOverlapRatioSum = 0.; - - for (int i = 0; i < sampleNum; i++) { - final ImmutableBitmap b = bitmapIndex.getBitmap(bitmapIndexes.get(random.nextInt(bitmapIndexes.size()))); - final int bSize = b.size(); - final int preUnionedSize = unionedSize; - - unioned = unioned.union(b); - unionedSize = unioned.size(); - - final int overlapSize = (preUnionedSize + bSize) - unionedSize; - final double nonOverlapRatio = (double) (bSize - overlapSize) / bSize; - nonOverlapRatioSum += nonOverlapRatio; + numMatchedRows += bitmap.size(); } - return nonOverlapRatioSum / sampleNum; - } - /** - * This method is to estimate how many bits of bitmaps are not overlapped in average. - * Since a multi-value dimension can have one or more values, one or more bitmaps for that dimension can be set for the same row. - * As a result, to get the exact size of unioned bitmaps, which is widely useful for query planning like - * filter selectivity estimation, expensive union operations of bitmaps are inevitable. 
- * To avoid such overhead, this method can be used to compute the approximate unioned size based on sampling. - *

- * The non-overlap ratio can be computed like below. - *

- * overlapSize = size(b1) + size(b2) - size(union(b1, b2)) - * nonOverlapRatio(b2) = (size(b2) - overlapSize) / size(b2) - *

- * The approximate unioned size is - *

- * unionedSize = size(b1) + size(b2) * nonOverlapRatio(b2) - *

- * Given bitmaps, this method calculates the non-overlap ratios of the first N bitmap samples, - * and then returns the average of them. - * - * @param bitmaps An iterable of bitmaps - * - * @return approximated average non-overlap ratio of bitmaps. - * - * @see #estimatePredicateSelectivity(ColumnSelector, String, BitmapIndexSelector, Predicate) - */ - @VisibleForTesting - static double computeNonOverlapRatioFromFirstNBitmapSamples(Iterable bitmaps) - { - final Iterator iterator = bitmaps.iterator(); - Preconditions.checkArgument(iterator.hasNext(), "empty iterator"); - ImmutableBitmap unioned = iterator.next(); - Preconditions.checkArgument(iterator.hasNext(), "require at least two elements"); - - double nonOverlapRatioSum = 0.; - int sampleNum = 0; - int unionedSize = unioned.size(); - - while (iterator.hasNext() && sampleNum++ < SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION) { - final ImmutableBitmap b = iterator.next(); - final int bSize = b.size(); - final int preUnionedSize = unionedSize; - - unioned = unioned.union(b); - unionedSize = unioned.size(); - - final int overlapSize = (preUnionedSize + bSize) - unionedSize; - final double nonOverlapRatio = (double) (bSize - overlapSize) / bSize; - nonOverlapRatioSum += nonOverlapRatio; - } - return nonOverlapRatioSum / sampleNum; + return Math.min(1., (double) numMatchedRows / totalNumRows); } private static Iterable makePredicateQualifyingBitmapIterable( @@ -553,6 +385,22 @@ public int nextInt() }; } + static boolean supportsSelectivityEstimation( + Filter filter, + String dimension, + ColumnSelector columnSelector, + BitmapIndexSelector indexSelector + ) + { + if (filter.supportsBitmapIndex(indexSelector)) { + final Column column = columnSelector.getColumn(dimension); + if (column != null) { + return !column.getCapabilities().hasMultipleValues(); + } + } + return false; + } + public static ValueMatcher getLongValueMatcher( final LongColumnSelector longSelector, final String value diff --git 
a/processing/src/main/java/io/druid/segment/filter/InFilter.java b/processing/src/main/java/io/druid/segment/filter/InFilter.java index 053b55e2ac7e..c9a7ce902d1b 100644 --- a/processing/src/main/java/io/druid/segment/filter/InFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/InFilter.java @@ -78,20 +78,17 @@ public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector) } @Override - public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) + public double estimateSelectivity(BitmapIndexSelector indexSelector) { if (extractionFn == null) { final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension); return Filters.estimatePredicateSelectivity( bitmapIndex, - columnSelector, - dimension, IntIteratorUtils.toIntList(getBitmapIndexIterable(bitmapIndex).iterator()), indexSelector.getNumRows() ); } else { return Filters.estimatePredicateSelectivity( - columnSelector, dimension, indexSelector, getPredicateFactory().makeStringPredicate() @@ -143,6 +140,14 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) return selector.getBitmapIndex(dimension) != null; } + @Override + public boolean supportsSelectivityEstimation( + ColumnSelector columnSelector, BitmapIndexSelector indexSelector + ) + { + return Filters.supportsSelectivityEstimation(this, dimension, columnSelector, indexSelector); + } + private DruidPredicateFactory getPredicateFactory() { return new DruidPredicateFactory() diff --git a/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java b/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java index 3aa24f5363c6..ab7561a6db9f 100644 --- a/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java @@ -56,11 +56,11 @@ public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector) } @Override - public double estimateSelectivity(ColumnSelector 
columnSelector, BitmapIndexSelector indexSelector) + public double estimateSelectivity(BitmapIndexSelector indexSelector) { final Context cx = Context.enter(); try { - return Filters.estimatePredicateSelectivity(columnSelector, dimension, indexSelector, makeStringPredicate(cx)); + return Filters.estimatePredicateSelectivity(dimension, indexSelector, makeStringPredicate(cx)); } finally { Context.exit(); @@ -91,4 +91,12 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) { return selector.getBitmapIndex(dimension) != null; } + + @Override + public boolean supportsSelectivityEstimation( + ColumnSelector columnSelector, BitmapIndexSelector indexSelector + ) + { + return Filters.supportsSelectivityEstimation(this, dimension, columnSelector, indexSelector); + } } diff --git a/processing/src/main/java/io/druid/segment/filter/LikeFilter.java b/processing/src/main/java/io/druid/segment/filter/LikeFilter.java index 4778271000ec..b49088f49deb 100644 --- a/processing/src/main/java/io/druid/segment/filter/LikeFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/LikeFilter.java @@ -87,7 +87,7 @@ public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector) } @Override - public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) + public double estimateSelectivity(BitmapIndexSelector indexSelector) { if (emptyExtractFn() && emptySuffixMatch()) { // dimension equals prefix @@ -108,15 +108,12 @@ public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSele // Use lazy iterator to allow getting bitmap size one by one and avoid materializing all of them at once. 
return Filters.estimatePredicateSelectivity( bitmapIndex, - columnSelector, - dimension, getBitmapIndexList(bitmapIndex, likeMatcher, dimValues), indexSelector.getNumRows() ); } else { // fallback return Filters.estimatePredicateSelectivity( - columnSelector, dimension, indexSelector, likeMatcher.predicateFactory(extractionFn).makeStringPredicate() @@ -151,6 +148,14 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) return selector.getBitmapIndex(dimension) != null; } + @Override + public boolean supportsSelectivityEstimation( + ColumnSelector columnSelector, BitmapIndexSelector indexSelector + ) + { + return Filters.supportsSelectivityEstimation(this, dimension, columnSelector, indexSelector); + } + private static Iterable getBitmapIterator( final BitmapIndex bitmapIndex, final LikeDimFilter.LikeMatcher likeMatcher, diff --git a/processing/src/main/java/io/druid/segment/filter/NotFilter.java b/processing/src/main/java/io/druid/segment/filter/NotFilter.java index 8358a5cee9cc..cbba340b5c86 100644 --- a/processing/src/main/java/io/druid/segment/filter/NotFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/NotFilter.java @@ -70,9 +70,17 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) } @Override - public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) + public boolean supportsSelectivityEstimation( + ColumnSelector columnSelector, BitmapIndexSelector indexSelector + ) + { + return baseFilter.supportsSelectivityEstimation(columnSelector, indexSelector); + } + + @Override + public double estimateSelectivity(BitmapIndexSelector indexSelector) { - return 1. - baseFilter.estimateSelectivity(columnSelector, indexSelector); + return 1. 
- baseFilter.estimateSelectivity(indexSelector); } public Filter getBaseFilter() diff --git a/processing/src/main/java/io/druid/segment/filter/OrFilter.java b/processing/src/main/java/io/druid/segment/filter/OrFilter.java index 98daed553178..02fe68f937f4 100644 --- a/processing/src/main/java/io/druid/segment/filter/OrFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/OrFilter.java @@ -157,12 +157,25 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) } @Override - public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) + public boolean supportsSelectivityEstimation( + ColumnSelector columnSelector, BitmapIndexSelector indexSelector + ) + { + for (Filter filter : filters) { + if(!filter.supportsSelectivityEstimation(columnSelector, indexSelector)) { + return false; + } + } + return true; + } + + @Override + public double estimateSelectivity(BitmapIndexSelector indexSelector) { // Estimate selectivity with attribute value independence assumption double selectivity = 0; for (final Filter filter : filters) { - selectivity += filter.estimateSelectivity(columnSelector, indexSelector); + selectivity += filter.estimateSelectivity(indexSelector); } return Math.min(selectivity, 1.); } diff --git a/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java b/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java index ed76732c58a1..987921f7ff0b 100644 --- a/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java @@ -61,7 +61,15 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) } @Override - public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) + public boolean supportsSelectivityEstimation( + ColumnSelector columnSelector, BitmapIndexSelector indexSelector + ) + { + return Filters.supportsSelectivityEstimation(this, dimension, 
columnSelector, indexSelector); + } + + @Override + public double estimateSelectivity(BitmapIndexSelector indexSelector) { return (double) indexSelector.getBitmapIndex(dimension, value).size() / indexSelector.getNumRows(); } diff --git a/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java b/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java index 4898d306a808..a17831fa297f 100644 --- a/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java @@ -103,13 +103,17 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) } @Override - public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) + public boolean supportsSelectivityEstimation( + ColumnSelector columnSelector, BitmapIndexSelector indexSelector + ) { - // handle rtree overlap - return Filters.estimateSelectivityOfBitmapTree( - indexSelector.getSpatialIndex(dimension).search(bound), - indexSelector.getNumRows(), - true - ); + return false; + } + + @Override + public double estimateSelectivity(BitmapIndexSelector indexSelector) + { + // selectivity estimation for multi-value columns is not implemented yet. 
+ throw new UnsupportedOperationException(); } } diff --git a/processing/src/test/java/io/druid/segment/filter/BaseFilterTest.java b/processing/src/test/java/io/druid/segment/filter/BaseFilterTest.java index 0d6d7aae11d4..0c5cc996d1ce 100644 --- a/processing/src/test/java/io/druid/segment/filter/BaseFilterTest.java +++ b/processing/src/test/java/io/druid/segment/filter/BaseFilterTest.java @@ -388,7 +388,15 @@ public boolean supportsBitmapIndex(BitmapIndexSelector selector) } @Override - public double estimateSelectivity(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) + public boolean supportsSelectivityEstimation( + ColumnSelector columnSelector, BitmapIndexSelector indexSelector + ) + { + return false; + } + + @Override + public double estimateSelectivity(BitmapIndexSelector indexSelector) { return 1.0; } diff --git a/processing/src/test/java/io/druid/segment/filter/FiltersTest.java b/processing/src/test/java/io/druid/segment/filter/FiltersTest.java index 9f0de459779b..d7dd5abcf7aa 100644 --- a/processing/src/test/java/io/druid/segment/filter/FiltersTest.java +++ b/processing/src/test/java/io/druid/segment/filter/FiltersTest.java @@ -35,93 +35,19 @@ public class FiltersTest { - @Test - public void testComputeNonOverlapRatioFromRandomBitmapSamplesWithFullyOverlappedBitmaps() - { - final int bitmapNum = 10; - final List bitmaps = Lists.newArrayListWithCapacity(bitmapNum); - final BitmapIndex bitmapIndex = makeFullyOverlappedBitmapIndexes(bitmapNum, bitmaps); - - final double estimated = Filters.computeNonOverlapRatioFromRandomBitmapSamples( - bitmapIndex, - IntIteratorUtils.toIntList(IntIterators.fromTo(0, bitmapNum)) - ); - final double expected = 0.0; - assertEquals(expected, estimated, 0.00001); - } - - @Test(expected = IllegalArgumentException.class) - public void testComputeNonOverlapRatioFromRandomBitmapSamplesWithEmptyBitmaps() - { - final List bitmaps = Lists.newArrayList(); - final BitmapIndex bitmapIndex = getBitmapIndex(bitmaps); - 
Filters.computeNonOverlapRatioFromRandomBitmapSamples( - bitmapIndex, - IntIteratorUtils.toIntList(IntIterators.EMPTY_ITERATOR) - ); - } - - @Test - public void testComputeNonOverlapRatioFromFirstNBitmapSamplesWithNonOverlapBitmaps() throws Exception - { - final int bitmapNum = 10; - final List bitmaps = Lists.newArrayListWithCapacity(bitmapNum); - makeNonOverlappedBitmapIndexes(bitmapNum, bitmaps); - - final double estimated = Filters.computeNonOverlapRatioFromFirstNBitmapSamples(bitmaps); - final double expected = 1.0; - assertEquals(expected, estimated, 0.00001); - } - - @Test(expected = IllegalArgumentException.class) - public void testComputeNonOverlapRatioFromFirstNBitmapSamplesWithEmptyBitmaps() - { - final List bitmaps = Lists.newArrayList(); - Filters.computeNonOverlapRatioFromFirstNBitmapSamples(bitmaps); - } - - @Test - public void testComputeNonOverlapRatioFromFirstNBitmapSamples() throws Exception - { - final int bitmapNum = Filters.SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION; - final List bitmaps = Lists.newArrayListWithCapacity(bitmapNum); - makePartiallyOverlappedBitmapIndexes(bitmapNum, bitmaps); - - final double estimated = Filters.computeNonOverlapRatioFromFirstNBitmapSamples(bitmaps); - final double expected = 0.2; - assertEquals(expected, estimated, 0.00001); - } - @Test public void testEstimateSelectivityOfBitmapList() { - final int bitmapNum = Filters.SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION; + final int bitmapNum = 100; final List bitmaps = Lists.newArrayListWithCapacity(bitmapNum); - final BitmapIndex bitmapIndex = makePartiallyOverlappedBitmapIndexes(bitmapNum, bitmaps); + final BitmapIndex bitmapIndex = makeNonOverlappedBitmapIndexes(bitmapNum, bitmaps); - final double estimated = Filters.estimateSelectivityOfBitmapList( + final double estimated = Filters.estimatePredicateSelectivity( bitmapIndex, IntIteratorUtils.toIntList(IntIterators.fromTo(0, bitmapNum)), - 1000, - true + 10000 ); - final double expected = 0.208; // total # of bits is 208 = 10 
+ 99 * 2 - assertEquals(expected, estimated, 0.00001); - } - - @Test - public void testEstimateSelectivityOfBitmapTree() - { - final int bitmapNum = Filters.SAMPLE_NUM_FOR_SELECTIVITY_ESTIMATION; - final List bitmaps = Lists.newArrayListWithCapacity(bitmapNum); - makePartiallyOverlappedBitmapIndexes(bitmapNum, bitmaps); - - final double estimated = Filters.estimateSelectivityOfBitmapTree( - bitmaps, - 1000, - true - ); - final double expected = 0.208; // total # of bits is 208 = 10 + 99 * 2 + final double expected = 0.1; assertEquals(expected, estimated, 0.00001); } @@ -167,46 +93,15 @@ public ImmutableBitmap getBitmap(int idx) }; } - private static BitmapIndex makeFullyOverlappedBitmapIndexes(final int bitmapNum, final List bitmaps) - { - final BitmapIndex bitmapIndex = getBitmapIndex(bitmaps); - final BitmapFactory factory = bitmapIndex.getBitmapFactory(); - for (int i = 0; i < bitmapNum; i++) { - final MutableBitmap mutableBitmap = factory.makeEmptyMutableBitmap(); - for (int j = 0; j < 10; j++) { - mutableBitmap.add(j * 10); - } - bitmaps.add(factory.makeImmutableBitmap(mutableBitmap)); - } - return bitmapIndex; - } - private static BitmapIndex makeNonOverlappedBitmapIndexes(final int bitmapNum, final List bitmaps) { final BitmapIndex bitmapIndex = getBitmapIndex(bitmaps); final BitmapFactory factory = bitmapIndex.getBitmapFactory(); - int index = 0; - for (int i = 0; i < bitmapNum; i++) { - final MutableBitmap mutableBitmap = factory.makeEmptyMutableBitmap(); - for (int j = 0; j < 10; j++) { - mutableBitmap.add(index++); - } - bitmaps.add(factory.makeImmutableBitmap(mutableBitmap)); - } - return bitmapIndex; - } - - private static BitmapIndex makePartiallyOverlappedBitmapIndexes(int bitmapNum, List bitmaps) - { - final BitmapIndex bitmapIndex = getBitmapIndex(bitmaps); - final BitmapFactory factory = bitmapIndex.getBitmapFactory(); - int startIndex = 0; for (int i = 0; i < bitmapNum; i++) { final MutableBitmap mutableBitmap = factory.makeEmptyMutableBitmap(); 
for (int j = 0; j < 10; j++) { - mutableBitmap.add(startIndex + j); + mutableBitmap.add(i * 10 + j); } - startIndex += 2; // 80% of bitmaps are overlapped bitmaps.add(factory.makeImmutableBitmap(mutableBitmap)); } return bitmapIndex; From 92da826ae254a74572017bc63045ca7685f16f33 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Sat, 28 Jan 2017 11:45:10 +0900 Subject: [PATCH 9/9] Address comment --- processing/src/main/java/io/druid/segment/IntListUtils.java | 5 ++--- .../src/main/java/io/druid/segment/filter/BoundFilter.java | 4 +++- .../src/test/java/io/druid/segment/IntListUtilsTest.java | 6 ++---- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/processing/src/main/java/io/druid/segment/IntListUtils.java b/processing/src/main/java/io/druid/segment/IntListUtils.java index ee21c569643a..f4289eeac72a 100644 --- a/processing/src/main/java/io/druid/segment/IntListUtils.java +++ b/processing/src/main/java/io/druid/segment/IntListUtils.java @@ -29,8 +29,7 @@ private IntListUtils() {} public static IntList fromTo(int from, int to) { - // TODO: check `from` is always smaller than or equal to `to`. - // It's currently disabled because BoundFilter.getStartEndIndexes() sometimes violates this condition. 
+ Preconditions.checkArgument(from <= to); return new RangeIntList(from, to); } @@ -42,7 +41,7 @@ private static final class RangeIntList extends AbstractIntList RangeIntList(int start, int end) { this.start = start; - this.size = Math.max(end - start, 0); + this.size = end - start; } @Override diff --git a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java index 4a961a20fac2..aeb735de7239 100644 --- a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java @@ -131,7 +131,7 @@ private static Pair getStartEndIndexes( ) { final int startIndex; // inclusive - final int endIndex; // exclusive + int endIndex; // exclusive if (!boundDimFilter.hasLowerBound()) { startIndex = 0; @@ -155,6 +155,8 @@ private static Pair getStartEndIndexes( } } + endIndex = startIndex > endIndex ? startIndex : endIndex; + return new Pair<>(startIndex, endIndex); } diff --git a/processing/src/test/java/io/druid/segment/IntListUtilsTest.java b/processing/src/test/java/io/druid/segment/IntListUtilsTest.java index b75712eca966..e1b992af4edd 100644 --- a/processing/src/test/java/io/druid/segment/IntListUtilsTest.java +++ b/processing/src/test/java/io/druid/segment/IntListUtilsTest.java @@ -34,12 +34,10 @@ public void testEmptyRangeIntList() list.get(0); } - @Test(expected = IndexOutOfBoundsException.class) + @Test(expected = IllegalArgumentException.class) public void testRangeIntListWithSmallEndIndex() { - final IntList list = IntListUtils.fromTo(10, 5); - assertEquals(0, list.size()); - list.get(0); + IntListUtils.fromTo(10, 5); } @Test