diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ConciseBitmapFactory.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ConciseBitmapFactory.java index 3efdc51d4f60..3416cd827869 100755 --- a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ConciseBitmapFactory.java +++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ConciseBitmapFactory.java @@ -19,11 +19,13 @@ package io.druid.collections.bitmap; +import com.google.common.collect.Iterables; +import io.druid.extendedset.intset.ImmutableConciseSet; + import java.nio.ByteBuffer; +import java.util.Collection; import java.util.Iterator; -import io.druid.extendedset.intset.ImmutableConciseSet; - /** * As the name suggests, this class instantiates bitmaps of the types * WrappedConciseBitmap and WrappedImmutableConciseBitmap. @@ -109,6 +111,16 @@ public ImmutableBitmap mapImmutableBitmap(ByteBuffer b) public ImmutableBitmap union(Iterable b) throws ClassCastException { + if (b instanceof Collection) { + final Collection bitmapList = (Collection) b; + final int size = bitmapList.size(); + if (size == 0) { + return makeEmptyImmutableBitmap(); + } else if (size == 1) { + return Iterables.getOnlyElement(b); + } + } + return new WrappedImmutableConciseBitmap(ImmutableConciseSet.union(unwrap(b))); } diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/RoaringBitmapFactory.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/RoaringBitmapFactory.java index 0aa86e05c362..ea093b3a2358 100755 --- a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/RoaringBitmapFactory.java +++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/RoaringBitmapFactory.java @@ -20,6 +20,7 @@ package io.druid.collections.bitmap; import com.google.common.base.Throwables; +import com.google.common.collect.Iterables; import org.roaringbitmap.RoaringBitmap; import org.roaringbitmap.buffer.BufferFastAggregation; import org.roaringbitmap.buffer.ImmutableRoaringBitmap; @@ -27,6 +28,7 @@ import java.io.ByteArrayOutputStream; import java.io.DataOutputStream; import java.nio.ByteBuffer; +import java.util.Collection; import java.util.Iterator; /** @@ -142,6 +144,16 @@ public ImmutableBitmap mapImmutableBitmap(ByteBuffer b) @Override public ImmutableBitmap union(Iterable b) { + if (b instanceof Collection) { + final Collection bitmapList = (Collection) b; + final int size = bitmapList.size(); + if (size == 0) { + return makeEmptyImmutableBitmap(); + } else if (size == 1) { + return Iterables.getOnlyElement(b); + } + } + return new WrappedImmutableRoaringBitmap(ImmutableRoaringBitmap.or(unwrap(b).iterator())); } diff --git a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java index aeb735de7239..c76485eb06b7 100644 --- a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java @@ -85,13 +85,13 @@ public double estimateSelectivity(BitmapIndexSelector indexSelector) return doesMatch(null) ? 1. : 0.; } - return Filters.estimatePredicateSelectivity( + return Filters.estimateSelectivity( bitmapIndex, getBitmapIndexList(boundDimFilter, bitmapIndex), indexSelector.getNumRows() ); } else { - return Filters.estimatePredicateSelectivity( + return Filters.estimateSelectivity( boundDimFilter.getDimension(), indexSelector, getPredicateFactory().makeStringPredicate() diff --git a/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java b/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java index 0155f9f4cb9e..db905e94ef64 100644 --- a/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java @@ -116,7 +116,7 @@ public boolean supportsSelectivityEstimation( @Override public double estimateSelectivity(BitmapIndexSelector indexSelector) { - return Filters.estimatePredicateSelectivity( + return Filters.estimateSelectivity( dimension, indexSelector, predicateFactory.makeStringPredicate() diff --git a/processing/src/main/java/io/druid/segment/filter/Filters.java b/processing/src/main/java/io/druid/segment/filter/Filters.java index f4b1ccdabb63..b710a66883a9 100644 --- a/processing/src/main/java/io/druid/segment/filter/Filters.java +++ b/processing/src/main/java/io/druid/segment/filter/Filters.java @@ -19,7 +19,6 @@ package io.druid.segment.filter; -import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; @@ -245,13 +244,33 @@ public void remove() * * @return bitmap of matching rows * - * @see #estimatePredicateSelectivity(String, BitmapIndexSelector, Predicate) + * @see #estimateSelectivity(String, BitmapIndexSelector, Predicate) */ public static ImmutableBitmap matchPredicate( final String dimension, final BitmapIndexSelector selector, final Predicate predicate ) + { + return selector.getBitmapFactory().union(matchPredicateNoUnion(dimension, selector, predicate)); + } + + /** + * Return an iterable of bitmaps for all values matching a particular predicate. Unioning these bitmaps + * yields the same result that {@link #matchPredicate(String, BitmapIndexSelector, Predicate)} would have + * returned. + * + * @param dimension dimension to look at + * @param selector bitmap selector + * @param predicate predicate to use + * + * @return iterable of bitmaps of matching rows + */ + public static Iterable matchPredicateNoUnion( + final String dimension, + final BitmapIndexSelector selector, + final Predicate predicate + ) { Preconditions.checkNotNull(dimension, "dimension"); Preconditions.checkNotNull(selector, "selector"); @@ -260,27 +279,26 @@ public static ImmutableBitmap matchPredicate( // Missing dimension -> match all rows if the predicate matches null; match no rows otherwise final Indexed dimValues = selector.getDimensionValues(dimension); if (dimValues == null || dimValues.size() == 0) { - return predicate.apply(null) ? allTrue(selector) : allFalse(selector); + return ImmutableList.of(predicate.apply(null) ? allTrue(selector) : allFalse(selector)); } // Apply predicate to all dimension values and union the matching bitmaps final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension); - return selector.getBitmapFactory() - .union(makePredicateQualifyingBitmapIterable(bitmapIndex, predicate, dimValues)); + return makePredicateQualifyingBitmapIterable(bitmapIndex, predicate, dimValues); } /** * Return an estimated selectivity for bitmaps of all values matching the given predicate. * - * @param dimension dimension to look at - * @param indexSelector bitmap selector - * @param predicate predicate to use + * @param dimension dimension to look at + * @param indexSelector bitmap selector + * @param predicate predicate to use * * @return estimated selectivity * * @see #matchPredicate(String, BitmapIndexSelector, Predicate) */ - static double estimatePredicateSelectivity( + public static double estimateSelectivity( final String dimension, final BitmapIndexSelector indexSelector, final Predicate predicate @@ -298,23 +316,53 @@ static double estimatePredicateSelectivity( // Apply predicate to all dimension values and union the matching bitmaps final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension); - return estimatePredicateSelectivity( + return estimateSelectivity( bitmapIndex, IntIteratorUtils.toIntList(makePredicateQualifyingIndexIterable(bitmapIndex, predicate, dimValues).iterator()), indexSelector.getNumRows() ); } - @VisibleForTesting - static double estimatePredicateSelectivity( - BitmapIndex bitmapIndex, - IntList bitmapIndexes, - long totalNumRows + /** + * Return an estimated selectivity for bitmaps for the dimension values given by dimValueIndexes. + * + * @param bitmapIndex bitmap index + * @param bitmaps bitmaps to extract, by index + * @param totalNumRows number of rows in the column associated with this bitmap index + * + * @return estimated selectivity + */ + public static double estimateSelectivity( + final BitmapIndex bitmapIndex, + final IntList bitmaps, + final long totalNumRows + ) + { + long numMatchedRows = 0; + for (int i = 0; i < bitmaps.size(); i++) { + final ImmutableBitmap bitmap = bitmapIndex.getBitmap(bitmaps.get(i)); + numMatchedRows += bitmap.size(); + } + + return Math.min(1., (double) numMatchedRows / totalNumRows); + } + + /** + * Return an estimated selectivity for bitmaps given by an iterator. + * + * @param bitmaps iterator of bitmaps + * @param totalNumRows number of rows in the column associated with this bitmap index + * + * @return estimated selectivity + */ + public static double estimateSelectivity( + final Iterator bitmaps, + final long totalNumRows ) { long numMatchedRows = 0; - for (int i = 0; i < bitmapIndexes.size(); i++) { - final ImmutableBitmap bitmap = bitmapIndex.getBitmap(bitmapIndexes.get(i)); + while (bitmaps.hasNext()) { + final ImmutableBitmap bitmap = bitmaps.next(); numMatchedRows += bitmap.size(); } diff --git a/processing/src/main/java/io/druid/segment/filter/InFilter.java b/processing/src/main/java/io/druid/segment/filter/InFilter.java index c9a7ce902d1b..9a2719d8c03d 100644 --- a/processing/src/main/java/io/druid/segment/filter/InFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/InFilter.java @@ -82,13 +82,13 @@ public double estimateSelectivity(BitmapIndexSelector indexSelector) { if (extractionFn == null) { final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension); - return Filters.estimatePredicateSelectivity( + return Filters.estimateSelectivity( bitmapIndex, IntIteratorUtils.toIntList(getBitmapIndexIterable(bitmapIndex).iterator()), indexSelector.getNumRows() ); } else { - return Filters.estimatePredicateSelectivity( + return Filters.estimateSelectivity( dimension, indexSelector, getPredicateFactory().makeStringPredicate() diff --git a/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java b/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java index ab7561a6db9f..dbe56672e897 100644 --- a/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java @@ -60,7 +60,7 @@ public double estimateSelectivity(BitmapIndexSelector indexSelector) { final Context cx = Context.enter(); try { - return Filters.estimatePredicateSelectivity(dimension, indexSelector, makeStringPredicate(cx)); + return Filters.estimateSelectivity(dimension, indexSelector, makeStringPredicate(cx)); } finally { Context.exit(); diff --git a/processing/src/main/java/io/druid/segment/filter/LikeFilter.java b/processing/src/main/java/io/druid/segment/filter/LikeFilter.java index b49088f49deb..8b47566e4294 100644 --- a/processing/src/main/java/io/druid/segment/filter/LikeFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/LikeFilter.java @@ -20,6 +20,7 @@ package io.druid.segment.filter; import com.google.common.base.Strings; +import com.google.common.collect.ImmutableList; import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; @@ -28,13 +29,11 @@ import io.druid.query.filter.ValueMatcher; import io.druid.segment.ColumnSelector; import io.druid.segment.ColumnSelectorFactory; -import io.druid.segment.IntIteratorUtils; import io.druid.segment.column.BitmapIndex; import io.druid.segment.data.Indexed; import it.unimi.dsi.fastutil.ints.AbstractIntIterator; import it.unimi.dsi.fastutil.ints.IntIterable; import it.unimi.dsi.fastutil.ints.IntIterator; -import it.unimi.dsi.fastutil.ints.IntList; import java.util.NoSuchElementException; @@ -58,82 +57,13 @@ public LikeFilter( @Override public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector) { - if (emptyExtractFn() && emptySuffixMatch()) { - // dimension equals prefix - return selector.getBitmapIndex(dimension, likeMatcher.getPrefix()); - } else if (emptyExtractFn() && nonEmptyPrefix()) { - // dimension startsWith prefix and is accepted by likeMatcher.matchesSuffixOnly - final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension); - - if (bitmapIndex == null) { - // Treat this as a column full of nulls - return likeMatcher.matches(null) ? Filters.allTrue(selector) : Filters.allFalse(selector); - } - - // search for start, end indexes in the bitmaps; then include all matching bitmaps between those points - final Indexed dimValues = selector.getDimensionValues(dimension); - - // Union bitmaps for all matching dimension values in range. - // Use lazy iterator to allow unioning bitmaps one by one and avoid materializing all of them at once. - return selector.getBitmapFactory().union(getBitmapIterator(bitmapIndex, likeMatcher, dimValues)); - } else { - // fallback - return Filters.matchPredicate( - dimension, - selector, - likeMatcher.predicateFactory(extractionFn).makeStringPredicate() - ); - } + return selector.getBitmapFactory().union(getBitmapIterable(selector)); } @Override - public double estimateSelectivity(BitmapIndexSelector indexSelector) - { - if (emptyExtractFn() && emptySuffixMatch()) { - // dimension equals prefix - return (double) indexSelector.getBitmapIndex(dimension, likeMatcher.getPrefix()).size() - / indexSelector.getNumRows(); - } else if (emptyExtractFn() && nonEmptyPrefix()) { - // dimension startsWith prefix and is accepted by likeMatcher.matchesSuffixOnly - final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension); - - if (bitmapIndex == null) { - // Treat this as a column full of nulls - return likeMatcher.matches(null) ? 1. : 0.; - } - - // search for start, end indexes in the bitmaps; then include all matching bitmaps between those points - final Indexed dimValues = indexSelector.getDimensionValues(dimension); - - // Use lazy iterator to allow getting bitmap size one by one and avoid materializing all of them at once. - return Filters.estimatePredicateSelectivity( - bitmapIndex, - getBitmapIndexList(bitmapIndex, likeMatcher, dimValues), - indexSelector.getNumRows() - ); - } else { - // fallback - return Filters.estimatePredicateSelectivity( - dimension, - indexSelector, - likeMatcher.predicateFactory(extractionFn).makeStringPredicate() - ); - } - } - - private boolean emptyExtractFn() + public double estimateSelectivity(BitmapIndexSelector selector) { - return extractionFn == null; - } - - private boolean emptySuffixMatch() - { - return likeMatcher.getSuffixMatch() == LikeDimFilter.LikeMatcher.SuffixMatch.MATCH_EMPTY; - } - - private boolean nonEmptyPrefix() - { - return !likeMatcher.getPrefix().isEmpty(); + return Filters.estimateSelectivity(getBitmapIterable(selector).iterator(), selector.getNumRows()); } @Override @@ -156,27 +86,57 @@ public boolean supportsSelectivityEstimation( return Filters.supportsSelectivityEstimation(this, dimension, columnSelector, indexSelector); } - private static Iterable getBitmapIterator( - final BitmapIndex bitmapIndex, - final LikeDimFilter.LikeMatcher likeMatcher, - final Indexed dimValues - ) + private Iterable getBitmapIterable(final BitmapIndexSelector selector) + { + if (isSimpleEquals()) { + // Verify that dimension equals prefix. + return ImmutableList.of(selector.getBitmapIndex(dimension, likeMatcher.getPrefix())); + } else if (isSimplePrefix()) { + // Verify that dimension startsWith prefix, and is accepted by likeMatcher.matchesSuffixOnly. + final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension); + + if (bitmapIndex == null) { + // Treat this as a column full of nulls + return ImmutableList.of(likeMatcher.matches(null) ? Filters.allTrue(selector) : Filters.allFalse(selector)); + } + + // search for start, end indexes in the bitmaps; then include all matching bitmaps between those points + final Indexed dimValues = selector.getDimensionValues(dimension); + + // Union bitmaps for all matching dimension values in range. + // Use lazy iterator to allow unioning bitmaps one by one and avoid materializing all of them at once. + return Filters.bitmapsFromIndexes( + getDimValueIndexIterableForPrefixMatch(bitmapIndex, dimValues), + bitmapIndex + ); + } else { + // fallback + return Filters.matchPredicateNoUnion( + dimension, + selector, + likeMatcher.predicateFactory(extractionFn).makeStringPredicate() + ); + } + } + + /** + * Returns true if this filter is a simple equals filter: dimension = 'value' with no extractionFn. + */ + private boolean isSimpleEquals() { - return Filters.bitmapsFromIndexes(getBitmapIndexIterator(bitmapIndex, likeMatcher, dimValues), bitmapIndex); + return extractionFn == null && likeMatcher.getSuffixMatch() == LikeDimFilter.LikeMatcher.SuffixMatch.MATCH_EMPTY; } - private static IntList getBitmapIndexList( - final BitmapIndex bitmapIndex, - final LikeDimFilter.LikeMatcher likeMatcher, - final Indexed dimValues - ) + /** + * Returns true if this filter is a simple prefix filter: dimension startsWith 'value' with no extractionFn. + */ + private boolean isSimplePrefix() { - return IntIteratorUtils.toIntList(getBitmapIndexIterator(bitmapIndex, likeMatcher, dimValues).iterator()); + return extractionFn == null && !likeMatcher.getPrefix().isEmpty(); } - private static IntIterable getBitmapIndexIterator( + private IntIterable getDimValueIndexIterableForPrefixMatch( final BitmapIndex bitmapIndex, - final LikeDimFilter.LikeMatcher likeMatcher, final Indexed dimValues ) { diff --git a/processing/src/test/java/io/druid/segment/filter/FiltersTest.java b/processing/src/test/java/io/druid/segment/filter/FiltersTest.java index d7dd5abcf7aa..68f4f12c0eb4 100644 --- a/processing/src/test/java/io/druid/segment/filter/FiltersTest.java +++ b/processing/src/test/java/io/druid/segment/filter/FiltersTest.java @@ -42,7 +42,7 @@ public void testEstimateSelectivityOfBitmapList() final List bitmaps = Lists.newArrayListWithCapacity(bitmapNum); final BitmapIndex bitmapIndex = makeNonOverlappedBitmapIndexes(bitmapNum, bitmaps); - final double estimated = Filters.estimatePredicateSelectivity( + final double estimated = Filters.estimateSelectivity( bitmapIndex, IntIteratorUtils.toIntList(IntIterators.fromTo(0, bitmapNum)), 10000