From c703d20d03b266edc45a61d57e6b9aba065f3c1a Mon Sep 17 00:00:00 2001 From: cecemei Date: Wed, 11 Sep 2024 16:50:14 -0700 Subject: [PATCH 01/10] Create a new class FilterBundle.Builder, and refactor Filter/AndFilter/OrFilter to use it when making FilterBundle. --- .../druid/query/filter/BooleanFilter.java | 8 - .../org/apache/druid/query/filter/Filter.java | 83 +++++---- .../druid/query/filter/FilterBundle.java | 176 +++++++++++++----- .../segment/QueryableIndexCursorHolder.java | 6 +- .../druid/segment/filter/AndFilter.java | 33 +++- .../apache/druid/segment/filter/OrFilter.java | 23 ++- .../segment/index/BitmapColumnIndex.java | 7 + .../segment/filter/FilterBundleTest.java | 3 +- 8 files changed, 229 insertions(+), 110 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/BooleanFilter.java b/processing/src/main/java/org/apache/druid/query/filter/BooleanFilter.java index e14e8b8d5e30..2cb8a68b6e91 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/BooleanFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/BooleanFilter.java @@ -25,14 +25,6 @@ public interface BooleanFilter extends Filter { - /** - * Returns the child filters for this filter. - * - * This is a LinkedHashSet because we don't want duplicates, but the order is also important in some cases (such - * as when filters are provided in an order that encourages short-circuiting.) - */ - LinkedHashSet getFilters(); - @Override default Set getRequiredColumns() { diff --git a/processing/src/main/java/org/apache/druid/query/filter/Filter.java b/processing/src/main/java/org/apache/druid/query/filter/Filter.java index f30681c86691..c49bd97e7463 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/Filter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/Filter.java @@ -31,12 +31,28 @@ import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; +import java.util.LinkedHashSet; import java.util.Map; import java.util.Set; @SubclassesMustOverrideEqualsAndHashCode public interface Filter { + default String getFilterString() + { + return toString(); + } + + /** + * Returns a LinkedHashSet of all child filters for this filter with no duplicates. + * + *

The ordering of child filters is important in some cases, e.x.short-curcuiting.

+ */ + default LinkedHashSet getFilters() + { + return new LinkedHashSet<>(); + } + /** * Compute indexes and build a container {@link FilterBundle} to be used during * {@link org.apache.druid.segment.Cursor} or {@link org.apache.druid.segment.vector.VectorCursor} creation, combining @@ -48,26 +64,26 @@ public interface Filter * See {@link FilterBundle} for additional details. * * @param columnIndexSelector - provides {@link org.apache.druid.segment.column.ColumnIndexSupplier} to fetch column - * indexes and {@link org.apache.druid.collections.bitmap.BitmapFactory} to manipulate - * them + * indexes and {@link org.apache.druid.collections.bitmap.BitmapFactory} to manipulate + * them * @param bitmapResultFactory - wrapper for {@link ImmutableBitmap} operations to tie into - * {@link org.apache.druid.query.QueryMetrics} and build the output indexes + * {@link org.apache.druid.query.QueryMetrics} and build the output indexes * @param applyRowCount - upper bound on number of rows this filter would be applied to, after removing rows - * short-circuited by prior bundle operations. For example, given "x AND y", if "x" is - * resolved using an index, then "y" will receive the number of rows that matched - * the filter "x". As another example, given "x OR y", if "x" is resolved using an - * index, then "y" will receive the number of rows that did *not* match the filter "x". + * short-circuited by prior bundle operations. For example, given "x AND y", if "x" is + * resolved using an index, then "y" will receive the number of rows that matched + * the filter "x". As another example, given "x OR y", if "x" is resolved using an + * index, then "y" will receive the number of rows that did *not* match the filter "x". * @param totalRowCount - total number of rows to be scanned if no indexes are applied * @param includeUnknown - mapping for Druid native two state logic system into SQL three-state logic system. If - * set to true, bitmaps returned by this method should include true bits for any rows - * where the matching result is 'unknown', such as from the input being null valued. - * See {@link NullHandling#useThreeValueLogic()} - * @return - {@link FilterBundle} containing any indexes and/or matchers that are needed to build - * a cursor + * set to true, bitmaps returned by this method should include true bits for any rows + * where the matching result is 'unknown', such as from the input being null valued. + * See {@link NullHandling#useThreeValueLogic()} * @param - Type of {@link BitmapResultFactory} results, {@link ImmutableBitmap} by default + * @return - {@link FilterBundle} containing any indexes and/or matchers that are needed to build + * a cursor */ default FilterBundle makeFilterBundle( - ColumnIndexSelector columnIndexSelector, + FilterBundle.Builder filterBundleBuilder, BitmapResultFactory bitmapResultFactory, int applyRowCount, int totalRowCount, @@ -76,24 +92,26 @@ default FilterBundle makeFilterBundle( { final FilterBundle.IndexBundle indexBundle; final boolean needMatcher; - final BitmapColumnIndex columnIndex = getBitmapColumnIndex(columnIndexSelector); + final BitmapColumnIndex columnIndex = filterBundleBuilder.getBitmapColumnIndex(); if (columnIndex != null) { final long bitmapConstructionStartNs = System.nanoTime(); - final T result = columnIndex.computeBitmapResult( - bitmapResultFactory, - applyRowCount, - totalRowCount, - includeUnknown + final T result = columnIndex.computeBitmapResult(bitmapResultFactory, + applyRowCount, + totalRowCount, + includeUnknown ); final long totalConstructionTimeNs = System.nanoTime() - bitmapConstructionStartNs; if (result == null) { indexBundle = null; } else { final ImmutableBitmap bitmap = bitmapResultFactory.toImmutableBitmap(result); - indexBundle = new FilterBundle.SimpleIndexBundle( - new FilterBundle.IndexBundleInfo(this::toString, bitmap.size(), totalConstructionTimeNs, null), - bitmap, - columnIndex.getIndexCapabilities() + indexBundle = new FilterBundle.SimpleIndexBundle(new FilterBundle.IndexBundleInfo(this::getFilterString, + bitmap.size(), + totalConstructionTimeNs, + null + ), + bitmap, + columnIndex.getIndexCapabilities() ); } needMatcher = result == null || !columnIndex.getIndexCapabilities().isExact(); @@ -103,11 +121,14 @@ default FilterBundle makeFilterBundle( } final FilterBundle.SimpleMatcherBundle matcherBundle; if (needMatcher) { - matcherBundle = new FilterBundle.SimpleMatcherBundle( - new FilterBundle.MatcherBundleInfo(this::toString, null, null), - this::makeMatcher, - this::makeVectorMatcher, - this.canVectorizeMatcher(columnIndexSelector) + matcherBundle = new FilterBundle.SimpleMatcherBundle(new FilterBundle.MatcherBundleInfo( + this::getFilterString, + null, + null + ), + this::makeMatcher, + this::makeVectorMatcher, + this.canVectorizeMatcher(filterBundleBuilder.getColumnIndexSelector()) ); } else { matcherBundle = null; @@ -122,7 +143,6 @@ default FilterBundle makeFilterBundle( * examine details about the index prior to computing it, via {@link BitmapColumnIndex#getIndexCapabilities()}. * * @param selector Object used to create BitmapColumnIndex - * * @return BitmapColumnIndex that can build ImmutableBitmap of matched row numbers */ @Nullable @@ -132,7 +152,6 @@ default FilterBundle makeFilterBundle( * Get a {@link ValueMatcher} that applies this filter to row values. * * @param factory Object used to create ValueMatchers - * * @return ValueMatcher that applies this filter to row values. */ ValueMatcher makeMatcher(ColumnSelectorFactory factory); @@ -141,7 +160,6 @@ default FilterBundle makeFilterBundle( * Get a {@link VectorValueMatcher} that applies this filter to row vectors. * * @param factory Object used to create ValueMatchers - * * @return VectorValueMatcher that applies this filter to row vectors. */ default VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory) @@ -151,6 +169,7 @@ default VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory /** * Returns true if this filter can produce a vectorized matcher from its "makeVectorMatcher" method. + * * @param inspector Supplies type information for the selectors this filter will match against */ default boolean canVectorizeMatcher(ColumnInspector inspector) @@ -176,7 +195,7 @@ default boolean supportsRequiredColumnRewrite() * Return a copy of this filter that is identical to the this filter except that it operates on different columns, * based on a renaming map where the key is the column to be renamed in the filter, and the value is the new * column name. - * + *

* For example, if I have a filter (A = hello), and I have a renaming map (A -> B), * this should return the filter (B = hello) * diff --git a/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java b/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java index e105b0d6163b..62b6d7f7695c 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java +++ b/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java @@ -24,16 +24,20 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.query.filter.vector.VectorValueMatcher; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.column.ColumnIndexCapabilities; import org.apache.druid.segment.column.SimpleColumnIndexCapabilities; import org.apache.druid.segment.data.Offset; import org.apache.druid.segment.filter.FalseFilter; +import org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.segment.vector.ReadableVectorOffset; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; +import java.util.ArrayList; +import java.util.Comparator; import java.util.List; import java.util.concurrent.TimeUnit; import java.util.function.Function; @@ -59,29 +63,26 @@ public class FilterBundle { public static FilterBundle allFalse(long constructionTime, ImmutableBitmap emptyBitmap) { - return new FilterBundle( - new FilterBundle.SimpleIndexBundle( - new FilterBundle.IndexBundleInfo(() -> FalseFilter.instance().toString(), 0, constructionTime, null), - emptyBitmap, - SimpleColumnIndexCapabilities.getConstant() - ), - null - ); + return new FilterBundle(new FilterBundle.SimpleIndexBundle( + new FilterBundle.IndexBundleInfo(() -> FalseFilter.instance().toString(), 0, constructionTime, null), + emptyBitmap, + SimpleColumnIndexCapabilities.getConstant() + ), null); } + // The index that actually gets used in a filter. + // The index bundle is nullable, meaning in a query not a single index has been chosen. When it's non-null, all fields have values. @Nullable private final IndexBundle indexBundle; @Nullable private final MatcherBundle matcherBundle; public FilterBundle( - @Nullable IndexBundle index, - @Nullable MatcherBundle matcherBundle + @Nullable IndexBundle index, @Nullable MatcherBundle matcherBundle ) { - Preconditions.checkArgument( - index != null || matcherBundle != null, - "At least one of index or matcher must be not null" + Preconditions.checkArgument(index != null || matcherBundle != null, + "At least one of index or matcher must be not null" ); this.indexBundle = index; this.matcherBundle = matcherBundle; @@ -100,11 +101,91 @@ public MatcherBundle getMatcherBundle() return matcherBundle; } + public static class Builder + { + private final Filter filter; + private final ColumnIndexSelector columnIndexSelector; + @Nullable + private final BitmapColumnIndex bitmapColumnIndex; + private final List childBuilders; + private final int estimatedComputeCost; + + public Builder( + Filter filter, ColumnIndexSelector columnIndexSelector + ) + { + this.filter = filter; + this.columnIndexSelector = columnIndexSelector; + this.bitmapColumnIndex = filter.getBitmapColumnIndex(columnIndexSelector); + // Construct Builder instances for all child filters recursively. + this.childBuilders = new ArrayList<>(filter.getFilters().size()); + for (Filter childFilter : filter.getFilters()) { + childBuilders.add(new FilterBundle.Builder(childFilter, columnIndexSelector)); + } + // Sort child builders by cost in ASCENDING order, should be stable by default. + this.childBuilders.sort(Comparator.comparingInt(FilterBundle.Builder::getEstimatedComputeCost)); + this.estimatedComputeCost = calculateEstimatedComputeCost(); + } + + private int calculateEstimatedComputeCost() + { + if (this.bitmapColumnIndex == null) { + return Integer.MAX_VALUE; + } + int cost = this.bitmapColumnIndex.estimatedComputeCost(); + if (cost == Integer.MAX_VALUE) { + return Integer.MAX_VALUE; + } + + for (FilterBundle.Builder childBuilder : this.childBuilders) { + int childCost = childBuilder.getEstimatedComputeCost(); + if (childCost >= Integer.MAX_VALUE - cost) { + return Integer.MAX_VALUE; + } + cost += childCost; + } + return cost; + } + + public Filter getFilter() + { + return this.filter; + } + + public ColumnIndexSelector getColumnIndexSelector() + { + return this.columnIndexSelector; + } + + @Nullable + public BitmapColumnIndex getBitmapColumnIndex() + { + return this.bitmapColumnIndex; + } + + public List getChildBuilders() + { + return this.childBuilders; + } + + public int getEstimatedComputeCost() + { + return this.estimatedComputeCost; + } + + public FilterBundle build( + BitmapResultFactory bitmapResultFactory, int applyRowCount, int totalRowCount, boolean includeUnknown + ) + { + return this.filter.makeFilterBundle(this, bitmapResultFactory, applyRowCount, totalRowCount, includeUnknown); + } + } + + public BundleInfo getInfo() { - return new BundleInfo( - indexBundle == null ? null : indexBundle.getIndexInfo(), - matcherBundle == null ? null : matcherBundle.getMatcherInfo() + return new BundleInfo(indexBundle == null ? null : indexBundle.getIndexInfo(), + matcherBundle == null ? null : matcherBundle.getMatcherInfo() ); } @@ -123,10 +204,12 @@ public boolean canVectorizeMatcher() return matcherBundle == null || matcherBundle.canVectorize(); } + // This interface only has one implementation public interface IndexBundle { IndexBundleInfo getIndexInfo(); + // The computed bitmap from the index ImmutableBitmap getBitmap(); ColumnIndexCapabilities getIndexCapabilities(); @@ -212,9 +295,7 @@ public MatcherBundleInfo getMatcherInfo() @Override public ValueMatcher valueMatcher( - ColumnSelectorFactory selectorFactory, - Offset baseOffset, - boolean descending + ColumnSelectorFactory selectorFactory, Offset baseOffset, boolean descending ) { return matcherFn.apply(selectorFactory); @@ -222,8 +303,7 @@ public ValueMatcher valueMatcher( @Override public VectorValueMatcher vectorMatcher( - VectorColumnSelectorFactory selectorFactory, - ReadableVectorOffset baseOffset + VectorColumnSelectorFactory selectorFactory, ReadableVectorOffset baseOffset ) { return vectorMatcherFn.apply(selectorFactory); @@ -297,10 +377,7 @@ public static class IndexBundleInfo private final long buildTimeNs; public IndexBundleInfo( - Supplier filterString, - int selectionSize, - long buildTimeNs, - @Nullable List indexes + Supplier filterString, int selectionSize, long buildTimeNs, @Nullable List indexes ) { this.filter = filterString; @@ -339,12 +416,11 @@ public List getIndexes() */ public String describe() { - final StringBuilder sb = new StringBuilder() - .append("index: ") - .append(filter.get()) - .append(" (selectionSize = ") - .append(selectionSize) - .append(")\n"); + final StringBuilder sb = new StringBuilder().append("index: ") + .append(filter.get()) + .append(" (selectionSize = ") + .append(selectionSize) + .append(")\n"); if (indexes != null) { for (final IndexBundleInfo info : indexes) { @@ -358,12 +434,17 @@ public String describe() @Override public String toString() { - return "{" + - "filter=\"" + filter.get() + '\"' + - ", selectionSize=" + selectionSize + - ", buildTime=" + TimeUnit.NANOSECONDS.toMicros(buildTimeNs) + "μs" + - (indexes != null ? ", indexes=" + indexes : "") + - '}'; + return "{" + + "filter=\"" + + filter.get() + + '\"' + + ", selectionSize=" + + selectionSize + + ", buildTime=" + + TimeUnit.NANOSECONDS.toMicros(buildTimeNs) + + "μs" + + (indexes != null ? ", indexes=" + indexes : "") + + '}'; } } @@ -379,9 +460,7 @@ public static class MatcherBundleInfo private final IndexBundleInfo partialIndex; public MatcherBundleInfo( - Supplier filter, - @Nullable IndexBundleInfo partialIndex, - @Nullable List matchers + Supplier filter, @Nullable IndexBundleInfo partialIndex, @Nullable List matchers ) { this.filter = filter; @@ -415,10 +494,7 @@ public List getMatchers() */ public String describe() { - final StringBuilder sb = new StringBuilder() - .append("matcher: ") - .append(filter.get()) - .append("\n"); + final StringBuilder sb = new StringBuilder().append("matcher: ").append(filter.get()).append("\n"); if (partialIndex != null) { sb.append(" with partial ") @@ -437,11 +513,13 @@ public String describe() @Override public String toString() { - return "{" + - "filter=\"" + filter.get() + '\"' + - (partialIndex != null ? ", partialIndex=" + partialIndex : "") + - (matchers != null ? ", matchers=" + matchers : "") + - '}'; + return "{" + + "filter=\"" + + filter.get() + + '\"' + + (partialIndex != null ? ", partialIndex=" + partialIndex : "") + + (matchers != null ? ", matchers=" + matchers : "") + + '}'; } } } diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorHolder.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorHolder.java index 5188b385b36e..99993b93eafe 100644 --- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorHolder.java +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorHolder.java @@ -346,7 +346,6 @@ private VectorColumnSelectorFactory makeVectorColumnSelectorFactoryForOffset( * @param timestamp the timestamp to search for * @param startIndex first index to search, inclusive * @param endIndex last index to search, exclusive - * * @return first index that has a timestamp equal to, or greater, than "timestamp" */ @VisibleForTesting @@ -708,7 +707,7 @@ public void close() throws IOException /** * Create a {@link FilterBundle} for a cursor hold instance. - * + *

* The provided filter must include the query-level interface if needed. To compute this properly, use * {@link #computeFilterWithIntervalIfNeeded}. */ @@ -732,8 +731,7 @@ private static FilterBundle makeFilterBundle( return null; } final long bitmapConstructionStartNs = System.nanoTime(); - final FilterBundle filterBundle = filter.makeFilterBundle( - bitmapIndexSelector, + final FilterBundle filterBundle = new FilterBundle.Builder(filter, bitmapIndexSelector).build( bitmapResultFactory, numRows, numRows, diff --git a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java index dfc618acad89..7b70c1e6eaf5 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java @@ -46,10 +46,12 @@ import javax.annotation.Nullable; import java.util.ArrayList; +import java.util.Comparator; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.stream.Collectors; /** * Logical AND filter operation @@ -72,9 +74,14 @@ public AndFilter(List filters) this(new LinkedHashSet<>(filters)); } + @Override + public String getFilterString() { + return "AND"; + } + @Override public FilterBundle makeFilterBundle( - ColumnIndexSelector columnIndexSelector, + FilterBundle.Builder filterBundleBuilder, BitmapResultFactory bitmapResultFactory, int applyRowCount, int totalRowCount, @@ -97,20 +104,20 @@ public FilterBundle makeFilterBundle( // a nested AND filter might also partition itself into indexes and bundles, and since it is part of a logical AND // operation, this is valid (and even preferable). final long bitmapConstructionStartNs = System.nanoTime(); - for (Filter subfilter : filters) { - final FilterBundle subBundle = subfilter.makeFilterBundle( - columnIndexSelector, + for (FilterBundle.Builder subFilterBundleBuilder : filterBundleBuilder.getChildBuilders()) { + final FilterBundle subBundle = subFilterBundleBuilder.getFilter().makeFilterBundle( + subFilterBundleBuilder, bitmapResultFactory, Math.min(applyRowCount, indexIntersectionSize), totalRowCount, includeUnknown ); - if (subBundle.getIndex() != null) { + if (subBundle.hasIndex()) { if (subBundle.getIndex().getBitmap().isEmpty()) { // if nothing matches for any sub filter, short-circuit, because nothing can possibly match return FilterBundle.allFalse( System.nanoTime() - bitmapConstructionStartNs, - columnIndexSelector.getBitmapFactory().makeEmptyImmutableBitmap() + subFilterBundleBuilder.getColumnIndexSelector().getBitmapFactory().makeEmptyImmutableBitmap() ); } merged = merged.merge(subBundle.getIndex().getIndexCapabilities()); @@ -122,7 +129,7 @@ public FilterBundle makeFilterBundle( } indexIntersectionSize = index.size(); } - if (subBundle.getMatcherBundle() != null) { + if (subBundle.hasMatcher()) { matcherBundles.add(subBundle.getMatcherBundle()); matcherBundleInfos.add(subBundle.getMatcherBundle().getMatcherInfo()); } @@ -180,7 +187,10 @@ public ValueMatcher valueMatcher(ColumnSelectorFactory selectorFactory, Offset b } @Override - public VectorValueMatcher vectorMatcher(VectorColumnSelectorFactory selectorFactory, ReadableVectorOffset baseOffset) + public VectorValueMatcher vectorMatcher( + VectorColumnSelectorFactory selectorFactory, + ReadableVectorOffset baseOffset + ) { final VectorValueMatcher[] vectorMatchers = new VectorValueMatcher[matcherBundles.size()]; for (int i = 0; i < matcherBundles.size(); i++) { @@ -239,6 +249,13 @@ public ColumnIndexCapabilities getIndexCapabilities() return finalMerged; } + @Override + public int estimatedComputeCost() + { + // There's no additional cost on AND filter, cost in child filters would be used. + return 0; + } + @Override public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java index 700b2fbfa168..4f0f0a1def6f 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java @@ -80,7 +80,7 @@ public OrFilter(List filters) @Override public FilterBundle makeFilterBundle( - ColumnIndexSelector columnIndexSelector, + FilterBundle.Builder filterBundleBuilder, BitmapResultFactory bitmapResultFactory, int applyRowCount, int totalRowCount, @@ -107,10 +107,9 @@ public FilterBundle makeFilterBundle( int emptyCount = 0; final long bitmapConstructionStartNs = System.nanoTime(); - - for (Filter subfilter : filters) { - final FilterBundle bundle = subfilter.makeFilterBundle( - columnIndexSelector, + for (FilterBundle.Builder subFilterBundleBuilder : filterBundleBuilder.getChildBuilders()) { + final FilterBundle bundle = subFilterBundleBuilder.getFilter().makeFilterBundle( + subFilterBundleBuilder, bitmapResultFactory, Math.min(applyRowCount, totalRowCount - indexUnionSize), totalRowCount, @@ -156,7 +155,7 @@ public FilterBundle makeFilterBundle( if (index == null || index.isEmpty()) { return FilterBundle.allFalse( totalBitmapConstructTimeNs, - columnIndexSelector.getBitmapFactory().makeEmptyImmutableBitmap() + filterBundleBuilder.getColumnIndexSelector().getBitmapFactory().makeEmptyImmutableBitmap() ); } if (indexOnlyBundles.size() == 1) { @@ -287,11 +286,20 @@ public ColumnIndexCapabilities getIndexCapabilities() return finalMerged; } + @Override + public int estimatedComputeCost() + { + // There's no additional cost on OR filter, cost in child filters would be used. + return 0; + } + @Override public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) { return bitmapResultFactory.union( - () -> bitmapColumnIndices.stream().map(x -> x.computeBitmapResult(bitmapResultFactory, includeUnknown)).iterator() + () -> bitmapColumnIndices.stream() + .map(x -> x.computeBitmapResult(bitmapResultFactory, includeUnknown)) + .iterator() ); } @@ -693,6 +701,7 @@ private static VectorValueMatcher convertIndexToVectorValueMatcher( { final VectorMatch match = VectorMatch.wrap(new int[vectorOffset.getMaxVectorSize()]); int iterOffset = -1; + @Override public ReadableVectorMatch match(ReadableVectorMatch mask, boolean includeUnknown) { diff --git a/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java b/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java index 04a5bb8b6b5c..27ba55f9ebac 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java @@ -35,6 +35,13 @@ public interface BitmapColumnIndex { ColumnIndexCapabilities getIndexCapabilities(); + /** + * Returns an estimated cost for computing the bitmap result. + */ + default int estimatedComputeCost() { + return Integer.MAX_VALUE; + } + /** * Compute a bitmap result wrapped with the {@link BitmapResultFactory} representing the rows matched by this index. * If building a cursor, use {@link #computeBitmapResult(BitmapResultFactory, int, int, boolean)} instead. diff --git a/processing/src/test/java/org/apache/druid/segment/filter/FilterBundleTest.java b/processing/src/test/java/org/apache/druid/segment/filter/FilterBundleTest.java index aa7645354440..d1a65f7ad5f4 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/FilterBundleTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/FilterBundleTest.java @@ -317,8 +317,7 @@ public void test_or_countryIsNull_and_isRobotInFalseTrue_pageLike() protected FilterBundle makeFilterBundle(final Filter filter) { - return filter.makeFilterBundle( - indexSelector, + return new FilterBundle.Builder(filter, indexSelector).build( new DefaultBitmapResultFactory(bitmapFactory), indexSelector.getNumRows(), indexSelector.getNumRows(), From 73c7805712fc64af109fa75aee2ec3ecd1387a52 Mon Sep 17 00:00:00 2001 From: cecemei Date: Wed, 11 Sep 2024 18:22:03 -0700 Subject: [PATCH 02/10] Reformat. --- .../org/apache/druid/query/filter/Filter.java | 60 ++++++++++--------- .../druid/query/filter/FilterBundle.java | 18 +++--- 2 files changed, 42 insertions(+), 36 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/Filter.java b/processing/src/main/java/org/apache/druid/query/filter/Filter.java index c49bd97e7463..1079dd3e2e30 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/Filter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/Filter.java @@ -45,7 +45,6 @@ default String getFilterString() /** * Returns a LinkedHashSet of all child filters for this filter with no duplicates. - * *

The ordering of child filters is important in some cases, e.x.short-curcuiting.

*/ default LinkedHashSet getFilters() @@ -63,23 +62,22 @@ default LinkedHashSet getFilters() * cursor. If both are set, the cursor will effectively perform a logical AND to combine them. * See {@link FilterBundle} for additional details. * - * @param columnIndexSelector - provides {@link org.apache.druid.segment.column.ColumnIndexSupplier} to fetch column - * indexes and {@link org.apache.druid.collections.bitmap.BitmapFactory} to manipulate - * them - * @param bitmapResultFactory - wrapper for {@link ImmutableBitmap} operations to tie into + * @param filterBundleBuilder contains {@link BitmapColumnIndex} and {@link ColumnIndexSelector}, and some additional + * info needed. + * @param bitmapResultFactory wrapper for {@link ImmutableBitmap} operations to tie into * {@link org.apache.druid.query.QueryMetrics} and build the output indexes - * @param applyRowCount - upper bound on number of rows this filter would be applied to, after removing rows + * @param applyRowCount upper bound on number of rows this filter would be applied to, after removing rows * short-circuited by prior bundle operations. For example, given "x AND y", if "x" is * resolved using an index, then "y" will receive the number of rows that matched * the filter "x". As another example, given "x OR y", if "x" is resolved using an * index, then "y" will receive the number of rows that did *not* match the filter "x". - * @param totalRowCount - total number of rows to be scanned if no indexes are applied - * @param includeUnknown - mapping for Druid native two state logic system into SQL three-state logic system. If + * @param totalRowCount total number of rows to be scanned if no indexes are applied + * @param includeUnknown mapping for Druid native two state logic system into SQL three-state logic system. If * set to true, bitmaps returned by this method should include true bits for any rows * where the matching result is 'unknown', such as from the input being null valued. * See {@link NullHandling#useThreeValueLogic()} - * @param - Type of {@link BitmapResultFactory} results, {@link ImmutableBitmap} by default - * @return - {@link FilterBundle} containing any indexes and/or matchers that are needed to build + * @param type of {@link BitmapResultFactory} results, {@link ImmutableBitmap} by default + * @return {@link FilterBundle} containing any indexes and/or matchers that are needed to build * a cursor */ default FilterBundle makeFilterBundle( @@ -95,23 +93,26 @@ default FilterBundle makeFilterBundle( final BitmapColumnIndex columnIndex = filterBundleBuilder.getBitmapColumnIndex(); if (columnIndex != null) { final long bitmapConstructionStartNs = System.nanoTime(); - final T result = columnIndex.computeBitmapResult(bitmapResultFactory, - applyRowCount, - totalRowCount, - includeUnknown + final T result = columnIndex.computeBitmapResult( + bitmapResultFactory, + applyRowCount, + totalRowCount, + includeUnknown ); final long totalConstructionTimeNs = System.nanoTime() - bitmapConstructionStartNs; if (result == null) { indexBundle = null; } else { final ImmutableBitmap bitmap = bitmapResultFactory.toImmutableBitmap(result); - indexBundle = new FilterBundle.SimpleIndexBundle(new FilterBundle.IndexBundleInfo(this::getFilterString, - bitmap.size(), - totalConstructionTimeNs, - null - ), - bitmap, - columnIndex.getIndexCapabilities() + indexBundle = new FilterBundle.SimpleIndexBundle( + new FilterBundle.IndexBundleInfo( + this::getFilterString, + bitmap.size(), + totalConstructionTimeNs, + null + ), + bitmap, + columnIndex.getIndexCapabilities() ); } needMatcher = result == null || !columnIndex.getIndexCapabilities().isExact(); @@ -121,14 +122,15 @@ default FilterBundle makeFilterBundle( } final FilterBundle.SimpleMatcherBundle matcherBundle; if (needMatcher) { - matcherBundle = new FilterBundle.SimpleMatcherBundle(new FilterBundle.MatcherBundleInfo( - this::getFilterString, - null, - null - ), - this::makeMatcher, - this::makeVectorMatcher, - this.canVectorizeMatcher(filterBundleBuilder.getColumnIndexSelector()) + matcherBundle = new FilterBundle.SimpleMatcherBundle( + new FilterBundle.MatcherBundleInfo( + this::getFilterString, + null, + null + ), + this::makeMatcher, + this::makeVectorMatcher, + this.canVectorizeMatcher(filterBundleBuilder.getColumnIndexSelector()) ); } else { matcherBundle = null; diff --git a/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java b/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java index 62b6d7f7695c..9bc162ea6b98 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java +++ b/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java @@ -81,8 +81,9 @@ public FilterBundle( @Nullable IndexBundle index, @Nullable MatcherBundle matcherBundle ) { - Preconditions.checkArgument(index != null || matcherBundle != null, - "At least one of index or matcher must be not null" + Preconditions.checkArgument( + index != null || matcherBundle != null, + "At least one of index or matcher must be not null" ); this.indexBundle = index; this.matcherBundle = matcherBundle; @@ -101,6 +102,10 @@ public MatcherBundle getMatcherBundle() return matcherBundle; } + /** + * Wraps info needed to build a {@link FilterBundle}, and provides an estimated compute cost for + * {@link BitmapColumnIndex#computeBitmapResult}. + */ public static class Builder { private final Filter filter; @@ -110,9 +115,7 @@ public static class Builder private final List childBuilders; private final int estimatedComputeCost; - public Builder( - Filter filter, ColumnIndexSelector columnIndexSelector - ) + public Builder(Filter filter, ColumnIndexSelector columnIndexSelector) { this.filter = filter; this.columnIndexSelector = columnIndexSelector; @@ -184,8 +187,9 @@ public FilterBundle build( public BundleInfo getInfo() { - return new BundleInfo(indexBundle == null ? null : indexBundle.getIndexInfo(), - matcherBundle == null ? null : matcherBundle.getMatcherInfo() + return new BundleInfo( + indexBundle == null ? null : indexBundle.getIndexInfo(), + matcherBundle == null ? null : matcherBundle.getMatcherInfo() ); } From 8e205cad7746ba9b6628770955691e8410d4bbc3 Mon Sep 17 00:00:00 2001 From: cecemei Date: Wed, 11 Sep 2024 22:08:45 -0700 Subject: [PATCH 03/10] Reformat again --- .../druid/query/filter/BooleanFilter.java | 1 - .../org/apache/druid/query/filter/Filter.java | 13 +- .../druid/query/filter/FilterBundle.java | 27 +-- .../druid/segment/filter/AndFilter.java | 189 ++++++++---------- .../apache/druid/segment/filter/OrFilter.java | 2 +- .../segment/index/BitmapColumnIndex.java | 5 +- 6 files changed, 107 insertions(+), 130 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/BooleanFilter.java b/processing/src/main/java/org/apache/druid/query/filter/BooleanFilter.java index 2cb8a68b6e91..4c3cedefe6db 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/BooleanFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/BooleanFilter.java @@ -20,7 +20,6 @@ package org.apache.druid.query.filter; import java.util.HashSet; -import java.util.LinkedHashSet; import java.util.Set; public interface BooleanFilter extends Filter diff --git a/processing/src/main/java/org/apache/druid/query/filter/Filter.java b/processing/src/main/java/org/apache/druid/query/filter/Filter.java index 1079dd3e2e30..3adfc4e80f8a 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/Filter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/Filter.java @@ -105,12 +105,7 @@ default FilterBundle makeFilterBundle( } else { final ImmutableBitmap bitmap = bitmapResultFactory.toImmutableBitmap(result); indexBundle = new FilterBundle.SimpleIndexBundle( - new FilterBundle.IndexBundleInfo( - this::getFilterString, - bitmap.size(), - totalConstructionTimeNs, - null - ), + new FilterBundle.IndexBundleInfo(this::toString, bitmap.size(), totalConstructionTimeNs, null), bitmap, columnIndex.getIndexCapabilities() ); @@ -123,11 +118,7 @@ default FilterBundle makeFilterBundle( final FilterBundle.SimpleMatcherBundle matcherBundle; if (needMatcher) { matcherBundle = new FilterBundle.SimpleMatcherBundle( - new FilterBundle.MatcherBundleInfo( - this::getFilterString, - null, - null - ), + new FilterBundle.MatcherBundleInfo(this::getFilterString, null, null), this::makeMatcher, this::makeVectorMatcher, this.canVectorizeMatcher(filterBundleBuilder.getColumnIndexSelector()) diff --git a/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java b/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java index 9bc162ea6b98..f21a42325007 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java +++ b/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java @@ -70,16 +70,12 @@ public static FilterBundle allFalse(long constructionTime, ImmutableBitmap empty ), null); } - // The index that actually gets used in a filter. - // The index bundle is nullable, meaning in a query not a single index has been chosen. When it's non-null, all fields have values. @Nullable private final IndexBundle indexBundle; @Nullable private final MatcherBundle matcherBundle; - public FilterBundle( - @Nullable IndexBundle index, @Nullable MatcherBundle matcherBundle - ) + public FilterBundle(@Nullable IndexBundle index, @Nullable MatcherBundle matcherBundle) { Preconditions.checkArgument( index != null || matcherBundle != null, @@ -177,7 +173,10 @@ public int getEstimatedComputeCost() } public FilterBundle build( - BitmapResultFactory bitmapResultFactory, int applyRowCount, int totalRowCount, boolean includeUnknown + BitmapResultFactory bitmapResultFactory, + int applyRowCount, + int totalRowCount, + boolean includeUnknown ) { return this.filter.makeFilterBundle(this, bitmapResultFactory, applyRowCount, totalRowCount, includeUnknown); @@ -298,16 +297,15 @@ public MatcherBundleInfo getMatcherInfo() } @Override - public ValueMatcher valueMatcher( - ColumnSelectorFactory selectorFactory, Offset baseOffset, boolean descending - ) + public ValueMatcher valueMatcher(ColumnSelectorFactory selectorFactory, Offset baseOffset, boolean descending) { return matcherFn.apply(selectorFactory); } @Override public VectorValueMatcher vectorMatcher( - VectorColumnSelectorFactory selectorFactory, ReadableVectorOffset baseOffset + VectorColumnSelectorFactory selectorFactory, + ReadableVectorOffset baseOffset ) { return vectorMatcherFn.apply(selectorFactory); @@ -381,7 +379,10 @@ public static class IndexBundleInfo private final long buildTimeNs; public IndexBundleInfo( - Supplier filterString, int selectionSize, long buildTimeNs, @Nullable List indexes + Supplier filterString, + int selectionSize, + long buildTimeNs, + @Nullable List indexes ) { this.filter = filterString; @@ -464,7 +465,9 @@ public static class MatcherBundleInfo private final IndexBundleInfo partialIndex; public MatcherBundleInfo( - Supplier filter, @Nullable IndexBundleInfo partialIndex, @Nullable List matchers + Supplier filter, + @Nullable IndexBundleInfo partialIndex, + @Nullable List matchers ) { this.filter = filter; diff --git a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java index 7b70c1e6eaf5..cc65663994c5 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java @@ -46,12 +46,10 @@ import javax.annotation.Nullable; import java.util.ArrayList; -import java.util.Comparator; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.stream.Collectors; /** * Logical AND filter operation @@ -74,8 +72,68 @@ public AndFilter(List filters) this(new LinkedHashSet<>(filters)); } + public static ValueMatcher makeMatcher(final ValueMatcher[] baseMatchers) + { + Preconditions.checkState(baseMatchers.length > 0); + if (baseMatchers.length == 1) { + return baseMatchers[0]; + } + + return new ValueMatcher() + { + @Override + public boolean matches(boolean includeUnknown) + { + for (ValueMatcher matcher : baseMatchers) { + if (!matcher.matches(includeUnknown)) { + return false; + } + } + return true; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("firstBaseMatcher", baseMatchers[0]); + inspector.visit("secondBaseMatcher", baseMatchers[1]); + // Don't inspect the 3rd and all consequent baseMatchers, cut runtime shape combinations at this point. + // Anyway if the filter is so complex, Hotspot won't inline all calls because of the inline limit. + } + }; + } + + public static VectorValueMatcher makeVectorMatcher(final VectorValueMatcher[] baseMatchers) + { + Preconditions.checkState(baseMatchers.length > 0); + if (baseMatchers.length == 1) { + return baseMatchers[0]; + } + + return new BaseVectorValueMatcher(baseMatchers[0]) + { + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask, boolean includeUnknown) + { + ReadableVectorMatch match = mask; + + for (VectorValueMatcher matcher : baseMatchers) { + if (match.isAllFalse()) { + // Short-circuit if the entire vector is false. + break; + } + match = matcher.match(match, includeUnknown); + } + + assert match.isValid(mask); + return match; + } + }; + } + @Override - public String getFilterString() { + public String getFilterString() + { return "AND"; } @@ -105,19 +163,22 @@ public FilterBundle makeFilterBundle( // operation, this is valid (and even preferable). final long bitmapConstructionStartNs = System.nanoTime(); for (FilterBundle.Builder subFilterBundleBuilder : filterBundleBuilder.getChildBuilders()) { - final FilterBundle subBundle = subFilterBundleBuilder.getFilter().makeFilterBundle( - subFilterBundleBuilder, - bitmapResultFactory, - Math.min(applyRowCount, indexIntersectionSize), - totalRowCount, - includeUnknown + final FilterBundle subBundle = subFilterBundleBuilder.getFilter().makeFilterBundle(subFilterBundleBuilder, + bitmapResultFactory, + Math.min( + applyRowCount, + indexIntersectionSize + ), + totalRowCount, + includeUnknown ); if (subBundle.hasIndex()) { if (subBundle.getIndex().getBitmap().isEmpty()) { // if nothing matches for any sub filter, short-circuit, because nothing can possibly match - return FilterBundle.allFalse( - System.nanoTime() - bitmapConstructionStartNs, - subFilterBundleBuilder.getColumnIndexSelector().getBitmapFactory().makeEmptyImmutableBitmap() + return FilterBundle.allFalse(System.nanoTime() - bitmapConstructionStartNs, + subFilterBundleBuilder.getColumnIndexSelector() + .getBitmapFactory() + .makeEmptyImmutableBitmap() ); } merged = merged.merge(subBundle.getIndex().getIndexCapabilities()); @@ -138,18 +199,13 @@ public FilterBundle makeFilterBundle( final FilterBundle.IndexBundle indexBundle; if (index != null) { if (indexBundleInfos.size() == 1) { - indexBundle = new FilterBundle.SimpleIndexBundle( - indexBundleInfos.get(0), - index, - merged - ); + indexBundle = new FilterBundle.SimpleIndexBundle(indexBundleInfos.get(0), index, merged); } else { indexBundle = new FilterBundle.SimpleIndexBundle( - new FilterBundle.IndexBundleInfo( - () -> "AND", - indexIntersectionSize, - System.nanoTime() - bitmapConstructionStartNs, - indexBundleInfos + new FilterBundle.IndexBundleInfo(() -> "AND", + indexIntersectionSize, + System.nanoTime() - bitmapConstructionStartNs, + indexBundleInfos ), index, merged @@ -169,11 +225,7 @@ public FilterBundle.MatcherBundleInfo getMatcherInfo() if (matcherBundles.size() == 1) { return matcherBundleInfos.get(0); } - return new FilterBundle.MatcherBundleInfo( - () -> "AND", - null, - matcherBundleInfos - ); + return new FilterBundle.MatcherBundleInfo(() -> "AND", null, matcherBundleInfos); } @Override @@ -188,8 +240,7 @@ public ValueMatcher valueMatcher(ColumnSelectorFactory selectorFactory, Offset b @Override public VectorValueMatcher vectorMatcher( - VectorColumnSelectorFactory selectorFactory, - ReadableVectorOffset baseOffset + VectorColumnSelectorFactory selectorFactory, ReadableVectorOffset baseOffset ) { final VectorValueMatcher[] vectorMatchers = new VectorValueMatcher[matcherBundles.size()]; @@ -214,10 +265,7 @@ public boolean canVectorize() matcherBundle = null; } - return new FilterBundle( - indexBundle, - matcherBundle - ); + return new FilterBundle(indexBundle, matcherBundle); } @Nullable @@ -252,7 +300,7 @@ public ColumnIndexCapabilities getIndexCapabilities() @Override public int estimatedComputeCost() { - // There's no additional cost on AND filter, cost in child filters would be used. + // There's no additional cost on AND filter, cost in child filters would be summed. return 0; } @@ -274,19 +322,15 @@ public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boo @Nullable @Override public T computeBitmapResult( - BitmapResultFactory bitmapResultFactory, - int applyRowCount, - int totalRowCount, - boolean includeUnknown + BitmapResultFactory bitmapResultFactory, int applyRowCount, int totalRowCount, boolean includeUnknown ) { final List bitmapResults = new ArrayList<>(bitmapColumnIndices.size()); for (final BitmapColumnIndex index : bitmapColumnIndices) { - final T bitmapResult = index.computeBitmapResult( - bitmapResultFactory, - applyRowCount, - totalRowCount, - includeUnknown + final T bitmapResult = index.computeBitmapResult(bitmapResultFactory, + applyRowCount, + totalRowCount, + includeUnknown ); if (bitmapResult == null) { // all or nothing @@ -367,65 +411,6 @@ public String toString() return StringUtils.format("(%s)", AND_JOINER.join(filters)); } - public static ValueMatcher makeMatcher(final ValueMatcher[] baseMatchers) - { - Preconditions.checkState(baseMatchers.length > 0); - if (baseMatchers.length == 1) { - return baseMatchers[0]; - } - - return new ValueMatcher() - { - @Override - public boolean matches(boolean includeUnknown) - { - for (ValueMatcher matcher : baseMatchers) { - if (!matcher.matches(includeUnknown)) { - return false; - } - } - return true; - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - inspector.visit("firstBaseMatcher", baseMatchers[0]); - inspector.visit("secondBaseMatcher", baseMatchers[1]); - // Don't inspect the 3rd and all consequent baseMatchers, cut runtime shape combinations at this point. - // Anyway if the filter is so complex, Hotspot won't inline all calls because of the inline limit. - } - }; - } - - public static VectorValueMatcher makeVectorMatcher(final VectorValueMatcher[] baseMatchers) - { - Preconditions.checkState(baseMatchers.length > 0); - if (baseMatchers.length == 1) { - return baseMatchers[0]; - } - - return new BaseVectorValueMatcher(baseMatchers[0]) - { - @Override - public ReadableVectorMatch match(final ReadableVectorMatch mask, boolean includeUnknown) - { - ReadableVectorMatch match = mask; - - for (VectorValueMatcher matcher : baseMatchers) { - if (match.isAllFalse()) { - // Short-circuit if the entire vector is false. - break; - } - match = matcher.match(match, includeUnknown); - } - - assert match.isValid(mask); - return match; - } - }; - } - @Override public boolean equals(Object o) { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java index 4f0f0a1def6f..187e1d6db569 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java @@ -289,7 +289,7 @@ public ColumnIndexCapabilities getIndexCapabilities() @Override public int estimatedComputeCost() { - // There's no additional cost on OR filter, cost in child filters would be used. + // There's no additional cost on OR filter, cost in child filters would be summed. return 0; } diff --git a/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java b/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java index 27ba55f9ebac..f28429969d33 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/BitmapColumnIndex.java @@ -38,7 +38,8 @@ public interface BitmapColumnIndex /** * Returns an estimated cost for computing the bitmap result. */ - default int estimatedComputeCost() { + default int estimatedComputeCost() + { return Integer.MAX_VALUE; } @@ -52,7 +53,6 @@ default int estimatedComputeCost() { * to true, bitmaps returned by this method should include true bits for any rows where * the matching result is 'unknown', such as from the input being null valued. * See {@link NullHandling#useThreeValueLogic()}. - * * @return bitmap result representing rows matched by this index */ T computeBitmapResult( @@ -76,7 +76,6 @@ T computeBitmapResult( * set to true, bitmaps returned by this method should include true bits for any rows where * the matching result is 'unknown', such as from the input being null valued. * See {@link NullHandling#useThreeValueLogic()}. - * * @return bitmap result representing rows matched by this index */ @Nullable From 29683b99e3f9003a8b7ce5b9eeac2edfe54e9e64 Mon Sep 17 00:00:00 2001 From: cecemei Date: Thu, 12 Sep 2024 11:24:09 -0700 Subject: [PATCH 04/10] Reformat --- .../druid/query/filter/FilterBundle.java | 129 +- .../druid/segment/filter/AndFilter.java | 49 +- .../apache/druid/segment/filter/OrFilter.java | 1107 ++++++++--------- 3 files changed, 643 insertions(+), 642 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java b/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java index f21a42325007..881189699841 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java +++ b/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java @@ -61,15 +61,6 @@ */ public class FilterBundle { - public static FilterBundle allFalse(long constructionTime, ImmutableBitmap emptyBitmap) - { - return new FilterBundle(new FilterBundle.SimpleIndexBundle( - new FilterBundle.IndexBundleInfo(() -> FalseFilter.instance().toString(), 0, constructionTime, null), - emptyBitmap, - SimpleColumnIndexCapabilities.getConstant() - ), null); - } - @Nullable private final IndexBundle indexBundle; @Nullable @@ -85,6 +76,17 @@ public FilterBundle(@Nullable IndexBundle index, @Nullable MatcherBundle matcher this.matcherBundle = matcherBundle; } + public static FilterBundle allFalse(long constructionTime, ImmutableBitmap emptyBitmap) + { + return new FilterBundle( + new FilterBundle.SimpleIndexBundle( + new FilterBundle.IndexBundleInfo(() -> FalseFilter.instance().toString(), 0, constructionTime, null), + emptyBitmap, + SimpleColumnIndexCapabilities.getConstant() + ), + null + ); + } @Nullable public IndexBundle getIndex() @@ -98,6 +100,57 @@ public MatcherBundle getMatcherBundle() return matcherBundle; } + public BundleInfo getInfo() + { + return new BundleInfo( + indexBundle == null ? null : indexBundle.getIndexInfo(), + matcherBundle == null ? null : matcherBundle.getMatcherInfo() + ); + } + + public boolean hasIndex() + { + return indexBundle != null; + } + + public boolean hasMatcher() + { + return matcherBundle != null; + } + + public boolean canVectorizeMatcher() + { + return matcherBundle == null || matcherBundle.canVectorize(); + } + + public interface IndexBundle + { + IndexBundleInfo getIndexInfo(); + + ImmutableBitmap getBitmap(); + + ColumnIndexCapabilities getIndexCapabilities(); + } + + /** + * Builder of {@link ValueMatcher} and {@link VectorValueMatcher}. The + * {@link #valueMatcher(ColumnSelectorFactory, Offset, boolean)} function also passes in the base offset and whether + * the offset is 'descending' or not, to allow filters more flexibility in value matcher creation. + * {@link org.apache.druid.segment.filter.OrFilter} uses these extra parameters to allow partial use of indexes to + * create a synthetic value matcher that checks if the row is set in the bitmap, instead of purely using value + * matchers, with {@link org.apache.druid.segment.filter.OrFilter#convertIndexToValueMatcher}. + */ + public interface MatcherBundle + { + MatcherBundleInfo getMatcherInfo(); + + ValueMatcher valueMatcher(ColumnSelectorFactory selectorFactory, Offset baseOffset, boolean descending); + + VectorValueMatcher vectorMatcher(VectorColumnSelectorFactory selectorFactory, ReadableVectorOffset baseOffset); + + boolean canVectorize(); + } + /** * Wraps info needed to build a {@link FilterBundle}, and provides an estimated compute cost for * {@link BitmapColumnIndex#computeBitmapResult}. @@ -183,60 +236,6 @@ public FilterBundle build( } } - - public BundleInfo getInfo() - { - return new BundleInfo( - indexBundle == null ? null : indexBundle.getIndexInfo(), - matcherBundle == null ? null : matcherBundle.getMatcherInfo() - ); - } - - public boolean hasIndex() - { - return indexBundle != null; - } - - public boolean hasMatcher() - { - return matcherBundle != null; - } - - public boolean canVectorizeMatcher() - { - return matcherBundle == null || matcherBundle.canVectorize(); - } - - // This interface only has one implementation - public interface IndexBundle - { - IndexBundleInfo getIndexInfo(); - - // The computed bitmap from the index - ImmutableBitmap getBitmap(); - - ColumnIndexCapabilities getIndexCapabilities(); - } - - /** - * Builder of {@link ValueMatcher} and {@link VectorValueMatcher}. The - * {@link #valueMatcher(ColumnSelectorFactory, Offset, boolean)} function also passes in the base offset and whether - * the offset is 'descending' or not, to allow filters more flexibility in value matcher creation. - * {@link org.apache.druid.segment.filter.OrFilter} uses these extra parameters to allow partial use of indexes to - * create a synthetic value matcher that checks if the row is set in the bitmap, instead of purely using value - * matchers, with {@link org.apache.druid.segment.filter.OrFilter#convertIndexToValueMatcher}. - */ - public interface MatcherBundle - { - MatcherBundleInfo getMatcherInfo(); - - ValueMatcher valueMatcher(ColumnSelectorFactory selectorFactory, Offset baseOffset, boolean descending); - - VectorValueMatcher vectorMatcher(VectorColumnSelectorFactory selectorFactory, ReadableVectorOffset baseOffset); - - boolean canVectorize(); - } - public static class SimpleIndexBundle implements IndexBundle { private final IndexBundleInfo info; @@ -456,11 +455,9 @@ public String toString() public static class MatcherBundleInfo { private static final Pattern PATTERN_LINE_START = Pattern.compile("(?m)^"); - - private final Supplier filter; @Nullable final List matchers; - + private final Supplier filter; @Nullable private final IndexBundleInfo partialIndex; diff --git a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java index cc65663994c5..246a7d6fccdb 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java @@ -163,22 +163,21 @@ public FilterBundle makeFilterBundle( // operation, this is valid (and even preferable). final long bitmapConstructionStartNs = System.nanoTime(); for (FilterBundle.Builder subFilterBundleBuilder : filterBundleBuilder.getChildBuilders()) { - final FilterBundle subBundle = subFilterBundleBuilder.getFilter().makeFilterBundle(subFilterBundleBuilder, - bitmapResultFactory, - Math.min( - applyRowCount, - indexIntersectionSize - ), - totalRowCount, - includeUnknown + final FilterBundle subBundle = subFilterBundleBuilder.getFilter().makeFilterBundle( + subFilterBundleBuilder, + bitmapResultFactory, + Math.min(applyRowCount, indexIntersectionSize), + totalRowCount, + includeUnknown ); if (subBundle.hasIndex()) { if (subBundle.getIndex().getBitmap().isEmpty()) { // if nothing matches for any sub filter, short-circuit, because nothing can possibly match - return FilterBundle.allFalse(System.nanoTime() - bitmapConstructionStartNs, - subFilterBundleBuilder.getColumnIndexSelector() - .getBitmapFactory() - .makeEmptyImmutableBitmap() + return FilterBundle.allFalse( + System.nanoTime() - bitmapConstructionStartNs, + subFilterBundleBuilder.getColumnIndexSelector() + .getBitmapFactory() + .makeEmptyImmutableBitmap() ); } merged = merged.merge(subBundle.getIndex().getIndexCapabilities()); @@ -202,10 +201,11 @@ public FilterBundle makeFilterBundle( indexBundle = new FilterBundle.SimpleIndexBundle(indexBundleInfos.get(0), index, merged); } else { indexBundle = new FilterBundle.SimpleIndexBundle( - new FilterBundle.IndexBundleInfo(() -> "AND", - indexIntersectionSize, - System.nanoTime() - bitmapConstructionStartNs, - indexBundleInfos + new FilterBundle.IndexBundleInfo( + () -> "AND", + indexIntersectionSize, + System.nanoTime() - bitmapConstructionStartNs, + indexBundleInfos ), index, merged @@ -240,7 +240,8 @@ public ValueMatcher valueMatcher(ColumnSelectorFactory selectorFactory, Offset b @Override public VectorValueMatcher vectorMatcher( - VectorColumnSelectorFactory selectorFactory, ReadableVectorOffset baseOffset + VectorColumnSelectorFactory selectorFactory, + ReadableVectorOffset baseOffset ) { final VectorValueMatcher[] vectorMatchers = new VectorValueMatcher[matcherBundles.size()]; @@ -322,15 +323,19 @@ public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boo @Nullable @Override public T computeBitmapResult( - BitmapResultFactory bitmapResultFactory, int applyRowCount, int totalRowCount, boolean includeUnknown + BitmapResultFactory bitmapResultFactory, + int applyRowCount, + int totalRowCount, + boolean includeUnknown ) { final List bitmapResults = new ArrayList<>(bitmapColumnIndices.size()); for (final BitmapColumnIndex index : bitmapColumnIndices) { - final T bitmapResult = index.computeBitmapResult(bitmapResultFactory, - applyRowCount, - totalRowCount, - includeUnknown + final T bitmapResult = index.computeBitmapResult( + bitmapResultFactory, + applyRowCount, + totalRowCount, + includeUnknown ); if (bitmapResult == null) { // all or nothing diff --git a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java index 187e1d6db569..7fb867872df2 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java @@ -78,432 +78,100 @@ public OrFilter(List filters) this(new LinkedHashSet<>(filters)); } - @Override - public FilterBundle makeFilterBundle( - FilterBundle.Builder filterBundleBuilder, - BitmapResultFactory bitmapResultFactory, - int applyRowCount, - int totalRowCount, - boolean includeUnknown - ) + private static ValueMatcher makeMatcher(final ValueMatcher[] baseMatchers) { - // for OR filters, we have a few possible outcomes: - // 1 - all clauses are index only bundles. in this case we union the bitmaps together and make an index only bundle - // 2 - some clauses support indexes. in this case, we union the bitmaps of any index only bundles together to form a - // partial index which is constructed into a matcher bundle with convertIndexToMatcherBundle. We translate any - // index AND matcher bundles into a matcher only bundle with convertBundleToMatcherOnlyBundle. Finally, we - // combine these with the remaining matcher only bundles to with makeMatcher/makeVectorMatcher to make a matcher - // only bundle - // 3 - no clauses support indexes. in this case, we make a matcher only bundle using makeMatcher/makeVectorMatcher - - final List indexOnlyBundles = new ArrayList<>(); - final List indexOnlyBundlesInfo = new ArrayList<>(); - final List partialIndexBundles = new ArrayList<>(); - final List matcherOnlyBundles = new ArrayList<>(); + Preconditions.checkState(baseMatchers.length > 0); - int indexUnionSize = 0; - ImmutableBitmap index = null; - ColumnIndexCapabilities merged = new SimpleColumnIndexCapabilities(true, true); - int emptyCount = 0; + if (baseMatchers.length == 1) { + return baseMatchers[0]; + } - final long bitmapConstructionStartNs = System.nanoTime(); - for (FilterBundle.Builder subFilterBundleBuilder : filterBundleBuilder.getChildBuilders()) { - final FilterBundle bundle = subFilterBundleBuilder.getFilter().makeFilterBundle( - subFilterBundleBuilder, - bitmapResultFactory, - Math.min(applyRowCount, totalRowCount - indexUnionSize), - totalRowCount, - includeUnknown - ); - if (bundle.hasIndex()) { - final ImmutableBitmap bundleIndex = bundle.getIndex().getBitmap(); - if (bundleIndex.isEmpty()) { - // we leave any indexes which are empty out of index, indexOnlyBundles, and partialIndexBundles - // even though we skip them, we still keep track of them to check for the case when we can build the OR into - // an index only bundle. We can count index and matcher bundles here too because the AND operation means that - // an empty index means the matcher can be skipped - emptyCount++; - } else { - if (bundle.hasMatcher()) { - // index and matcher bundles must be handled separately, they will need to be a single value matcher built - // by doing an AND operation between the index and the value matcher - // (a bundle is basically an AND operation between the index and matcher if the matcher is present) - partialIndexBundles.add(convertBundleToMatcherOnlyBundle(bundle, bundleIndex)); - } else { - indexOnlyBundles.add(bundle.getIndex()); - indexOnlyBundlesInfo.add(bundle.getIndex().getIndexInfo()); - merged.merge(bundle.getIndex().getIndexCapabilities()); - // union index only bitmaps together; if all sub-filters are 'index only' bundles we will make an index only - // bundle ourselves, else we will use this index as a single value matcher - if (index == null) { - index = bundle.getIndex().getBitmap(); - } else { - index = index.union(bundle.getIndex().getBitmap()); - } - indexUnionSize = index.size(); + return new ValueMatcher() + { + @Override + public boolean matches(boolean includeUnknown) + { + for (ValueMatcher matcher : baseMatchers) { + if (matcher.matches(includeUnknown)) { + return true; } } - } else { - matcherOnlyBundles.add(bundle.getMatcherBundle()); + return false; } - } - final long totalBitmapConstructTimeNs = System.nanoTime() - bitmapConstructionStartNs; - - // if all the filters are 'index only', we can make an index only bundle - if (indexOnlyBundles.size() + emptyCount == filters.size()) { - if (index == null || index.isEmpty()) { - return FilterBundle.allFalse( - totalBitmapConstructTimeNs, - filterBundleBuilder.getColumnIndexSelector().getBitmapFactory().makeEmptyImmutableBitmap() - ); - } - if (indexOnlyBundles.size() == 1) { - return new FilterBundle( - indexOnlyBundles.get(0), - null - ); + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("firstBaseMatcher", baseMatchers[0]); + inspector.visit("secondBaseMatcher", baseMatchers[1]); + // Don't inspect the 3rd and all consequent baseMatchers, cut runtime shape combinations at this point. + // Anyway if the filter is so complex, Hotspot won't inline all calls because of the inline limit. } - return new FilterBundle( - new FilterBundle.SimpleIndexBundle( - new FilterBundle.IndexBundleInfo( - () -> "OR", - applyRowCount, - totalBitmapConstructTimeNs, - indexOnlyBundlesInfo - ), - index, - merged - ), - null - ); - } + }; + } - // if not the index only outcome, we build a matcher only bundle from all the matchers - final int estimatedSize = (indexOnlyBundles.isEmpty() ? 0 : 1) - + partialIndexBundles.size() - + matcherOnlyBundles.size(); - final List allMatcherBundles = Lists.newArrayListWithCapacity(estimatedSize); - final List allMatcherBundlesInfo = Lists.newArrayListWithCapacity(estimatedSize); - if (!indexOnlyBundles.isEmpty()) { - // translate the indexOnly bundles into a single matcher - final FilterBundle.MatcherBundle matcherBundle = convertIndexToMatcherBundle( - applyRowCount, - indexOnlyBundles, - indexOnlyBundlesInfo, - totalBitmapConstructTimeNs, - index - ); - allMatcherBundles.add(matcherBundle); - allMatcherBundlesInfo.add(matcherBundle.getMatcherInfo()); - } - for (FilterBundle.MatcherBundle bundle : partialIndexBundles) { - allMatcherBundles.add(bundle); - allMatcherBundlesInfo.add(bundle.getMatcherInfo()); - } - for (FilterBundle.MatcherBundle bundle : matcherOnlyBundles) { - allMatcherBundles.add(bundle); - allMatcherBundlesInfo.add(bundle.getMatcherInfo()); + private static VectorValueMatcher makeVectorMatcher(final VectorValueMatcher[] baseMatchers) + { + Preconditions.checkState(baseMatchers.length > 0); + if (baseMatchers.length == 1) { + return baseMatchers[0]; } - return new FilterBundle( - null, - new FilterBundle.MatcherBundle() - { - @Override - public FilterBundle.MatcherBundleInfo getMatcherInfo() - { - return new FilterBundle.MatcherBundleInfo( - () -> "OR", - null, - allMatcherBundlesInfo - ); - } + return new BaseVectorValueMatcher(baseMatchers[0]) + { + final VectorMatch currentMask = VectorMatch.wrap(new int[getMaxVectorSize()]); + final VectorMatch scratch = VectorMatch.wrap(new int[getMaxVectorSize()]); + final VectorMatch retVal = VectorMatch.wrap(new int[getMaxVectorSize()]); - @Override - public ValueMatcher valueMatcher(ColumnSelectorFactory selectorFactory, Offset baseOffset, boolean descending) - { - final ValueMatcher[] matchers = new ValueMatcher[allMatcherBundles.size()]; - for (int i = 0; i < allMatcherBundles.size(); i++) { - matchers[i] = allMatcherBundles.get(i).valueMatcher(selectorFactory, baseOffset, descending); - } - return makeMatcher(matchers); - } + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask, boolean includeUnknown) + { + ReadableVectorMatch currentMatch = baseMatchers[0].match(mask, includeUnknown); - @Override - public VectorValueMatcher vectorMatcher( - VectorColumnSelectorFactory selectorFactory, - ReadableVectorOffset baseOffset - ) - { - final VectorValueMatcher[] matchers = new VectorValueMatcher[allMatcherBundles.size()]; - for (int i = 0; i < allMatcherBundles.size(); i++) { - matchers[i] = allMatcherBundles.get(i).vectorMatcher(selectorFactory, baseOffset); - } - return makeVectorMatcher(matchers); + // Initialize currentMask = mask, then progressively remove rows from the mask as we find matches for them. + // This isn't necessary for correctness (we could use the original "mask" on every call to "match") but it + // allows for short-circuiting on a row-by-row basis. + currentMask.copyFrom(mask); + + // Initialize retVal = currentMatch, the rows matched by the first matcher. We'll add more as we loop over + // the rest of the matchers. + retVal.copyFrom(currentMatch); + + for (int i = 1; i < baseMatchers.length; i++) { + if (retVal.isAllTrue(getCurrentVectorSize())) { + // Short-circuit if the entire vector is true. + break; } - @Override - public boolean canVectorize() - { - for (FilterBundle.MatcherBundle bundle : allMatcherBundles) { - if (!bundle.canVectorize()) { - return false; - } - } - return true; + currentMask.removeAll(currentMatch); + currentMatch = baseMatchers[i].match(currentMask, false); + retVal.addAll(currentMatch, scratch); + + if (currentMatch == currentMask) { + // baseMatchers[i] matched every remaining row. Short-circuit out. + break; } } - ); - } - - @Nullable - @Override - public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) - { - if (filters.size() == 1) { - return Iterables.getOnlyElement(filters).getBitmapColumnIndex(selector); - } - List bitmapColumnIndices = new ArrayList<>(filters.size()); - ColumnIndexCapabilities merged = new SimpleColumnIndexCapabilities(true, true); - for (Filter filter : filters) { - BitmapColumnIndex index = filter.getBitmapColumnIndex(selector); - if (index == null) { - // all or nothing - return null; + assert retVal.isValid(mask); + return retVal; } - merged = merged.merge(index.getIndexCapabilities()); - bitmapColumnIndices.add(index); - } + }; + } - final ColumnIndexCapabilities finalMerged = merged; - return new BitmapColumnIndex() - { - @Override - public ColumnIndexCapabilities getIndexCapabilities() - { - return finalMerged; - } - - @Override - public int estimatedComputeCost() - { - // There's no additional cost on OR filter, cost in child filters would be summed. - return 0; - } - - @Override - public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) - { - return bitmapResultFactory.union( - () -> bitmapColumnIndices.stream() - .map(x -> x.computeBitmapResult(bitmapResultFactory, includeUnknown)) - .iterator() - ); - } - - @Nullable - @Override - public T computeBitmapResult( - BitmapResultFactory bitmapResultFactory, - int applyRowCount, - int totalRowCount, - boolean includeUnknown - ) - { - List results = Lists.newArrayListWithCapacity(bitmapColumnIndices.size()); - for (BitmapColumnIndex index : bitmapColumnIndices) { - final T r = index.computeBitmapResult(bitmapResultFactory, applyRowCount, totalRowCount, includeUnknown); - if (r == null) { - // all or nothing - return null; - } - results.add(r); - } - return bitmapResultFactory.union(results); - } - }; - } - - @Override - public ValueMatcher makeMatcher(ColumnSelectorFactory factory) - { - final ValueMatcher[] matchers = new ValueMatcher[filters.size()]; - - int i = 0; - for (Filter filter : filters) { - matchers[i++] = filter.makeMatcher(factory); - } - return makeMatcher(matchers); - } - - @Override - public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory) - { - final VectorValueMatcher[] matchers = new VectorValueMatcher[filters.size()]; - - int i = 0; - for (Filter filter : filters) { - matchers[i++] = filter.makeVectorMatcher(factory); - } - return makeVectorMatcher(matchers); - } - - @Override - public boolean canVectorizeMatcher(ColumnInspector inspector) - { - return filters.stream().allMatch(filter -> filter.canVectorizeMatcher(inspector)); - } - - @Override - public LinkedHashSet getFilters() - { - return filters; - } - - @Override - public boolean supportsRequiredColumnRewrite() - { - for (Filter filter : filters) { - if (!filter.supportsRequiredColumnRewrite()) { - return false; - } - } - - return true; - } - - @Override - public Filter rewriteRequiredColumns(Map columnRewrites) - { - final List newFilters = new ArrayList<>(filters.size()); - for (Filter filter : filters) { - newFilters.add(filter.rewriteRequiredColumns(columnRewrites)); - } - return new OrFilter(newFilters); - } - - @Override - public String toString() - { - return StringUtils.format("(%s)", OR_JOINER.join(filters)); - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - OrFilter orFilter = (OrFilter) o; - return Objects.equals(getFilters(), orFilter.getFilters()); - } - - @Override - public int hashCode() - { - return Objects.hash(getFilters()); - } - - - private static ValueMatcher makeMatcher(final ValueMatcher[] baseMatchers) - { - Preconditions.checkState(baseMatchers.length > 0); - - if (baseMatchers.length == 1) { - return baseMatchers[0]; - } - - return new ValueMatcher() - { - @Override - public boolean matches(boolean includeUnknown) - { - for (ValueMatcher matcher : baseMatchers) { - if (matcher.matches(includeUnknown)) { - return true; - } - } - return false; - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - inspector.visit("firstBaseMatcher", baseMatchers[0]); - inspector.visit("secondBaseMatcher", baseMatchers[1]); - // Don't inspect the 3rd and all consequent baseMatchers, cut runtime shape combinations at this point. - // Anyway if the filter is so complex, Hotspot won't inline all calls because of the inline limit. - } - }; - } - - private static VectorValueMatcher makeVectorMatcher(final VectorValueMatcher[] baseMatchers) - { - Preconditions.checkState(baseMatchers.length > 0); - if (baseMatchers.length == 1) { - return baseMatchers[0]; - } - - return new BaseVectorValueMatcher(baseMatchers[0]) - { - final VectorMatch currentMask = VectorMatch.wrap(new int[getMaxVectorSize()]); - final VectorMatch scratch = VectorMatch.wrap(new int[getMaxVectorSize()]); - final VectorMatch retVal = VectorMatch.wrap(new int[getMaxVectorSize()]); - - @Override - public ReadableVectorMatch match(final ReadableVectorMatch mask, boolean includeUnknown) - { - ReadableVectorMatch currentMatch = baseMatchers[0].match(mask, includeUnknown); - - // Initialize currentMask = mask, then progressively remove rows from the mask as we find matches for them. - // This isn't necessary for correctness (we could use the original "mask" on every call to "match") but it - // allows for short-circuiting on a row-by-row basis. - currentMask.copyFrom(mask); - - // Initialize retVal = currentMatch, the rows matched by the first matcher. We'll add more as we loop over - // the rest of the matchers. - retVal.copyFrom(currentMatch); - - for (int i = 1; i < baseMatchers.length; i++) { - if (retVal.isAllTrue(getCurrentVectorSize())) { - // Short-circuit if the entire vector is true. - break; - } - - currentMask.removeAll(currentMatch); - currentMatch = baseMatchers[i].match(currentMask, false); - retVal.addAll(currentMatch, scratch); - - if (currentMatch == currentMask) { - // baseMatchers[i] matched every remaining row. Short-circuit out. - break; - } - } - - assert retVal.isValid(mask); - return retVal; - } - }; - } - - /** - * Convert a {@link FilterBundle} that has both {@link FilterBundle#getIndex()} and - * {@link FilterBundle#getMatcherBundle()} into a 'matcher only' bundle by converting the index into a matcher - * with {@link #convertIndexToValueMatcher(ReadableOffset, ImmutableBitmap, boolean)} and - * {@link #convertIndexToVectorValueMatcher(ReadableVectorOffset, ImmutableBitmap)} and then doing a logical AND - * with the bundles matchers. - */ - private static FilterBundle.MatcherBundle convertBundleToMatcherOnlyBundle( - FilterBundle bundle, - ImmutableBitmap bundleIndex - ) - { - return new FilterBundle.MatcherBundle() + /** + * Convert a {@link FilterBundle} that has both {@link FilterBundle#getIndex()} and + * {@link FilterBundle#getMatcherBundle()} into a 'matcher only' bundle by converting the index into a matcher + * with {@link #convertIndexToValueMatcher(ReadableOffset, ImmutableBitmap, boolean)} and + * {@link #convertIndexToVectorValueMatcher(ReadableVectorOffset, ImmutableBitmap)} and then doing a logical AND + * with the bundles matchers. + */ + private static FilterBundle.MatcherBundle convertBundleToMatcherOnlyBundle( + FilterBundle bundle, + ImmutableBitmap bundleIndex + ) + { + return new FilterBundle.MatcherBundle() { @Override public FilterBundle.MatcherBundleInfo getMatcherInfo() @@ -571,186 +239,517 @@ private static FilterBundle.MatcherBundle convertIndexToMatcherBundle( return new FilterBundle.MatcherBundle() { @Override - public FilterBundle.MatcherBundleInfo getMatcherInfo() + public FilterBundle.MatcherBundleInfo getMatcherInfo() + { + if (indexOnlyBundles.size() == 1) { + return new FilterBundle.MatcherBundleInfo( + indexOnlyBundles.get(0).getIndexInfo()::getFilter, + indexOnlyBundles.get(0).getIndexInfo(), + null + ); + } + return new FilterBundle.MatcherBundleInfo( + () -> "OR", + new FilterBundle.IndexBundleInfo( + () -> "OR", + selectionRowCount, + totalBitmapConstructTimeNs, + indexOnlyBundlesInfo + ), + null + ); + } + + @Override + public ValueMatcher valueMatcher( + ColumnSelectorFactory selectorFactory, + Offset baseOffset, + boolean descending + ) + { + return convertIndexToValueMatcher(baseOffset.getBaseReadableOffset(), partialIndex, descending); + } + + @Override + public VectorValueMatcher vectorMatcher( + VectorColumnSelectorFactory selectorFactory, + ReadableVectorOffset baseOffset + ) + { + return convertIndexToVectorValueMatcher(baseOffset, partialIndex); + } + + @Override + public boolean canVectorize() + { + return true; + } + }; + } + + private static ValueMatcher convertIndexToValueMatcher( + final ReadableOffset offset, + final ImmutableBitmap rowBitmap, + boolean descending + ) + { + + if (descending) { + + final IntIterator iter = BitmapOffset.getReverseBitmapOffsetIterator(rowBitmap); + + if (!iter.hasNext()) { + return ValueMatchers.allFalse(); + } + return new ValueMatcher() + { + int iterOffset = Integer.MAX_VALUE; + + @Override + public boolean matches(boolean includeUnknown) + { + int currentOffset = offset.getOffset(); + while (iterOffset > currentOffset && iter.hasNext()) { + iterOffset = iter.next(); + } + + return iterOffset == currentOffset; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("offset", offset); + inspector.visit("iter", iter); + } + }; + } else { + final PeekableIntIterator peekableIterator = rowBitmap.peekableIterator(); + + if (!peekableIterator.hasNext()) { + return ValueMatchers.allFalse(); + } + return new ValueMatcher() + { + int iterOffset = -1; + + @Override + public boolean matches(boolean includeUnknown) + { + int currentOffset = offset.getOffset(); + peekableIterator.advanceIfNeeded(currentOffset); + if (peekableIterator.hasNext()) { + iterOffset = peekableIterator.peekNext(); + } + + return iterOffset == currentOffset; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("offset", offset); + inspector.visit("peekableIterator", peekableIterator); + } + }; + } + } + + private static VectorValueMatcher convertIndexToVectorValueMatcher( + final ReadableVectorOffset vectorOffset, + final ImmutableBitmap bitmap + ) + { + final PeekableIntIterator peekableIntIterator = bitmap.peekableIterator(); + if (!peekableIntIterator.hasNext()) { + return BooleanVectorValueMatcher.of(vectorOffset, ConstantMatcherType.ALL_FALSE); + } + + return new VectorValueMatcher() + { + final VectorMatch match = VectorMatch.wrap(new int[vectorOffset.getMaxVectorSize()]); + int iterOffset = -1; + + @Override + public ReadableVectorMatch match(ReadableVectorMatch mask, boolean includeUnknown) + { + final int[] selection = match.getSelection(); + if (vectorOffset.isContiguous()) { + int numRows = 0; + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int maskNum = mask.getSelection()[i]; + final int rowNum = vectorOffset.getStartOffset() + maskNum; + peekableIntIterator.advanceIfNeeded(rowNum); + if (peekableIntIterator.hasNext()) { + iterOffset = peekableIntIterator.peekNext(); + if (iterOffset == rowNum) { + selection[numRows++] = maskNum; + } + } + } + match.setSelectionSize(numRows); + return match; + } else { + final int[] currentOffsets = vectorOffset.getOffsets(); + int numRows = 0; + for (int i = 0; i < mask.getSelectionSize(); i++) { + final int maskNum = mask.getSelection()[i]; + final int rowNum = currentOffsets[mask.getSelection()[i]]; + peekableIntIterator.advanceIfNeeded(rowNum); + if (peekableIntIterator.hasNext()) { + iterOffset = peekableIntIterator.peekNext(); + if (iterOffset == rowNum) { + selection[numRows++] = maskNum; + } + } + } + match.setSelectionSize(numRows); + return match; + } + } + + @Override + public int getMaxVectorSize() + { + return vectorOffset.getMaxVectorSize(); + } + + @Override + public int getCurrentVectorSize() + { + return vectorOffset.getCurrentVectorSize(); + } + }; + } + + @Override + public FilterBundle makeFilterBundle( + FilterBundle.Builder filterBundleBuilder, + BitmapResultFactory bitmapResultFactory, + int applyRowCount, + int totalRowCount, + boolean includeUnknown + ) + { + // for OR filters, we have a few possible outcomes: + // 1 - all clauses are index only bundles. in this case we union the bitmaps together and make an index only bundle + // 2 - some clauses support indexes. in this case, we union the bitmaps of any index only bundles together to form a + // partial index which is constructed into a matcher bundle with convertIndexToMatcherBundle. We translate any + // index AND matcher bundles into a matcher only bundle with convertBundleToMatcherOnlyBundle. Finally, we + // combine these with the remaining matcher only bundles to with makeMatcher/makeVectorMatcher to make a matcher + // only bundle + // 3 - no clauses support indexes. in this case, we make a matcher only bundle using makeMatcher/makeVectorMatcher + + final List indexOnlyBundles = new ArrayList<>(); + final List indexOnlyBundlesInfo = new ArrayList<>(); + final List partialIndexBundles = new ArrayList<>(); + final List matcherOnlyBundles = new ArrayList<>(); + + int indexUnionSize = 0; + ImmutableBitmap index = null; + ColumnIndexCapabilities merged = new SimpleColumnIndexCapabilities(true, true); + int emptyCount = 0; + + final long bitmapConstructionStartNs = System.nanoTime(); + for (FilterBundle.Builder subFilterBundleBuilder : filterBundleBuilder.getChildBuilders()) { + final FilterBundle bundle = subFilterBundleBuilder.getFilter().makeFilterBundle( + subFilterBundleBuilder, + bitmapResultFactory, + Math.min(applyRowCount, totalRowCount - indexUnionSize), + totalRowCount, + includeUnknown + ); + if (bundle.hasIndex()) { + final ImmutableBitmap bundleIndex = bundle.getIndex().getBitmap(); + if (bundleIndex.isEmpty()) { + // we leave any indexes which are empty out of index, indexOnlyBundles, and partialIndexBundles + // even though we skip them, we still keep track of them to check for the case when we can build the OR into + // an index only bundle. We can count index and matcher bundles here too because the AND operation means that + // an empty index means the matcher can be skipped + emptyCount++; + } else { + if (bundle.hasMatcher()) { + // index and matcher bundles must be handled separately, they will need to be a single value matcher built + // by doing an AND operation between the index and the value matcher + // (a bundle is basically an AND operation between the index and matcher if the matcher is present) + partialIndexBundles.add(convertBundleToMatcherOnlyBundle(bundle, bundleIndex)); + } else { + indexOnlyBundles.add(bundle.getIndex()); + indexOnlyBundlesInfo.add(bundle.getIndex().getIndexInfo()); + merged.merge(bundle.getIndex().getIndexCapabilities()); + // union index only bitmaps together; if all sub-filters are 'index only' bundles we will make an index only + // bundle ourselves, else we will use this index as a single value matcher + if (index == null) { + index = bundle.getIndex().getBitmap(); + } else { + index = index.union(bundle.getIndex().getBitmap()); + } + indexUnionSize = index.size(); + } + } + } else { + matcherOnlyBundles.add(bundle.getMatcherBundle()); + } + } + final long totalBitmapConstructTimeNs = System.nanoTime() - bitmapConstructionStartNs; + + + // if all the filters are 'index only', we can make an index only bundle + if (indexOnlyBundles.size() + emptyCount == filters.size()) { + if (index == null || index.isEmpty()) { + return FilterBundle.allFalse( + totalBitmapConstructTimeNs, + filterBundleBuilder.getColumnIndexSelector().getBitmapFactory().makeEmptyImmutableBitmap() + ); + } + if (indexOnlyBundles.size() == 1) { + return new FilterBundle( + indexOnlyBundles.get(0), + null + ); + } + return new FilterBundle( + new FilterBundle.SimpleIndexBundle( + new FilterBundle.IndexBundleInfo( + () -> "OR", + applyRowCount, + totalBitmapConstructTimeNs, + indexOnlyBundlesInfo + ), + index, + merged + ), + null + ); + } + + // if not the index only outcome, we build a matcher only bundle from all the matchers + final int estimatedSize = (indexOnlyBundles.isEmpty() ? 0 : 1) + + partialIndexBundles.size() + + matcherOnlyBundles.size(); + final List allMatcherBundles = Lists.newArrayListWithCapacity(estimatedSize); + final List allMatcherBundlesInfo = Lists.newArrayListWithCapacity(estimatedSize); + if (!indexOnlyBundles.isEmpty()) { + // translate the indexOnly bundles into a single matcher + final FilterBundle.MatcherBundle matcherBundle = convertIndexToMatcherBundle( + applyRowCount, + indexOnlyBundles, + indexOnlyBundlesInfo, + totalBitmapConstructTimeNs, + index + ); + allMatcherBundles.add(matcherBundle); + allMatcherBundlesInfo.add(matcherBundle.getMatcherInfo()); + } + for (FilterBundle.MatcherBundle bundle : partialIndexBundles) { + allMatcherBundles.add(bundle); + allMatcherBundlesInfo.add(bundle.getMatcherInfo()); + } + for (FilterBundle.MatcherBundle bundle : matcherOnlyBundles) { + allMatcherBundles.add(bundle); + allMatcherBundlesInfo.add(bundle.getMatcherInfo()); + } + + return new FilterBundle( + null, + new FilterBundle.MatcherBundle() + { + @Override + public FilterBundle.MatcherBundleInfo getMatcherInfo() + { + return new FilterBundle.MatcherBundleInfo( + () -> "OR", + null, + allMatcherBundlesInfo + ); + } + + @Override + public ValueMatcher valueMatcher(ColumnSelectorFactory selectorFactory, Offset baseOffset, boolean descending) + { + final ValueMatcher[] matchers = new ValueMatcher[allMatcherBundles.size()]; + for (int i = 0; i < allMatcherBundles.size(); i++) { + matchers[i] = allMatcherBundles.get(i).valueMatcher(selectorFactory, baseOffset, descending); + } + return makeMatcher(matchers); + } + + @Override + public VectorValueMatcher vectorMatcher( + VectorColumnSelectorFactory selectorFactory, + ReadableVectorOffset baseOffset + ) + { + final VectorValueMatcher[] matchers = new VectorValueMatcher[allMatcherBundles.size()]; + for (int i = 0; i < allMatcherBundles.size(); i++) { + matchers[i] = allMatcherBundles.get(i).vectorMatcher(selectorFactory, baseOffset); + } + return makeVectorMatcher(matchers); + } + + @Override + public boolean canVectorize() + { + for (FilterBundle.MatcherBundle bundle : allMatcherBundles) { + if (!bundle.canVectorize()) { + return false; + } + } + return true; + } + } + ); + } + + @Nullable + @Override + public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) + { + if (filters.size() == 1) { + return Iterables.getOnlyElement(filters).getBitmapColumnIndex(selector); + } + + List bitmapColumnIndices = new ArrayList<>(filters.size()); + ColumnIndexCapabilities merged = new SimpleColumnIndexCapabilities(true, true); + for (Filter filter : filters) { + BitmapColumnIndex index = filter.getBitmapColumnIndex(selector); + if (index == null) { + // all or nothing + return null; + } + merged = merged.merge(index.getIndexCapabilities()); + bitmapColumnIndices.add(index); + } + + final ColumnIndexCapabilities finalMerged = merged; + return new BitmapColumnIndex() + { + @Override + public ColumnIndexCapabilities getIndexCapabilities() { - if (indexOnlyBundles.size() == 1) { - return new FilterBundle.MatcherBundleInfo( - indexOnlyBundles.get(0).getIndexInfo()::getFilter, - indexOnlyBundles.get(0).getIndexInfo(), - null - ); - } - return new FilterBundle.MatcherBundleInfo( - () -> "OR", - new FilterBundle.IndexBundleInfo( - () -> "OR", - selectionRowCount, - totalBitmapConstructTimeNs, - indexOnlyBundlesInfo - ), - null - ); + return finalMerged; } @Override - public ValueMatcher valueMatcher( - ColumnSelectorFactory selectorFactory, - Offset baseOffset, - boolean descending - ) + public int estimatedComputeCost() { - return convertIndexToValueMatcher(baseOffset.getBaseReadableOffset(), partialIndex, descending); + // There's no additional cost on OR filter, cost in child filters would be summed. + return 0; } @Override - public VectorValueMatcher vectorMatcher( - VectorColumnSelectorFactory selectorFactory, - ReadableVectorOffset baseOffset - ) + public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) { - return convertIndexToVectorValueMatcher(baseOffset, partialIndex); + return bitmapResultFactory.union( + () -> bitmapColumnIndices.stream() + .map(x -> x.computeBitmapResult(bitmapResultFactory, includeUnknown)) + .iterator() + ); } + @Nullable @Override - public boolean canVectorize() + public T computeBitmapResult( + BitmapResultFactory bitmapResultFactory, + int applyRowCount, + int totalRowCount, + boolean includeUnknown + ) { - return true; + List results = Lists.newArrayListWithCapacity(bitmapColumnIndices.size()); + for (BitmapColumnIndex index : bitmapColumnIndices) { + final T r = index.computeBitmapResult(bitmapResultFactory, applyRowCount, totalRowCount, includeUnknown); + if (r == null) { + // all or nothing + return null; + } + results.add(r); + } + return bitmapResultFactory.union(results); } }; } - private static ValueMatcher convertIndexToValueMatcher( - final ReadableOffset offset, - final ImmutableBitmap rowBitmap, - boolean descending - ) + @Override + public ValueMatcher makeMatcher(ColumnSelectorFactory factory) { + final ValueMatcher[] matchers = new ValueMatcher[filters.size()]; - if (descending) { - - final IntIterator iter = BitmapOffset.getReverseBitmapOffsetIterator(rowBitmap); + int i = 0; + for (Filter filter : filters) { + matchers[i++] = filter.makeMatcher(factory); + } + return makeMatcher(matchers); + } - if (!iter.hasNext()) { - return ValueMatchers.allFalse(); - } - return new ValueMatcher() - { - int iterOffset = Integer.MAX_VALUE; + @Override + public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory) + { + final VectorValueMatcher[] matchers = new VectorValueMatcher[filters.size()]; - @Override - public boolean matches(boolean includeUnknown) - { - int currentOffset = offset.getOffset(); - while (iterOffset > currentOffset && iter.hasNext()) { - iterOffset = iter.next(); - } + int i = 0; + for (Filter filter : filters) { + matchers[i++] = filter.makeVectorMatcher(factory); + } + return makeVectorMatcher(matchers); + } - return iterOffset == currentOffset; - } + @Override + public boolean canVectorizeMatcher(ColumnInspector inspector) + { + return filters.stream().allMatch(filter -> filter.canVectorizeMatcher(inspector)); + } - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - inspector.visit("offset", offset); - inspector.visit("iter", iter); - } - }; - } else { - final PeekableIntIterator peekableIterator = rowBitmap.peekableIterator(); + @Override + public LinkedHashSet getFilters() + { + return filters; + } - if (!peekableIterator.hasNext()) { - return ValueMatchers.allFalse(); + @Override + public boolean supportsRequiredColumnRewrite() + { + for (Filter filter : filters) { + if (!filter.supportsRequiredColumnRewrite()) { + return false; } - return new ValueMatcher() - { - int iterOffset = -1; - - @Override - public boolean matches(boolean includeUnknown) - { - int currentOffset = offset.getOffset(); - peekableIterator.advanceIfNeeded(currentOffset); - if (peekableIterator.hasNext()) { - iterOffset = peekableIterator.peekNext(); - } - - return iterOffset == currentOffset; - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - inspector.visit("offset", offset); - inspector.visit("peekableIterator", peekableIterator); - } - }; } + + return true; } - private static VectorValueMatcher convertIndexToVectorValueMatcher( - final ReadableVectorOffset vectorOffset, - final ImmutableBitmap bitmap - ) + @Override + public Filter rewriteRequiredColumns(Map columnRewrites) { - final PeekableIntIterator peekableIntIterator = bitmap.peekableIterator(); - if (!peekableIntIterator.hasNext()) { - return BooleanVectorValueMatcher.of(vectorOffset, ConstantMatcherType.ALL_FALSE); + final List newFilters = new ArrayList<>(filters.size()); + for (Filter filter : filters) { + newFilters.add(filter.rewriteRequiredColumns(columnRewrites)); } + return new OrFilter(newFilters); + } - return new VectorValueMatcher() - { - final VectorMatch match = VectorMatch.wrap(new int[vectorOffset.getMaxVectorSize()]); - int iterOffset = -1; - - @Override - public ReadableVectorMatch match(ReadableVectorMatch mask, boolean includeUnknown) - { - final int[] selection = match.getSelection(); - if (vectorOffset.isContiguous()) { - int numRows = 0; - for (int i = 0; i < mask.getSelectionSize(); i++) { - final int maskNum = mask.getSelection()[i]; - final int rowNum = vectorOffset.getStartOffset() + maskNum; - peekableIntIterator.advanceIfNeeded(rowNum); - if (peekableIntIterator.hasNext()) { - iterOffset = peekableIntIterator.peekNext(); - if (iterOffset == rowNum) { - selection[numRows++] = maskNum; - } - } - } - match.setSelectionSize(numRows); - return match; - } else { - final int[] currentOffsets = vectorOffset.getOffsets(); - int numRows = 0; - for (int i = 0; i < mask.getSelectionSize(); i++) { - final int maskNum = mask.getSelection()[i]; - final int rowNum = currentOffsets[mask.getSelection()[i]]; - peekableIntIterator.advanceIfNeeded(rowNum); - if (peekableIntIterator.hasNext()) { - iterOffset = peekableIntIterator.peekNext(); - if (iterOffset == rowNum) { - selection[numRows++] = maskNum; - } - } - } - match.setSelectionSize(numRows); - return match; - } - } + @Override + public String toString() + { + return StringUtils.format("(%s)", OR_JOINER.join(filters)); + } - @Override - public int getMaxVectorSize() - { - return vectorOffset.getMaxVectorSize(); - } + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + OrFilter orFilter = (OrFilter) o; + return Objects.equals(getFilters(), orFilter.getFilters()); + } - @Override - public int getCurrentVectorSize() - { - return vectorOffset.getCurrentVectorSize(); - } - }; + @Override + public int hashCode() + { + return Objects.hash(getFilters()); } } From 4cbe2201d5ece124f38b26889e0395839d890401 Mon Sep 17 00:00:00 2001 From: cecemei Date: Thu, 12 Sep 2024 13:02:55 -0700 Subject: [PATCH 05/10] Fix javadoc in FilterTunning --- .../main/java/org/apache/druid/query/filter/FilterTuning.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/FilterTuning.java b/processing/src/main/java/org/apache/druid/query/filter/FilterTuning.java index 831d50261e21..892192128e0f 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/FilterTuning.java +++ b/processing/src/main/java/org/apache/druid/query/filter/FilterTuning.java @@ -30,7 +30,7 @@ /** * This class provides a mechanism to influence whether or not indexes are used for a {@link Filter} during processing - * by {@link Filter#makeFilterBundle(ColumnIndexSelector, BitmapResultFactory, int, int, boolean)} + * by {@link Filter#makeFilterBundle(FilterBundle.Builder, BitmapResultFactory, int, int, boolean)} * (i.e. will a {@link Filter} be a "pre" filter in which we union indexes for all values that match the filter to * create a {@link org.apache.druid.segment.BitmapOffset}/{@link org.apache.druid.segment.vector.BitmapVectorOffset}, * or will it be used as a "post" filter and evaluated while scanning row values from the From 98bcb3c0de94582071be1c7de3177e26dcac84c1 Mon Sep 17 00:00:00 2001 From: cecemei Date: Fri, 13 Sep 2024 14:44:45 -0700 Subject: [PATCH 06/10] Made some changes in response to comments. --- .../org/apache/druid/query/QueryContexts.java | 1 + .../org/apache/druid/query/filter/Filter.java | 15 +++++--- .../druid/query/filter/FilterBundle.java | 36 ++++++++++--------- .../segment/QueryableIndexCursorHolder.java | 11 +++++- .../druid/segment/filter/AndFilter.java | 19 +++++----- .../apache/druid/segment/filter/OrFilter.java | 17 ++++----- .../segment/filter/cnf/CalciteCnfHelper.java | 4 +-- .../segment/filter/FilterBundleTest.java | 16 ++++++++- 8 files changed, 73 insertions(+), 46 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/QueryContexts.java b/processing/src/main/java/org/apache/druid/query/QueryContexts.java index afdc5a552f0c..ced9f0d4e2d9 100644 --- a/processing/src/main/java/org/apache/druid/query/QueryContexts.java +++ b/processing/src/main/java/org/apache/druid/query/QueryContexts.java @@ -63,6 +63,7 @@ public class QueryContexts public static final String REWRITE_JOIN_TO_FILTER_ENABLE_KEY = "enableRewriteJoinToFilter"; public static final String JOIN_FILTER_REWRITE_MAX_SIZE_KEY = "joinFilterRewriteMaxSize"; public static final String MAX_NUMERIC_IN_FILTERS = "maxNumericInFilters"; + public static final String CURSOR_AUTO_ARRANGE_FILTERS = "cursorAutoArrangeFilters"; // This flag controls whether a SQL join query with left scan should be attempted to be run as direct table access // instead of being wrapped inside a query. With direct table access enabled, Druid can push down the join operation to // data servers. diff --git a/processing/src/main/java/org/apache/druid/query/filter/Filter.java b/processing/src/main/java/org/apache/druid/query/filter/Filter.java index 3adfc4e80f8a..d3caf4b983a4 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/Filter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/Filter.java @@ -19,6 +19,7 @@ package org.apache.druid.query.filter; +import com.google.common.collect.ImmutableSet; import org.apache.druid.annotations.SubclassesMustOverrideEqualsAndHashCode; import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.common.config.NullHandling; @@ -31,7 +32,6 @@ import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; -import java.util.LinkedHashSet; import java.util.Map; import java.util.Set; @@ -44,12 +44,17 @@ default String getFilterString() } /** - * Returns a LinkedHashSet of all child filters for this filter with no duplicates. - *

The ordering of child filters is important in some cases, e.x.short-curcuiting.

+ * Returns a sorted set of all child filters for this filter with no duplicates. The default return value is an empty + * set. + *

+ * Only {@link org.apache.druid.segment.filter.AndFilter} and {@link org.apache.druid.segment.filter.OrFilter} have + * child filters. Other filters would return empty (including {@link org.apache.druid.segment.filter.NotFilter}). + *

+ * The ordering of child filters is important in some cases, e.x.short-curcuiting. */ - default LinkedHashSet getFilters() + default ImmutableSet getFilters() { - return new LinkedHashSet<>(); + return ImmutableSet.of(); } /** diff --git a/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java b/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java index 881189699841..d4f41213cfba 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java +++ b/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java @@ -162,9 +162,9 @@ public static class Builder @Nullable private final BitmapColumnIndex bitmapColumnIndex; private final List childBuilders; - private final int estimatedComputeCost; + private final int estimatedIndexComputeCost; - public Builder(Filter filter, ColumnIndexSelector columnIndexSelector) + public Builder(Filter filter, ColumnIndexSelector columnIndexSelector, boolean cursorAutoArrangeFilters) { this.filter = filter; this.columnIndexSelector = columnIndexSelector; @@ -172,14 +172,18 @@ public Builder(Filter filter, ColumnIndexSelector columnIndexSelector) // Construct Builder instances for all child filters recursively. this.childBuilders = new ArrayList<>(filter.getFilters().size()); for (Filter childFilter : filter.getFilters()) { - childBuilders.add(new FilterBundle.Builder(childFilter, columnIndexSelector)); + childBuilders.add(new FilterBundle.Builder(childFilter, columnIndexSelector, cursorAutoArrangeFilters)); + } + if (cursorAutoArrangeFilters) { + // Sort child builders by cost in ASCENDING order, should be stable by default. + this.childBuilders.sort(Comparator.comparingInt(FilterBundle.Builder::getEstimatedIndexComputeCost)); + this.estimatedIndexComputeCost = calculateEstimatedIndexComputeCost(); + } else { + this.estimatedIndexComputeCost = Integer.MAX_VALUE; } - // Sort child builders by cost in ASCENDING order, should be stable by default. - this.childBuilders.sort(Comparator.comparingInt(FilterBundle.Builder::getEstimatedComputeCost)); - this.estimatedComputeCost = calculateEstimatedComputeCost(); } - private int calculateEstimatedComputeCost() + private int calculateEstimatedIndexComputeCost() { if (this.bitmapColumnIndex == null) { return Integer.MAX_VALUE; @@ -189,8 +193,8 @@ private int calculateEstimatedComputeCost() return Integer.MAX_VALUE; } - for (FilterBundle.Builder childBuilder : this.childBuilders) { - int childCost = childBuilder.getEstimatedComputeCost(); + for (FilterBundle.Builder childBuilder : childBuilders) { + int childCost = childBuilder.getEstimatedIndexComputeCost(); if (childCost >= Integer.MAX_VALUE - cost) { return Integer.MAX_VALUE; } @@ -201,28 +205,28 @@ private int calculateEstimatedComputeCost() public Filter getFilter() { - return this.filter; + return filter; } public ColumnIndexSelector getColumnIndexSelector() { - return this.columnIndexSelector; + return columnIndexSelector; } @Nullable public BitmapColumnIndex getBitmapColumnIndex() { - return this.bitmapColumnIndex; + return bitmapColumnIndex; } public List getChildBuilders() { - return this.childBuilders; + return childBuilders; } - public int getEstimatedComputeCost() + public int getEstimatedIndexComputeCost() { - return this.estimatedComputeCost; + return estimatedIndexComputeCost; } public FilterBundle build( @@ -232,7 +236,7 @@ public FilterBundle build( boolean includeUnknown ) { - return this.filter.makeFilterBundle(this, bitmapResultFactory, applyRowCount, totalRowCount, includeUnknown); + return filter.makeFilterBundle(this, bitmapResultFactory, applyRowCount, totalRowCount, includeUnknown); } } diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorHolder.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorHolder.java index 99993b93eafe..5aa4dbed8cc0 100644 --- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorHolder.java +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorHolder.java @@ -34,6 +34,7 @@ import org.apache.druid.query.OrderBy; import org.apache.druid.query.Query; import org.apache.druid.query.QueryContext; +import org.apache.druid.query.QueryContexts; import org.apache.druid.query.QueryMetrics; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.filter.Filter; @@ -112,6 +113,7 @@ public QueryableIndexCursorHolder( Cursors.getTimeOrdering(ordering), interval, filter, + cursorBuildSpec.getQueryContext().getBoolean(QueryContexts.CURSOR_AUTO_ARRANGE_FILTERS, false), metrics ) ); @@ -664,6 +666,7 @@ private CursorResources( Order timeOrder, Interval interval, @Nullable Filter filter, + boolean cursorAutoArrangeFilters, @Nullable QueryMetrics> metrics ) { @@ -687,6 +690,7 @@ private CursorResources( interval, filter ), + cursorAutoArrangeFilters, bitmapIndexSelector, numRows, metrics @@ -714,6 +718,7 @@ public void close() throws IOException @Nullable private static FilterBundle makeFilterBundle( @Nullable final Filter filter, + boolean cursorAutoArrangeFilters, final ColumnSelectorColumnIndexSelector bitmapIndexSelector, final int numRows, @Nullable final QueryMetrics metrics @@ -731,7 +736,11 @@ private static FilterBundle makeFilterBundle( return null; } final long bitmapConstructionStartNs = System.nanoTime(); - final FilterBundle filterBundle = new FilterBundle.Builder(filter, bitmapIndexSelector).build( + final FilterBundle filterBundle = new FilterBundle.Builder( + filter, + bitmapIndexSelector, + cursorAutoArrangeFilters + ).build( bitmapResultFactory, numRows, numRows, diff --git a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java index 246a7d6fccdb..aff92d2786b2 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java @@ -22,6 +22,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.java.util.common.StringUtils; @@ -46,7 +47,7 @@ import javax.annotation.Nullable; import java.util.ArrayList; -import java.util.LinkedHashSet; +import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Objects; @@ -58,18 +59,14 @@ public class AndFilter implements BooleanFilter { public static final Joiner AND_JOINER = Joiner.on(" && "); - private final LinkedHashSet filters; - - public AndFilter(LinkedHashSet filters) - { - Preconditions.checkArgument(filters.size() > 0, "Can't construct empty AndFilter"); - this.filters = filters; - } + private final ImmutableSet filters; @VisibleForTesting - public AndFilter(List filters) + public AndFilter(Collection filters) { - this(new LinkedHashSet<>(filters)); + Preconditions.checkArgument(filters.size() > 0, "Can't construct empty AndFilter"); + // The order of elements in filters param would be preserved. + this.filters = ImmutableSet.copyOf(filters); } public static ValueMatcher makeMatcher(final ValueMatcher[] baseMatchers) @@ -383,7 +380,7 @@ public boolean canVectorizeMatcher(ColumnInspector inspector) } @Override - public LinkedHashSet getFilters() + public ImmutableSet getFilters() { return filters; } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java index 7fb867872df2..6ef3353dab97 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java @@ -21,6 +21,7 @@ import com.google.common.base.Joiner; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import org.apache.druid.collections.bitmap.ImmutableBitmap; @@ -52,8 +53,8 @@ import javax.annotation.Nullable; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Objects; @@ -65,17 +66,13 @@ public class OrFilter implements BooleanFilter { private static final Joiner OR_JOINER = Joiner.on(" || "); - private final LinkedHashSet filters; + private final ImmutableSet filters; - public OrFilter(LinkedHashSet filters) + public OrFilter(Collection filters) { Preconditions.checkArgument(filters.size() > 0, "Can't construct empty OrFilter (the universe does not exist)"); - this.filters = filters; - } - - public OrFilter(List filters) - { - this(new LinkedHashSet<>(filters)); + // The order of elements in filters param would be preserved. + this.filters = ImmutableSet.copyOf(filters); } private static ValueMatcher makeMatcher(final ValueMatcher[] baseMatchers) @@ -701,7 +698,7 @@ public boolean canVectorizeMatcher(ColumnInspector inspector) } @Override - public LinkedHashSet getFilters() + public ImmutableSet getFilters() { return filters; } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/cnf/CalciteCnfHelper.java b/processing/src/main/java/org/apache/druid/segment/filter/cnf/CalciteCnfHelper.java index 18c7e4307ec6..c5cd2b2009d4 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/cnf/CalciteCnfHelper.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/cnf/CalciteCnfHelper.java @@ -20,6 +20,7 @@ package org.apache.druid.segment.filter.cnf; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import org.apache.druid.query.filter.Filter; import org.apache.druid.segment.filter.AndFilter; @@ -31,7 +32,6 @@ import java.util.ArrayList; import java.util.Collection; import java.util.LinkedHashMap; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ -44,7 +44,7 @@ public class CalciteCnfHelper { public static Filter pull(Filter rex) { - final LinkedHashSet operands; + final ImmutableSet operands; if (rex instanceof AndFilter) { operands = ((AndFilter) rex).getFilters(); return and(pullList(operands)); diff --git a/processing/src/test/java/org/apache/druid/segment/filter/FilterBundleTest.java b/processing/src/test/java/org/apache/druid/segment/filter/FilterBundleTest.java index d1a65f7ad5f4..9bc86ae3900e 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/FilterBundleTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/FilterBundleTest.java @@ -43,7 +43,12 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameter; +import org.junit.runners.Parameterized.Parameters; +@RunWith(Parameterized.class) public class FilterBundleTest extends InitializedNullHandlingTest { private Closer closer; @@ -53,6 +58,15 @@ public class FilterBundleTest extends InitializedNullHandlingTest @Rule public TemporaryFolder tmpDir = new TemporaryFolder(); + @Parameters + public static Object[] flags() + { + return new Object[]{false, true}; + } + + @Parameter + public boolean cursorAutoArrangeFilters; + @Before public void setUp() { @@ -317,7 +331,7 @@ public void test_or_countryIsNull_and_isRobotInFalseTrue_pageLike() protected FilterBundle makeFilterBundle(final Filter filter) { - return new FilterBundle.Builder(filter, indexSelector).build( + return new FilterBundle.Builder(filter, indexSelector, cursorAutoArrangeFilters).build( new DefaultBitmapResultFactory(bitmapFactory), indexSelector.getNumRows(), indexSelector.getNumRows(), From 60a32f5e96328132981e9f5acc38cfcde20f0d13 Mon Sep 17 00:00:00 2001 From: cecemei Date: Fri, 13 Sep 2024 15:24:12 -0700 Subject: [PATCH 07/10] small edit on comment. --- .../src/main/java/org/apache/druid/query/filter/Filter.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/Filter.java b/processing/src/main/java/org/apache/druid/query/filter/Filter.java index d3caf4b983a4..fb280d693da8 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/Filter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/Filter.java @@ -44,8 +44,8 @@ default String getFilterString() } /** - * Returns a sorted set of all child filters for this filter with no duplicates. The default return value is an empty - * set. + * Returns an ordered set of all child filters for this filter with no duplicates. The default return value is an + * empty set. *

* Only {@link org.apache.druid.segment.filter.AndFilter} and {@link org.apache.druid.segment.filter.OrFilter} have * child filters. Other filters would return empty (including {@link org.apache.druid.segment.filter.NotFilter}). From 35e33e228e87665b5a83c93f2c62992dcbd0dde3 Mon Sep 17 00:00:00 2001 From: cecemei Date: Tue, 17 Sep 2024 12:37:12 -0700 Subject: [PATCH 08/10] Move getFilters method back to BooleanFilter and removed getFilterString method since it's not used --- .../druid/query/filter/BooleanFilter.java | 9 +++++++ .../org/apache/druid/query/filter/Filter.java | 20 --------------- .../druid/segment/filter/AndFilter.java | 25 ++++++++----------- .../apache/druid/segment/filter/OrFilter.java | 17 +++++++------ 4 files changed, 30 insertions(+), 41 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/BooleanFilter.java b/processing/src/main/java/org/apache/druid/query/filter/BooleanFilter.java index 4c3cedefe6db..e14e8b8d5e30 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/BooleanFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/BooleanFilter.java @@ -20,10 +20,19 @@ package org.apache.druid.query.filter; import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.Set; public interface BooleanFilter extends Filter { + /** + * Returns the child filters for this filter. + * + * This is a LinkedHashSet because we don't want duplicates, but the order is also important in some cases (such + * as when filters are provided in an order that encourages short-circuiting.) + */ + LinkedHashSet getFilters(); + @Override default Set getRequiredColumns() { diff --git a/processing/src/main/java/org/apache/druid/query/filter/Filter.java b/processing/src/main/java/org/apache/druid/query/filter/Filter.java index 5f8858fcef4c..147406b2261b 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/Filter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/Filter.java @@ -19,7 +19,6 @@ package org.apache.druid.query.filter; -import com.google.common.collect.ImmutableSet; import org.apache.druid.annotations.SubclassesMustOverrideEqualsAndHashCode; import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.common.config.NullHandling; @@ -38,25 +37,6 @@ @SubclassesMustOverrideEqualsAndHashCode public interface Filter { - default String getFilterString() - { - return toString(); - } - - /** - * Returns an ordered set of all child filters for this filter with no duplicates. The default return value is an - * empty set. - *

- * Only {@link org.apache.druid.segment.filter.AndFilter} and {@link org.apache.druid.segment.filter.OrFilter} have - * child filters. Other filters would return empty (including {@link org.apache.druid.segment.filter.NotFilter}). - *

- * The ordering of child filters is important in some cases, e.x.short-curcuiting. - */ - default ImmutableSet getFilters() - { - return ImmutableSet.of(); - } - /** * Compute indexes and build a container {@link FilterBundle} to be used during * {@link org.apache.druid.segment.Cursor} or {@link org.apache.druid.segment.vector.VectorCursor} creation, combining diff --git a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java index aff92d2786b2..29f26f8effb2 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java @@ -22,7 +22,6 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.java.util.common.StringUtils; @@ -47,7 +46,7 @@ import javax.annotation.Nullable; import java.util.ArrayList; -import java.util.Collection; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Objects; @@ -59,14 +58,18 @@ public class AndFilter implements BooleanFilter { public static final Joiner AND_JOINER = Joiner.on(" && "); - private final ImmutableSet filters; + private final LinkedHashSet filters; - @VisibleForTesting - public AndFilter(Collection filters) + public AndFilter(LinkedHashSet filters) { Preconditions.checkArgument(filters.size() > 0, "Can't construct empty AndFilter"); - // The order of elements in filters param would be preserved. - this.filters = ImmutableSet.copyOf(filters); + this.filters = filters; + } + + @VisibleForTesting + public AndFilter(List filters) + { + this(new LinkedHashSet<>(filters)); } public static ValueMatcher makeMatcher(final ValueMatcher[] baseMatchers) @@ -128,12 +131,6 @@ public ReadableVectorMatch match(final ReadableVectorMatch mask, boolean include }; } - @Override - public String getFilterString() - { - return "AND"; - } - @Override public FilterBundle makeFilterBundle( FilterBundle.Builder filterBundleBuilder, @@ -380,7 +377,7 @@ public boolean canVectorizeMatcher(ColumnInspector inspector) } @Override - public ImmutableSet getFilters() + public LinkedHashSet getFilters() { return filters; } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java index 6ef3353dab97..7fb867872df2 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java @@ -21,7 +21,6 @@ import com.google.common.base.Joiner; import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import org.apache.druid.collections.bitmap.ImmutableBitmap; @@ -53,8 +52,8 @@ import javax.annotation.Nullable; import java.util.ArrayList; -import java.util.Collection; import java.util.Collections; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Objects; @@ -66,13 +65,17 @@ public class OrFilter implements BooleanFilter { private static final Joiner OR_JOINER = Joiner.on(" || "); - private final ImmutableSet filters; + private final LinkedHashSet filters; - public OrFilter(Collection filters) + public OrFilter(LinkedHashSet filters) { Preconditions.checkArgument(filters.size() > 0, "Can't construct empty OrFilter (the universe does not exist)"); - // The order of elements in filters param would be preserved. - this.filters = ImmutableSet.copyOf(filters); + this.filters = filters; + } + + public OrFilter(List filters) + { + this(new LinkedHashSet<>(filters)); } private static ValueMatcher makeMatcher(final ValueMatcher[] baseMatchers) @@ -698,7 +701,7 @@ public boolean canVectorizeMatcher(ColumnInspector inspector) } @Override - public ImmutableSet getFilters() + public LinkedHashSet getFilters() { return filters; } From 54e1f8da17652771a9dd0ed9e099d9ccdc382254 Mon Sep 17 00:00:00 2001 From: cecemei Date: Tue, 17 Sep 2024 12:42:26 -0700 Subject: [PATCH 09/10] Revert change to CalciteCnfHelper --- .../org/apache/druid/segment/filter/cnf/CalciteCnfHelper.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/filter/cnf/CalciteCnfHelper.java b/processing/src/main/java/org/apache/druid/segment/filter/cnf/CalciteCnfHelper.java index c5cd2b2009d4..18c7e4307ec6 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/cnf/CalciteCnfHelper.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/cnf/CalciteCnfHelper.java @@ -20,7 +20,6 @@ package org.apache.druid.segment.filter.cnf; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import org.apache.druid.query.filter.Filter; import org.apache.druid.segment.filter.AndFilter; @@ -32,6 +31,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ -44,7 +44,7 @@ public class CalciteCnfHelper { public static Filter pull(Filter rex) { - final ImmutableSet operands; + final LinkedHashSet operands; if (rex instanceof AndFilter) { operands = ((AndFilter) rex).getFilters(); return and(pullList(operands)); From 07bb9f33150ce79d16189c0533897b2be4b81c8b Mon Sep 17 00:00:00 2001 From: cecemei Date: Tue, 17 Sep 2024 13:02:40 -0700 Subject: [PATCH 10/10] In FilterBundle.Builder, add all child bunlders if filter is BooleanFilter. Also removed the getFilter method and updated AndFilter/OrFilter to use the build method instead. --- .../apache/druid/query/filter/FilterBundle.java | 17 +++++++++-------- .../apache/druid/segment/filter/AndFilter.java | 3 +-- .../apache/druid/segment/filter/OrFilter.java | 3 +-- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java b/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java index d4f41213cfba..8511642a0c4a 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java +++ b/processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java @@ -37,6 +37,7 @@ import javax.annotation.Nullable; import java.util.ArrayList; +import java.util.Collection; import java.util.Comparator; import java.util.List; import java.util.concurrent.TimeUnit; @@ -170,9 +171,14 @@ public Builder(Filter filter, ColumnIndexSelector columnIndexSelector, boolean c this.columnIndexSelector = columnIndexSelector; this.bitmapColumnIndex = filter.getBitmapColumnIndex(columnIndexSelector); // Construct Builder instances for all child filters recursively. - this.childBuilders = new ArrayList<>(filter.getFilters().size()); - for (Filter childFilter : filter.getFilters()) { - childBuilders.add(new FilterBundle.Builder(childFilter, columnIndexSelector, cursorAutoArrangeFilters)); + if (filter instanceof BooleanFilter) { + Collection childFilters = ((BooleanFilter) filter).getFilters(); + this.childBuilders = new ArrayList<>(childFilters.size()); + for (Filter childFilter : childFilters) { + this.childBuilders.add(new FilterBundle.Builder(childFilter, columnIndexSelector, cursorAutoArrangeFilters)); + } + } else { + this.childBuilders = new ArrayList<>(0); } if (cursorAutoArrangeFilters) { // Sort child builders by cost in ASCENDING order, should be stable by default. @@ -203,11 +209,6 @@ private int calculateEstimatedIndexComputeCost() return cost; } - public Filter getFilter() - { - return filter; - } - public ColumnIndexSelector getColumnIndexSelector() { return columnIndexSelector; diff --git a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java index 29f26f8effb2..c4172fc6ce2d 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java @@ -157,8 +157,7 @@ public FilterBundle makeFilterBundle( // operation, this is valid (and even preferable). final long bitmapConstructionStartNs = System.nanoTime(); for (FilterBundle.Builder subFilterBundleBuilder : filterBundleBuilder.getChildBuilders()) { - final FilterBundle subBundle = subFilterBundleBuilder.getFilter().makeFilterBundle( - subFilterBundleBuilder, + final FilterBundle subBundle = subFilterBundleBuilder.build( bitmapResultFactory, Math.min(applyRowCount, indexIntersectionSize), totalRowCount, diff --git a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java index 7fb867872df2..e8bdce85c9bf 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java @@ -452,8 +452,7 @@ public FilterBundle makeFilterBundle( final long bitmapConstructionStartNs = System.nanoTime(); for (FilterBundle.Builder subFilterBundleBuilder : filterBundleBuilder.getChildBuilders()) { - final FilterBundle bundle = subFilterBundleBuilder.getFilter().makeFilterBundle( - subFilterBundleBuilder, + final FilterBundle bundle = subFilterBundleBuilder.build( bitmapResultFactory, Math.min(applyRowCount, totalRowCount - indexUnionSize), totalRowCount,