Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ public class QueryContexts
public static final String REWRITE_JOIN_TO_FILTER_ENABLE_KEY = "enableRewriteJoinToFilter";
public static final String JOIN_FILTER_REWRITE_MAX_SIZE_KEY = "joinFilterRewriteMaxSize";
public static final String MAX_NUMERIC_IN_FILTERS = "maxNumericInFilters";
public static final String CURSOR_AUTO_ARRANGE_FILTERS = "cursorAutoArrangeFilters";
// This flag controls whether a SQL join query with left scan should be attempted to be run as direct table access
// instead of being wrapped inside a query. With direct table access enabled, Druid can push down the join operation to
// data servers.
Expand Down
47 changes: 22 additions & 25 deletions processing/src/main/java/org/apache/druid/query/filter/Filter.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,27 +47,26 @@ public interface Filter
* cursor. If both are set, the cursor will effectively perform a logical AND to combine them.
* See {@link FilterBundle} for additional details.
*
* @param columnIndexSelector - provides {@link org.apache.druid.segment.column.ColumnIndexSupplier} to fetch column
* indexes and {@link org.apache.druid.collections.bitmap.BitmapFactory} to manipulate
* them
* @param bitmapResultFactory - wrapper for {@link ImmutableBitmap} operations to tie into
* {@link org.apache.druid.query.QueryMetrics} and build the output indexes
* @param applyRowCount - upper bound on number of rows this filter would be applied to, after removing rows
* short-circuited by prior bundle operations. For example, given "x AND y", if "x" is
* resolved using an index, then "y" will receive the number of rows that matched
* the filter "x". As another example, given "x OR y", if "x" is resolved using an
* index, then "y" will receive the number of rows that did *not* match the filter "x".
* @param totalRowCount - total number of rows to be scanned if no indexes are applied
* @param includeUnknown - mapping for Druid native two state logic system into SQL three-state logic system. If
* set to true, bitmaps returned by this method should include true bits for any rows
* where the matching result is 'unknown', such as from the input being null valued.
* See {@link NullHandling#useThreeValueLogic()}
* @return - {@link FilterBundle} containing any indexes and/or matchers that are needed to build
* a cursor
* @param <T> - Type of {@link BitmapResultFactory} results, {@link ImmutableBitmap} by default
* @param filterBundleBuilder holds the {@link BitmapColumnIndex} and {@link ColumnIndexSelector} for this filter,
* along with the builders of any child filters and the estimated index compute cost
* @param bitmapResultFactory wrapper for {@link ImmutableBitmap} operations to tie into
* {@link org.apache.druid.query.QueryMetrics} and build the output indexes
* @param applyRowCount upper bound on number of rows this filter would be applied to, after removing rows
* short-circuited by prior bundle operations. For example, given "x AND y", if "x" is
* resolved using an index, then "y" will receive the number of rows that matched
* the filter "x". As another example, given "x OR y", if "x" is resolved using an
* index, then "y" will receive the number of rows that did *not* match the filter "x".
* @param totalRowCount total number of rows to be scanned if no indexes are applied
* @param includeUnknown mapping for Druid native two state logic system into SQL three-state logic system. If
* set to true, bitmaps returned by this method should include true bits for any rows
* where the matching result is 'unknown', such as from the input being null valued.
* See {@link NullHandling#useThreeValueLogic()}
* @param <T> type of {@link BitmapResultFactory} results, {@link ImmutableBitmap} by default
* @return {@link FilterBundle} containing any indexes and/or matchers that are needed to build
* a cursor
*/
default <T> FilterBundle makeFilterBundle(
ColumnIndexSelector columnIndexSelector,
FilterBundle.Builder filterBundleBuilder,
BitmapResultFactory<T> bitmapResultFactory,
int applyRowCount,
int totalRowCount,
Expand All @@ -76,7 +75,7 @@ default <T> FilterBundle makeFilterBundle(
{
final FilterBundle.IndexBundle indexBundle;
final boolean needMatcher;
final BitmapColumnIndex columnIndex = getBitmapColumnIndex(columnIndexSelector);
final BitmapColumnIndex columnIndex = filterBundleBuilder.getBitmapColumnIndex();
if (columnIndex != null) {
final long bitmapConstructionStartNs = System.nanoTime();
final T result = columnIndex.computeBitmapResult(
Expand Down Expand Up @@ -107,7 +106,7 @@ default <T> FilterBundle makeFilterBundle(
new FilterBundle.MatcherBundleInfo(this::toString, null, null),
this::makeMatcher,
this::makeVectorMatcher,
this.canVectorizeMatcher(columnIndexSelector)
this.canVectorizeMatcher(filterBundleBuilder.getColumnIndexSelector())
);
} else {
matcherBundle = null;
Expand All @@ -122,7 +121,6 @@ default <T> FilterBundle makeFilterBundle(
* examine details about the index prior to computing it, via {@link BitmapColumnIndex#getIndexCapabilities()}.
*
* @param selector Object used to create BitmapColumnIndex
*
* @return BitmapColumnIndex that can build ImmutableBitmap of matched row numbers
*/
@Nullable
Expand All @@ -132,7 +130,6 @@ default <T> FilterBundle makeFilterBundle(
* Get a {@link ValueMatcher} that applies this filter to row values.
*
* @param factory Object used to create ValueMatchers
*
* @return ValueMatcher that applies this filter to row values.
*/
ValueMatcher makeMatcher(ColumnSelectorFactory factory);
Expand All @@ -141,7 +138,6 @@ default <T> FilterBundle makeFilterBundle(
* Get a {@link VectorValueMatcher} that applies this filter to row vectors.
*
* @param factory Object used to create VectorValueMatchers
*
* @return VectorValueMatcher that applies this filter to row vectors.
*/
default VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory)
Expand All @@ -151,6 +147,7 @@ default VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory

/**
* Returns true if this filter can produce a vectorized matcher from its "makeVectorMatcher" method.
*
* @param inspector Supplies type information for the selectors this filter will match against
*/
default boolean canVectorizeMatcher(ColumnInspector inspector)
Expand All @@ -176,7 +173,7 @@ default boolean supportsRequiredColumnRewrite()
* Return a copy of this filter that is identical to this filter except that it operates on different columns,
* based on a renaming map where the key is the column to be renamed in the filter, and the value is the new
* column name.
*
* <p>
* For example, if I have a filter (A = hello), and I have a renaming map (A -> B),
* this should return the filter (B = hello)
*
Expand Down
177 changes: 132 additions & 45 deletions processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,21 @@
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.query.BitmapResultFactory;
import org.apache.druid.query.filter.vector.VectorValueMatcher;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.column.ColumnIndexCapabilities;
import org.apache.druid.segment.column.SimpleColumnIndexCapabilities;
import org.apache.druid.segment.data.Offset;
import org.apache.druid.segment.filter.FalseFilter;
import org.apache.druid.segment.index.BitmapColumnIndex;
import org.apache.druid.segment.vector.ReadableVectorOffset;
import org.apache.druid.segment.vector.VectorColumnSelectorFactory;

import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
Expand All @@ -57,27 +62,12 @@
*/
public class FilterBundle
{
/**
 * Creates a {@link FilterBundle} that carries only an index bundle (its matcher bundle is null). The index bundle
 * wraps the supplied bitmap together with {@link SimpleColumnIndexCapabilities#getConstant()}, and its info
 * describes itself using the string form of {@link FalseFilter#instance()}.
 *
 * @param constructionTime value recorded in the {@link IndexBundleInfo}; presumably the bitmap build time in
 *                         nanoseconds (TODO confirm against the IndexBundleInfo constructor)
 * @param emptyBitmap      bitmap used as the index result; callers are expected to pass an empty bitmap, which is
 *                         not verified here
 * @return a bundle consisting solely of the index described above
 */
public static FilterBundle allFalse(long constructionTime, ImmutableBitmap emptyBitmap)
{
  final FilterBundle.IndexBundleInfo falseInfo = new FilterBundle.IndexBundleInfo(
      () -> FalseFilter.instance().toString(),
      0,
      constructionTime,
      null
  );
  final FilterBundle.SimpleIndexBundle falseIndex = new FilterBundle.SimpleIndexBundle(
      falseInfo,
      emptyBitmap,
      SimpleColumnIndexCapabilities.getConstant()
  );
  // No matcher is needed: the index alone fully describes the result.
  return new FilterBundle(falseIndex, null);
}

@Nullable
private final IndexBundle indexBundle;
@Nullable
private final MatcherBundle matcherBundle;

public FilterBundle(
@Nullable IndexBundle index,
@Nullable MatcherBundle matcherBundle
)
public FilterBundle(@Nullable IndexBundle index, @Nullable MatcherBundle matcherBundle)
{
Preconditions.checkArgument(
index != null || matcherBundle != null,
Expand All @@ -87,6 +77,17 @@ public FilterBundle(
this.matcherBundle = matcherBundle;
}

/**
 * Creates a {@link FilterBundle} that carries only an index bundle (its matcher bundle is null). The index bundle
 * wraps the supplied bitmap together with {@link SimpleColumnIndexCapabilities#getConstant()}, and its info
 * describes itself using the string form of {@link FalseFilter#instance()}.
 *
 * @param constructionTime value recorded in the {@link IndexBundleInfo}; presumably the bitmap build time in
 *                         nanoseconds (TODO confirm against the IndexBundleInfo constructor)
 * @param emptyBitmap      bitmap used as the index result; callers are expected to pass an empty bitmap, which is
 *                         not verified here
 * @return a bundle consisting solely of the index described above
 */
public static FilterBundle allFalse(long constructionTime, ImmutableBitmap emptyBitmap)
{
  final FilterBundle.IndexBundleInfo falseInfo = new FilterBundle.IndexBundleInfo(
      () -> FalseFilter.instance().toString(),
      0,
      constructionTime,
      null
  );
  final FilterBundle.SimpleIndexBundle falseIndex = new FilterBundle.SimpleIndexBundle(
      falseInfo,
      emptyBitmap,
      SimpleColumnIndexCapabilities.getConstant()
  );
  // No matcher is needed: the index alone fully describes the result.
  return new FilterBundle(falseIndex, null);
}

@Nullable
public IndexBundle getIndex()
Expand Down Expand Up @@ -151,6 +152,95 @@ public interface MatcherBundle
boolean canVectorize();
}

/**
 * Holds everything required to build a {@link FilterBundle} for a single {@link Filter}: the filter itself, the
 * {@link ColumnIndexSelector}, the filter's {@link BitmapColumnIndex} (if it has one), and one nested builder per
 * child filter when the filter is a {@link BooleanFilter}. It also exposes an estimated cost for
 * {@link BitmapColumnIndex#computeBitmapResult}, which is used to order child builders.
 */
public static class Builder
{
  /** Cost reported when no estimate is available, and the ceiling at which summed costs saturate. */
  private static final int MAX_COST = Integer.MAX_VALUE;

  private final Filter filter;
  private final ColumnIndexSelector columnIndexSelector;
  @Nullable
  private final BitmapColumnIndex bitmapColumnIndex;
  private final List<FilterBundle.Builder> childBuilders;
  private final int estimatedIndexComputeCost;

  /**
   * Resolves the filter's bitmap index and recursively creates builders for its children.
   *
   * @param filter                   filter this builder will produce a bundle for
   * @param columnIndexSelector      selector handed to {@link Filter#getBitmapColumnIndex} for this filter and every
   *                                 child filter
   * @param cursorAutoArrangeFilters when true, child builders are sorted by ascending estimated cost and this
   *                                 builder's own cost is computed; when false, children keep the order returned by
   *                                 {@link BooleanFilter#getFilters()} and the cost is {@link Integer#MAX_VALUE}
   */
  public Builder(Filter filter, ColumnIndexSelector columnIndexSelector, boolean cursorAutoArrangeFilters)
  {
    this.filter = filter;
    this.columnIndexSelector = columnIndexSelector;
    this.bitmapColumnIndex = filter.getBitmapColumnIndex(columnIndexSelector);

    // Only boolean filters have children; every other filter gets an empty child list.
    final List<FilterBundle.Builder> children;
    if (filter instanceof BooleanFilter) {
      final Collection<Filter> operands = ((BooleanFilter) filter).getFilters();
      children = new ArrayList<>(operands.size());
      for (Filter operand : operands) {
        children.add(new FilterBundle.Builder(operand, columnIndexSelector, cursorAutoArrangeFilters));
      }
    } else {
      children = new ArrayList<>(0);
    }

    if (cursorAutoArrangeFilters) {
      // Cheapest children first. List.sort is stable, so children with equal cost keep their relative order.
      children.sort(Comparator.comparingInt(FilterBundle.Builder::getEstimatedIndexComputeCost));
    }
    this.childBuilders = children;

    // The cost calculation reads bitmapColumnIndex and childBuilders, both of which are assigned above.
    this.estimatedIndexComputeCost = cursorAutoArrangeFilters ? calculateEstimatedIndexComputeCost() : MAX_COST;
  }

  /**
   * Adds this filter's own index cost to the estimated costs of all child builders.
   *
   * @return {@link Integer#MAX_VALUE} if there is no bitmap index, if the index itself reports
   *         {@link Integer#MAX_VALUE}, or if the running sum would reach that ceiling; otherwise the summed cost
   */
  private int calculateEstimatedIndexComputeCost()
  {
    final int ownCost = bitmapColumnIndex == null ? MAX_COST : bitmapColumnIndex.estimatedComputeCost();
    if (ownCost == MAX_COST) {
      return MAX_COST;
    }

    int total = ownCost;
    for (FilterBundle.Builder child : childBuilders) {
      final int childCost = child.getEstimatedIndexComputeCost();
      // Saturate instead of overflowing when the sum would hit the ceiling.
      if (childCost >= MAX_COST - total) {
        return MAX_COST;
      }
      total += childCost;
    }
    return total;
  }

  /**
   * @return the selector this builder was constructed with
   */
  public ColumnIndexSelector getColumnIndexSelector()
  {
    return columnIndexSelector;
  }

  /**
   * @return the index obtained from {@link Filter#getBitmapColumnIndex} at construction time, or null if the filter
   *         returned none
   */
  @Nullable
  public BitmapColumnIndex getBitmapColumnIndex()
  {
    return bitmapColumnIndex;
  }

  /**
   * @return the builders of the child filters (empty for non-boolean filters); this is the builder's own list, not
   *         a copy
   */
  public List<FilterBundle.Builder> getChildBuilders()
  {
    return childBuilders;
  }

  /**
   * @return the cost computed at construction time, or {@link Integer#MAX_VALUE} when no estimate was computed
   */
  public int getEstimatedIndexComputeCost()
  {
    return estimatedIndexComputeCost;
  }

  /**
   * Builds the {@link FilterBundle} by delegating to {@link Filter#makeFilterBundle} with this builder as the first
   * argument; the remaining arguments are passed through unchanged.
   *
   * @param bitmapResultFactory passed through to the filter
   * @param applyRowCount       passed through to the filter
   * @param totalRowCount       passed through to the filter
   * @param includeUnknown      passed through to the filter
   * @param <T>                 result type of the {@link BitmapResultFactory}
   * @return the bundle produced by the wrapped filter
   */
  public <T> FilterBundle build(
      BitmapResultFactory<T> bitmapResultFactory,
      int applyRowCount,
      int totalRowCount,
      boolean includeUnknown
  )
  {
    return filter.makeFilterBundle(this, bitmapResultFactory, applyRowCount, totalRowCount, includeUnknown);
  }
}

public static class SimpleIndexBundle implements IndexBundle
{
private final IndexBundleInfo info;
Expand Down Expand Up @@ -211,11 +301,7 @@ public MatcherBundleInfo getMatcherInfo()
}

@Override
public ValueMatcher valueMatcher(
ColumnSelectorFactory selectorFactory,
Offset baseOffset,
boolean descending
)
public ValueMatcher valueMatcher(ColumnSelectorFactory selectorFactory, Offset baseOffset, boolean descending)
{
return matcherFn.apply(selectorFactory);
}
Expand Down Expand Up @@ -339,12 +425,11 @@ public List<IndexBundleInfo> getIndexes()
*/
public String describe()
{
final StringBuilder sb = new StringBuilder()
.append("index: ")
.append(filter.get())
.append(" (selectionSize = ")
.append(selectionSize)
.append(")\n");
final StringBuilder sb = new StringBuilder().append("index: ")
.append(filter.get())
.append(" (selectionSize = ")
.append(selectionSize)
.append(")\n");

if (indexes != null) {
for (final IndexBundleInfo info : indexes) {
Expand All @@ -358,23 +443,26 @@ public String describe()
@Override
public String toString()
{
return "{" +
"filter=\"" + filter.get() + '\"' +
", selectionSize=" + selectionSize +
", buildTime=" + TimeUnit.NANOSECONDS.toMicros(buildTimeNs) + "μs" +
(indexes != null ? ", indexes=" + indexes : "") +
'}';
return "{"
+ "filter=\""
+ filter.get()
+ '\"'
+ ", selectionSize="
+ selectionSize
+ ", buildTime="
+ TimeUnit.NANOSECONDS.toMicros(buildTimeNs)
+ "μs"
+ (indexes != null ? ", indexes=" + indexes : "")
+ '}';
}
}

public static class MatcherBundleInfo
{
private static final Pattern PATTERN_LINE_START = Pattern.compile("(?m)^");

private final Supplier<String> filter;
@Nullable
final List<MatcherBundleInfo> matchers;

private final Supplier<String> filter;
@Nullable
private final IndexBundleInfo partialIndex;

Expand Down Expand Up @@ -415,10 +503,7 @@ public List<MatcherBundleInfo> getMatchers()
*/
public String describe()
{
final StringBuilder sb = new StringBuilder()
.append("matcher: ")
.append(filter.get())
.append("\n");
final StringBuilder sb = new StringBuilder().append("matcher: ").append(filter.get()).append("\n");

if (partialIndex != null) {
sb.append(" with partial ")
Expand All @@ -437,11 +522,13 @@ public String describe()
@Override
public String toString()
{
return "{" +
"filter=\"" + filter.get() + '\"' +
(partialIndex != null ? ", partialIndex=" + partialIndex : "") +
(matchers != null ? ", matchers=" + matchers : "") +
'}';
return "{"
+ "filter=\""
+ filter.get()
+ '\"'
+ (partialIndex != null ? ", partialIndex=" + partialIndex : "")
+ (matchers != null ? ", matchers=" + matchers : "")
+ '}';
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

/**
* This class provides a mechanism to influence whether or not indexes are used for a {@link Filter} during processing
* by {@link Filter#makeFilterBundle(ColumnIndexSelector, BitmapResultFactory, int, int, boolean)}
* by {@link Filter#makeFilterBundle(FilterBundle.Builder, BitmapResultFactory, int, int, boolean)}
* (i.e. will a {@link Filter} be a "pre" filter in which we union indexes for all values that match the filter to
* create a {@link org.apache.druid.segment.BitmapOffset}/{@link org.apache.druid.segment.vector.BitmapVectorOffset},
* or will it be used as a "post" filter and evaluated while scanning row values from the
Expand Down
Loading