From 06ecb1562e53bcd56ec526b2eee8b8ec20c2f1fb Mon Sep 17 00:00:00 2001 From: jon-wei Date: Tue, 30 Jun 2020 22:00:44 -0700 Subject: [PATCH 1/4] Ensure that join filter pre-analysis operates on optimized filters, add DimFilter.toOptimizedFilter --- .../druid/query/filter/BloomDimFilter.java | 2 +- .../filter/AbstractOptimizableDimFilter.java | 40 +++++++++ .../druid/query/filter/AndDimFilter.java | 2 +- .../druid/query/filter/BoundDimFilter.java | 2 +- .../filter/ColumnComparisonDimFilter.java | 2 +- .../apache/druid/query/filter/DimFilter.java | 16 ++++ .../query/filter/ExpressionDimFilter.java | 2 +- .../query/filter/ExtractionDimFilter.java | 2 +- .../druid/query/filter/FalseDimFilter.java | 2 +- .../druid/query/filter/InDimFilter.java | 2 +- .../druid/query/filter/IntervalDimFilter.java | 2 +- .../query/filter/JavaScriptDimFilter.java | 2 +- .../druid/query/filter/LikeDimFilter.java | 2 +- .../druid/query/filter/NotDimFilter.java | 2 +- .../druid/query/filter/OrDimFilter.java | 2 +- .../druid/query/filter/RegexDimFilter.java | 2 +- .../query/filter/SearchQueryDimFilter.java | 2 +- .../druid/query/filter/SelectorDimFilter.java | 2 +- .../druid/query/filter/SpatialDimFilter.java | 2 +- .../druid/query/filter/TrueDimFilter.java | 2 +- .../groupby/GroupByQueryQueryToolChest.java | 10 +-- .../apache/druid/query/scan/ScanQuery.java | 5 -- .../query/scan/ScanQueryQueryToolChest.java | 5 -- .../druid/query/search/SearchQuery.java | 5 -- .../search/SearchQueryQueryToolChest.java | 5 -- .../TimeseriesQueryQueryToolChest.java | 5 -- .../apache/druid/query/topn/TopNQuery.java | 5 -- .../query/topn/TopNQueryQueryToolChest.java | 12 +-- .../apache/druid/segment/filter/Filters.java | 2 +- .../join/HashJoinSegmentStorageAdapter.java | 4 + .../query/filter/FalseDimFilterTest.java | 5 +- .../druid/query/filter/TrueDimFilterTest.java | 5 +- .../druid/sql/calcite/CalciteQueryTest.java | 82 +++++++++++++++++++ 33 files changed, 176 insertions(+), 66 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/query/filter/AbstractOptimizableDimFilter.java diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java index 691826931459..a22fec727464 100644 --- a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java +++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java @@ -38,7 +38,7 @@ /** */ -public class BloomDimFilter implements DimFilter +public class BloomDimFilter extends AbstractOptimizableDimFilter implements DimFilter { private final String dimension; diff --git a/processing/src/main/java/org/apache/druid/query/filter/AbstractOptimizableDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/AbstractOptimizableDimFilter.java new file mode 100644 index 000000000000..1566fa38e663 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/AbstractOptimizableDimFilter.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter; + +import com.fasterxml.jackson.annotation.JsonIgnore; + +/** + * Base class for DimFilters that support optimization. + */ +abstract class AbstractOptimizableDimFilter implements DimFilter +{ + private Filter cachedOptimizedFilter = null; + + @JsonIgnore + @Override + public Filter toOptimizedFilter() + { + if (cachedOptimizedFilter == null) { + cachedOptimizedFilter = optimize().toFilter(); + } + return cachedOptimizedFilter; + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/AndDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/AndDimFilter.java index 18df9d67a170..9e29658e678c 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/AndDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/AndDimFilter.java @@ -37,7 +37,7 @@ /** */ -public class AndDimFilter implements DimFilter +public class AndDimFilter extends AbstractOptimizableDimFilter implements DimFilter { private static final Joiner AND_JOINER = Joiner.on(" && "); diff --git a/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java index 98190a76f808..4b6fc8799683 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java @@ -47,7 +47,7 @@ import java.util.Objects; import java.util.Set; -public class BoundDimFilter implements DimFilter +public class BoundDimFilter extends AbstractOptimizableDimFilter implements DimFilter { private final String dimension; @Nullable diff --git a/processing/src/main/java/org/apache/druid/query/filter/ColumnComparisonDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/ColumnComparisonDimFilter.java index dd97ce2808cf..bff805ce4708 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/ColumnComparisonDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/ColumnComparisonDimFilter.java @@ -35,7 +35,7 @@ /** */ -public class ColumnComparisonDimFilter implements DimFilter +public class ColumnComparisonDimFilter extends AbstractOptimizableDimFilter implements DimFilter { private static final Joiner COMMA_JOINER = Joiner.on(", "); diff --git a/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java index 6a38457f30c4..d10ec7700f2f 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java @@ -23,7 +23,11 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo; import com.google.common.collect.RangeSet; import org.apache.druid.java.util.common.Cacheable; +import org.apache.druid.java.util.common.granularity.Granularity; +import org.apache.druid.query.QueryMetrics; import org.apache.druid.query.extraction.ExtractionFn; +import org.apache.druid.segment.VirtualColumns; +import org.joda.time.Interval; import javax.annotation.Nullable; import java.util.Set; @@ -58,6 +62,18 @@ public interface DimFilter extends Cacheable */ DimFilter optimize(); + /** + * @return Return a Filter that implements this DimFilter, after applying optimizations to this DimFilter. + * A typical implementation will return the result of `optimize().toFilter()` + * See abstract base class {@link AbstractOptimizableDimFilter} for a common implementation shared by + * current DimFilters. + * + * The Filter returned by this method across multiple calls must be the same object: parts of the query stack + * compare Filters, and returning the same object allows these checks to avoid deep comparisons. + * (see {@link org.apache.druid.segment.join.HashJoinSegmentStorageAdapter#makeCursors for an example} + */ + Filter toOptimizedFilter(); + /** * Returns a Filter that implements this DimFilter. This does not generally involve optimizing the DimFilter, * so it does make sense to optimize first and then call toFilter on the resulting DimFilter. diff --git a/processing/src/main/java/org/apache/druid/query/filter/ExpressionDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/ExpressionDimFilter.java index d902d598192a..2f65cc64fdad 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/ExpressionDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/ExpressionDimFilter.java @@ -37,7 +37,7 @@ import java.util.Objects; import java.util.Set; -public class ExpressionDimFilter implements DimFilter +public class ExpressionDimFilter extends AbstractOptimizableDimFilter implements DimFilter { private final String expression; private final Supplier parsed; diff --git a/processing/src/main/java/org/apache/druid/query/filter/ExtractionDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/ExtractionDimFilter.java index fe84a9e340eb..0bf706344aaa 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/ExtractionDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/ExtractionDimFilter.java @@ -34,7 +34,7 @@ * This class is deprecated, use SelectorDimFilter instead: {@link SelectorDimFilter} */ @Deprecated -public class ExtractionDimFilter implements DimFilter +public class ExtractionDimFilter extends AbstractOptimizableDimFilter implements DimFilter { private final String dimension; private final String value; diff --git a/processing/src/main/java/org/apache/druid/query/filter/FalseDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/FalseDimFilter.java index 4b21e5eb4939..14bfbc0b1d0c 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/FalseDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/FalseDimFilter.java @@ -29,7 +29,7 @@ import java.util.Collections; import java.util.Set; -public class FalseDimFilter implements DimFilter +public class FalseDimFilter extends AbstractOptimizableDimFilter implements DimFilter { private static final FalseDimFilter INSTANCE = new FalseDimFilter(); private static final byte[] CACHE_KEY = new CacheKeyBuilder(DimFilterUtils.FALSE_CACHE_ID).build(); diff --git a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java index d5ec5588ba38..25688ba87f8d 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java @@ -61,7 +61,7 @@ import java.util.Set; import java.util.stream.Collectors; -public class InDimFilter implements DimFilter +public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilter { // determined through benchmark that binary search on long[] is faster than HashSet until ~16 elements // Hashing threshold is not applied to String for now, String still uses ImmutableSortedSet diff --git a/processing/src/main/java/org/apache/druid/query/filter/IntervalDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/IntervalDimFilter.java index c1e7ec0d7584..bb6d56972c50 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/IntervalDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/IntervalDimFilter.java @@ -42,7 +42,7 @@ import java.util.Objects; import java.util.Set; -public class IntervalDimFilter implements DimFilter +public class IntervalDimFilter extends AbstractOptimizableDimFilter implements DimFilter { private final List intervals; private final List> intervalLongs; diff --git a/processing/src/main/java/org/apache/druid/query/filter/JavaScriptDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/JavaScriptDimFilter.java index 9329e865fb62..62a379a36a45 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/JavaScriptDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/JavaScriptDimFilter.java @@ -43,7 +43,7 @@ import java.util.Objects; import java.util.Set; -public class JavaScriptDimFilter implements DimFilter +public class JavaScriptDimFilter extends AbstractOptimizableDimFilter implements DimFilter { private final String dimension; private final String function; diff --git a/processing/src/main/java/org/apache/druid/query/filter/LikeDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/LikeDimFilter.java index 06a6e2b0cda3..a92bd44fa745 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/LikeDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/LikeDimFilter.java @@ -41,7 +41,7 @@ import java.util.Set; import java.util.regex.Pattern; -public class LikeDimFilter implements DimFilter +public class LikeDimFilter extends AbstractOptimizableDimFilter implements DimFilter { // Regex matching characters that are definitely okay to include unescaped in a regex. // Leads to excessively paranoid escaping, although shouldn't affect runtime beyond compiling the regex. diff --git a/processing/src/main/java/org/apache/druid/query/filter/NotDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/NotDimFilter.java index 0cd2969fe698..0b154694381e 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/NotDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/NotDimFilter.java @@ -32,7 +32,7 @@ /** */ -public class NotDimFilter implements DimFilter +public class NotDimFilter extends AbstractOptimizableDimFilter implements DimFilter { private final DimFilter field; diff --git a/processing/src/main/java/org/apache/druid/query/filter/OrDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/OrDimFilter.java index ca8c105b7a3d..d758d151b484 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/OrDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/OrDimFilter.java @@ -38,7 +38,7 @@ /** */ -public class OrDimFilter implements DimFilter +public class OrDimFilter extends AbstractOptimizableDimFilter implements DimFilter { private static final Joiner OR_JOINER = Joiner.on(" || "); diff --git a/processing/src/main/java/org/apache/druid/query/filter/RegexDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/RegexDimFilter.java index b696ad9ff9dd..e78a12d9be14 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/RegexDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/RegexDimFilter.java @@ -38,7 +38,7 @@ /** */ -public class RegexDimFilter implements DimFilter +public class RegexDimFilter extends AbstractOptimizableDimFilter implements DimFilter { private final String dimension; private final String pattern; diff --git a/processing/src/main/java/org/apache/druid/query/filter/SearchQueryDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/SearchQueryDimFilter.java index 757cc7955875..f89d8d254b9e 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/SearchQueryDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/SearchQueryDimFilter.java @@ -37,7 +37,7 @@ /** */ -public class SearchQueryDimFilter implements DimFilter +public class SearchQueryDimFilter extends AbstractOptimizableDimFilter implements DimFilter { private final String dimension; private final SearchQuerySpec query; diff --git a/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java index d27f4136ac02..c717dbead311 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java @@ -41,7 +41,7 @@ /** * */ -public class SelectorDimFilter implements DimFilter +public class SelectorDimFilter extends AbstractOptimizableDimFilter implements DimFilter { private final String dimension; diff --git a/processing/src/main/java/org/apache/druid/query/filter/SpatialDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/SpatialDimFilter.java index c7a124820f11..6620203639f5 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/SpatialDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/SpatialDimFilter.java @@ -37,7 +37,7 @@ /** */ -public class SpatialDimFilter implements DimFilter +public class SpatialDimFilter extends AbstractOptimizableDimFilter implements DimFilter { private final String dimension; private final Bound bound; diff --git a/processing/src/main/java/org/apache/druid/query/filter/TrueDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/TrueDimFilter.java index af297103aa71..dda337bcadc0 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/TrueDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/TrueDimFilter.java @@ -27,7 +27,7 @@ import java.util.Collections; import java.util.Set; -public class TrueDimFilter implements DimFilter +public class TrueDimFilter extends AbstractOptimizableDimFilter implements DimFilter { private static final TrueDimFilter INSTANCE = new TrueDimFilter(); private static final byte[] CACHE_KEY = new CacheKeyBuilder(DimFilterUtils.TRUE_CACHE_ID).build(); diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryQueryToolChest.java index 741b8f6dd978..83378fbbe527 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryQueryToolChest.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryQueryToolChest.java @@ -472,13 +472,9 @@ public QueryRunner preMergeQueryDecoration(final QueryRunner run(QueryPlus queryPlus, ResponseContext responseContext) { GroupByQuery groupByQuery = (GroupByQuery) queryPlus.getQuery(); - if (groupByQuery.getDimFilter() != null) { - groupByQuery = groupByQuery.withDimFilter(groupByQuery.getDimFilter().optimize()); - } - final GroupByQuery delegateGroupByQuery = groupByQuery; final List dimensionSpecs = new ArrayList<>(); - final BitSet optimizedDimensions = extractionsToRewrite(delegateGroupByQuery); - final List dimensions = delegateGroupByQuery.getDimensions(); + final BitSet optimizedDimensions = extractionsToRewrite(groupByQuery); + final List dimensions = groupByQuery.getDimensions(); for (int i = 0; i < dimensions.size(); i++) { final DimensionSpec dimensionSpec = dimensions.get(i); if (optimizedDimensions.get(i)) { @@ -491,7 +487,7 @@ public Sequence run(QueryPlus queryPlus, ResponseContext r } return runner.run( - queryPlus.withQuery(delegateGroupByQuery.withDimensionSpecs(dimensionSpecs)), + queryPlus.withQuery(groupByQuery.withDimensionSpecs(dimensionSpecs)), responseContext ); } diff --git a/processing/src/main/java/org/apache/druid/query/scan/ScanQuery.java b/processing/src/main/java/org/apache/druid/query/scan/ScanQuery.java index be5d24b71b34..b87a1c4c4bbb 100644 --- a/processing/src/main/java/org/apache/druid/query/scan/ScanQuery.java +++ b/processing/src/main/java/org/apache/druid/query/scan/ScanQuery.java @@ -294,11 +294,6 @@ public Query withOverriddenContext(Map contextO return Druids.ScanQueryBuilder.copy(this).context(computeOverriddenContext(getContext(), contextOverrides)).build(); } - public ScanQuery withDimFilter(DimFilter dimFilter) - { - return Druids.ScanQueryBuilder.copy(this).filters(dimFilter).build(); - } - @Override public boolean equals(final Object o) { diff --git a/processing/src/main/java/org/apache/druid/query/scan/ScanQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/scan/ScanQueryQueryToolChest.java index cc6ae068f823..21550e1280fb 100644 --- a/processing/src/main/java/org/apache/druid/query/scan/ScanQueryQueryToolChest.java +++ b/processing/src/main/java/org/apache/druid/query/scan/ScanQueryQueryToolChest.java @@ -118,11 +118,6 @@ public TypeReference getResultTypeReference() public QueryRunner preMergeQueryDecoration(final QueryRunner runner) { return (queryPlus, responseContext) -> { - ScanQuery scanQuery = (ScanQuery) queryPlus.getQuery(); - if (scanQuery.getFilter() != null) { - scanQuery = scanQuery.withDimFilter(scanQuery.getFilter().optimize()); - queryPlus = queryPlus.withQuery(scanQuery); - } return runner.run(queryPlus, responseContext); }; } diff --git a/processing/src/main/java/org/apache/druid/query/search/SearchQuery.java b/processing/src/main/java/org/apache/druid/query/search/SearchQuery.java index 4182c5b6d72f..c2c2fb99de62 100644 --- a/processing/src/main/java/org/apache/druid/query/search/SearchQuery.java +++ b/processing/src/main/java/org/apache/druid/query/search/SearchQuery.java @@ -109,11 +109,6 @@ public SearchQuery withOverriddenContext(Map contextOverrides) return Druids.SearchQueryBuilder.copy(this).context(newContext).build(); } - public SearchQuery withDimFilter(DimFilter dimFilter) - { - return Druids.SearchQueryBuilder.copy(this).filters(dimFilter).build(); - } - @JsonProperty("filter") public DimFilter getDimensionsFilter() { diff --git a/processing/src/main/java/org/apache/druid/query/search/SearchQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/search/SearchQueryQueryToolChest.java index 08ee67b2918e..4f0563317d9e 100644 --- a/processing/src/main/java/org/apache/druid/query/search/SearchQueryQueryToolChest.java +++ b/processing/src/main/java/org/apache/druid/query/search/SearchQueryQueryToolChest.java @@ -322,11 +322,6 @@ public QueryRunner> preMergeQueryDecoration(final Quer { return new SearchThresholdAdjustingQueryRunner( (queryPlus, responseContext) -> { - SearchQuery searchQuery = (SearchQuery) queryPlus.getQuery(); - if (searchQuery.getDimensionsFilter() != null) { - searchQuery = searchQuery.withDimFilter(searchQuery.getDimensionsFilter().optimize()); - queryPlus = queryPlus.withQuery(searchQuery); - } return runner.run(queryPlus, responseContext); }, config diff --git a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryQueryToolChest.java index 2ce7e3b34632..2718282c908b 100644 --- a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryQueryToolChest.java +++ b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryQueryToolChest.java @@ -377,11 +377,6 @@ public Result apply(Object input) public QueryRunner> preMergeQueryDecoration(final QueryRunner> runner) { return (queryPlus, responseContext) -> { - TimeseriesQuery timeseriesQuery = (TimeseriesQuery) queryPlus.getQuery(); - if (timeseriesQuery.getDimensionsFilter() != null) { - timeseriesQuery = timeseriesQuery.withDimFilter(timeseriesQuery.getDimensionsFilter().optimize()); - queryPlus = queryPlus.withQuery(timeseriesQuery); - } return runner.run(queryPlus, responseContext); }; } diff --git a/processing/src/main/java/org/apache/druid/query/topn/TopNQuery.java b/processing/src/main/java/org/apache/druid/query/topn/TopNQuery.java index f5bdec1982a0..8db446bf74d8 100644 --- a/processing/src/main/java/org/apache/druid/query/topn/TopNQuery.java +++ b/processing/src/main/java/org/apache/druid/query/topn/TopNQuery.java @@ -203,11 +203,6 @@ public TopNQuery withOverriddenContext(Map contextOverrides) return new TopNQueryBuilder(this).context(computeOverriddenContext(getContext(), contextOverrides)).build(); } - public TopNQuery withDimFilter(DimFilter dimFilter) - { - return new TopNQueryBuilder(this).filters(dimFilter).build(); - } - @Override public String toString() { diff --git a/processing/src/main/java/org/apache/druid/query/topn/TopNQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/topn/TopNQueryQueryToolChest.java index 18501259b812..2401d9790e12 100644 --- a/processing/src/main/java/org/apache/druid/query/topn/TopNQueryQueryToolChest.java +++ b/processing/src/main/java/org/apache/druid/query/topn/TopNQueryQueryToolChest.java @@ -425,14 +425,10 @@ public QueryRunner> preMergeQueryDecoration(final QueryR { return (queryPlus, responseContext) -> { TopNQuery topNQuery = (TopNQuery) queryPlus.getQuery(); - if (topNQuery.getDimensionsFilter() != null) { - topNQuery = topNQuery.withDimFilter(topNQuery.getDimensionsFilter().optimize()); - } - final TopNQuery delegateTopNQuery = topNQuery; - if (TopNQueryEngine.canApplyExtractionInPost(delegateTopNQuery)) { - final DimensionSpec dimensionSpec = delegateTopNQuery.getDimensionSpec(); + if (TopNQueryEngine.canApplyExtractionInPost(topNQuery)) { + final DimensionSpec dimensionSpec = topNQuery.getDimensionSpec(); QueryPlus> delegateQueryPlus = queryPlus.withQuery( - delegateTopNQuery.withDimensionSpec( + topNQuery.withDimensionSpec( new DefaultDimensionSpec( dimensionSpec.getDimension(), dimensionSpec.getOutputName() @@ -441,7 +437,7 @@ public QueryRunner> preMergeQueryDecoration(final QueryR ); return runner.run(delegateQueryPlus, responseContext); } else { - return runner.run(queryPlus.withQuery(delegateTopNQuery), responseContext); + return runner.run(queryPlus.withQuery(topNQuery), responseContext); } }; } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java index 990127f99f04..399adb9eaaf3 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java @@ -86,7 +86,7 @@ public static Set toFilters(List dimFilters) @Nullable public static Filter toFilter(@Nullable DimFilter dimFilter) { - return dimFilter == null ? null : dimFilter.toFilter(); + return dimFilter == null ? null : dimFilter.toOptimizedFilter(); } /** diff --git a/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java index e23a2d4df2b9..51e386a9dce6 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java @@ -223,6 +223,10 @@ public Sequence makeCursors( final JoinFilterPreAnalysisKey keyCached = joinFilterPreAnalysis.getKey(); + if (keyIn.getFilter() != keyCached.getFilter()) { + throw new ISE("Pre-analysis filter and cursor filter are not the same object"); + } + if (!keyIn.equals(keyCached)) { // It is a bug if this happens. We expect the comparison to be quick, because in the sane case, identical objects // will be used and therefore deep equality checks will be unnecessary. diff --git a/processing/src/test/java/org/apache/druid/query/filter/FalseDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/FalseDimFilterTest.java index a512bcac7389..2469c13721fe 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/FalseDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/FalseDimFilterTest.java @@ -42,6 +42,9 @@ public void testSerde() throws IOException @Test public void testEquals() { - EqualsVerifier.forClass(FalseDimFilter.class).usingGetClass().verify(); + EqualsVerifier.forClass(FalseDimFilter.class) + .usingGetClass() + .withIgnoredFields("cachedOptimizedFilter") + .verify(); } } diff --git a/processing/src/test/java/org/apache/druid/query/filter/TrueDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/TrueDimFilterTest.java index 3c8838278ef2..9d504aef73ae 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/TrueDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/TrueDimFilterTest.java @@ -42,6 +42,9 @@ public void testSerde() throws IOException @Test public void testEquals() { - EqualsVerifier.forClass(TrueDimFilter.class).usingGetClass().verify(); + EqualsVerifier.forClass(TrueDimFilter.class) + .usingGetClass() + .withIgnoredFields("cachedOptimizedFilter") + .verify(); } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 17c970fa138d..2ee41c8ee477 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -36,10 +36,12 @@ import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.PeriodGranularity; +import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.Druids; import org.apache.druid.query.GlobalTableDataSource; import org.apache.druid.query.LookupDataSource; +import org.apache.druid.query.Query; import org.apache.druid.query.QueryContexts; import org.apache.druid.query.QueryDataSource; import org.apache.druid.query.QueryException; @@ -85,6 +87,7 @@ import org.apache.druid.query.filter.RegexDimFilter; import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.groupby.GroupByQuery; +import org.apache.druid.query.groupby.ResultRow; import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; import org.apache.druid.query.groupby.orderby.OrderByColumnSpec; import org.apache.druid.query.groupby.orderby.OrderByColumnSpec.Direction; @@ -97,6 +100,8 @@ import org.apache.druid.query.topn.TopNQueryBuilder; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.join.JoinType; +import org.apache.druid.server.QueryLifecycle; +import org.apache.druid.server.QueryLifecycleFactory; import org.apache.druid.sql.calcite.expression.DruidExpression; import org.apache.druid.sql.calcite.filtration.Filtration; import org.apache.druid.sql.calcite.planner.Calcites; @@ -11990,6 +11995,83 @@ public void testNestedGroupByOnInlineDataSourceWithFilter(Map qu ); } + @Test + @Parameters(source = QueryContextForJoinProvider.class) + public void testGroupByJoinAsNativeQueryWithUnoptimizedFilter(Map queryContext) + { + // The query below is the same as the inner groupBy on a join datasource from the test + // testNestedGroupByOnInlineDataSourceWithFilter, except that the selector filter + // dim1=def has been rewritten into an unoptimized filter, dim1 IN (def). + // + // The unoptimized filter will be optimized into dim1=def by the query toolchests in their + // pre-merge decoration function, when it calls DimFilter.optimize(). + // + // This test's goal is to ensure that the join filter rewrites function correctly when there are + // unoptimized filters in the join query. The rewrite logic must apply to the optimized form of the filters, + // as this is what will be passed to HashJoinSegmentAdapter.makeCursors(), where the result of the join + // filter pre-analysis is used. + // + // A native query is used because the filter types where we support optimization are the AND/OR/NOT and + // IN filters. However, when expressed in a SQL query, our SQL planning layer is smart enough to already apply + // these optimizations in the native query it generates, making it impossible to test the unoptimized filter forms + // using SQL queries. + // + // The test method is placed here for convenience as this class provides the necessary setup. + Query query = GroupByQuery + .builder() + .setDataSource( + join( + new QueryDataSource( + newScanQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Intervals.of("2001-01-02T00:00:00.000Z/146140482-04-24T15:36:27.903Z"))) + .columns("dim1") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .context(queryContext) + .build() + ), + new QueryDataSource( + newScanQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Intervals.of("2001-01-02T00:00:00.000Z/146140482-04-24T15:36:27.903Z"))) + .columns("dim1", "m2") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .context(queryContext) + .build() + ), + "j0.", + equalsCondition( + DruidExpression.fromColumn("dim1"), + DruidExpression.fromColumn("j0.dim1") + ), + JoinType.INNER + ) + ) + .setGranularity(Granularities.ALL) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setDimFilter(in("dim1", Collections.singletonList("def"), null)) // provide an unoptimized IN filter + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "d0") + ) + ) + .setVirtualColumns(expressionVirtualColumn("v0", "'def'", ValueType.STRING)) + .build(); + + QueryLifecycleFactory qlf = CalciteTests.createMockQueryLifecycleFactory(walker, conglomerate); + QueryLifecycle ql = qlf.factorize(); + Sequence seq = ql.runSimple( + query, + CalciteTests.SUPER_USER_AUTH_RESULT, + null + ); + List results = seq.toList(); + Assert.assertEquals( + ImmutableList.of(ResultRow.of("def")), + results + ); + } + @Test public void testUsingSubqueryAsFilterOnTwoColumns() throws Exception { From 83fd145e3fde446d088ed6c0b5ea87c4bd426c8e Mon Sep 17 00:00:00 2001 From: jon-wei Date: Wed, 1 Jul 2020 12:38:34 -0700 Subject: [PATCH 2/4] Remove aggressive equality check that was used for testing --- .../druid/segment/join/HashJoinSegmentStorageAdapter.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java index 51e386a9dce6..e23a2d4df2b9 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java @@ -223,10 +223,6 @@ public Sequence makeCursors( final JoinFilterPreAnalysisKey keyCached = joinFilterPreAnalysis.getKey(); - if (keyIn.getFilter() != keyCached.getFilter()) { - throw new ISE("Pre-analysis filter and cursor filter are not the same object"); - } - if (!keyIn.equals(keyCached)) { // It is a bug if this happens. We expect the comparison to be quick, because in the sane case, identical objects // will be used and therefore deep equality checks will be unnecessary. From 8d32bbdad32346c9189dba05495985710dd5b334 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Wed, 1 Jul 2020 15:16:06 -0700 Subject: [PATCH 3/4] Use Suppliers.memoize --- .../query/filter/AbstractOptimizableDimFilter.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/AbstractOptimizableDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/AbstractOptimizableDimFilter.java index 1566fa38e663..24d32d323a61 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/AbstractOptimizableDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/AbstractOptimizableDimFilter.java @@ -20,21 +20,23 @@ package org.apache.druid.query.filter; import com.fasterxml.jackson.annotation.JsonIgnore; +import com.google.common.base.Supplier; +import com.google.common.base.Suppliers; /** - * Base class for DimFilters that support optimization. + * Base class for DimFilters that support optimization. This abstract class provides a default implementation of + * toOptimizedFilter that relies on the existing optimize() and toFilter() methods. It uses a memoized supplier. */ abstract class AbstractOptimizableDimFilter implements DimFilter { - private Filter cachedOptimizedFilter = null; + private final Supplier cachedOptimizedFilter = Suppliers.memoize( + () -> optimize().toFilter() + ); @JsonIgnore @Override public Filter toOptimizedFilter() { - if (cachedOptimizedFilter == null) { - cachedOptimizedFilter = optimize().toFilter(); - } - return cachedOptimizedFilter; + return cachedOptimizedFilter.get(); } } From b013ac4ca3e92fd3c061299fc48ed8e43e14e729 Mon Sep 17 00:00:00 2001 From: jon-wei Date: Wed, 1 Jul 2020 19:53:40 -0700 Subject: [PATCH 4/4] Checkstyle --- .../main/java/org/apache/druid/query/filter/DimFilter.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java index d10ec7700f2f..60580881c392 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java @@ -23,11 +23,7 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo; import com.google.common.collect.RangeSet; import org.apache.druid.java.util.common.Cacheable; -import org.apache.druid.java.util.common.granularity.Granularity; -import org.apache.druid.query.QueryMetrics; import org.apache.druid.query.extraction.ExtractionFn; -import org.apache.druid.segment.VirtualColumns; -import org.joda.time.Interval; import javax.annotation.Nullable; import java.util.Set;