From 2cbf0a2d516b4d4178f38163bdb4dc6a822ca442 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 8 Apr 2020 03:06:49 -0700 Subject: [PATCH 1/3] document useFilterCNF query context parameter --- docs/querying/query-context.md | 1 + website/.spelling | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/querying/query-context.md b/docs/querying/query-context.md index 020527309530..44c51d2176dd 100644 --- a/docs/querying/query-context.md +++ b/docs/querying/query-context.md @@ -45,6 +45,7 @@ The query context is used for various query configuration parameters. The follow |parallelMergeParallelism|`druid.processing.merge.pool.parallelism`|Maximum number of parallel threads to use for parallel result merging on the Broker. See [Broker configuration](../configuration/index.html#broker) for more details.| |parallelMergeInitialYieldRows|`druid.processing.merge.task.initialYieldNumRows`|Number of rows to yield per ForkJoinPool merge task for parallel result merging on the Broker, before forking off a new task to continue merging sequences. See [Broker configuration](../configuration/index.html#broker) for more details.| |parallelMergeSmallBatchRows|`druid.processing.merge.task.smallBatchNumRows`|Size of result batches to operate on in ForkJoinPool merge tasks for parallel result merging on the Broker. See [Broker configuration](../configuration/index.html#broker) for more details.| +|useFilterCNF|`false`| If true, Druid will attempt to convert the query filter to Conjunctive Normal Form (CNF). During query processing, columns can be pre-filtered by intersecting the bitmap indexes of all values that match the eligible filters, often greatly reducing the raw number of rows which need to be scanned. But this effect only happens for the top level filter, or individual clauses of a top level 'and' filter. As such, filters in CNF potentially have a higher chance to utilize a large number of bitmap indexes on string columns during pre-filtering. 
However, this setting should be used with great caution, as it can sometimes have a negative effect on performance, and in some cases, the act of computing the CNF of a filter can be expensive. We recommend hand tuning your filters to produce an optimal form if possible, or at least verifying through experimentation that using this parameter actually improves your query performance with no ill effects.| In addition, some query types offer context parameters specific to that query type. diff --git a/website/.spelling b/website/.spelling index b3f1deae329f..14c8be59d2f2 100644 --- a/website/.spelling +++ b/website/.spelling @@ -379,6 +379,7 @@ unmergeable unmerged unparseable unparsed +useFilterCNF uptime uris useFieldDiscovery From 5c2c087b4d728ce05883adf483d256da54a561d2 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 8 Apr 2020 03:14:53 -0700 Subject: [PATCH 2/3] move context key to QueryContexts --- .../org/apache/druid/query/QueryContexts.java | 6 ++- .../apache/druid/segment/filter/Filters.java | 4 +- ...BaseHashJoinSegmentStorageAdapterTest.java | 2 +- .../HashJoinSegmentStorageAdapterTest.java | 46 +++++++++---------- .../segment/join/HashJoinSegmentTest.java | 4 +- .../segment/join/JoinFilterAnalyzerTest.java | 8 ++-- .../druid/segment/join/JoinablesTest.java | 6 +-- 7 files changed, 39 insertions(+), 37 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/QueryContexts.java b/processing/src/main/java/org/apache/druid/query/QueryContexts.java index ab3c02c353e6..b5a7be0b7dd8 100644 --- a/processing/src/main/java/org/apache/druid/query/QueryContexts.java +++ b/processing/src/main/java/org/apache/druid/query/QueryContexts.java @@ -52,6 +52,7 @@ public class QueryContexts public static final String JOIN_FILTER_REWRITE_ENABLE_KEY = "enableJoinFilterRewrite"; public static final String JOIN_FILTER_REWRITE_VALUE_COLUMN_FILTERS_ENABLE_KEY = "enableJoinFilterRewriteValueColumnFilters"; public static final String 
JOIN_FILTER_REWRITE_MAX_SIZE_KEY = "joinFilterRewriteMaxSize"; + public static final String USE_FILTER_CNF_KEY = "useFilterCNF"; public static final boolean DEFAULT_BY_SEGMENT = false; public static final boolean DEFAULT_POPULATE_CACHE = true; @@ -67,7 +68,8 @@ public class QueryContexts public static final boolean DEFAULT_ENABLE_JOIN_FILTER_PUSH_DOWN = true; public static final boolean DEFAULT_ENABLE_JOIN_FILTER_REWRITE = true; public static final boolean DEFAULT_ENABLE_JOIN_FILTER_REWRITE_VALUE_COLUMN_FILTERS = false; - public static final long DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY = 10000; + public static final long DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE = 10000; + public static final boolean DEFAULT_USE_FILTER_CNF = false; @SuppressWarnings("unused") // Used by Jackson serialization public enum Vectorize @@ -249,7 +251,7 @@ public static boolean getEnableJoinFilterRewriteValueColumnFilters(Query public static long getJoinFilterRewriteMaxSize(Query query) { - return parseLong(query, JOIN_FILTER_REWRITE_MAX_SIZE_KEY, DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY); + return parseLong(query, JOIN_FILTER_REWRITE_MAX_SIZE_KEY, DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE); } public static boolean getEnableJoinFilterPushDown(Query query) diff --git a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java index 2328dfb181fe..438225c6de8a 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java @@ -29,6 +29,7 @@ import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.query.Query; +import org.apache.druid.query.QueryContexts; import org.apache.druid.query.filter.BitmapIndexSelector; import org.apache.druid.query.filter.BooleanFilter; import org.apache.druid.query.filter.DimFilter; @@ -61,7 
+62,6 @@ */ public class Filters { - private static final String CTX_KEY_USE_FILTER_CNF = "useFilterCNF"; /** * Convert a list of DimFilters to a list of Filters. @@ -425,7 +425,7 @@ public static Filter convertToCNFFromQueryContext(Query query, @Nullable Filter if (filter == null) { return null; } - boolean useCNF = query.getContextBoolean(CTX_KEY_USE_FILTER_CNF, false); + boolean useCNF = query.getContextBoolean(QueryContexts.USE_FILTER_CNF_KEY, QueryContexts.DEFAULT_USE_FILTER_CNF); return useCNF ? toCNF(filter) : filter; } diff --git a/processing/src/test/java/org/apache/druid/segment/join/BaseHashJoinSegmentStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/join/BaseHashJoinSegmentStorageAdapterTest.java index 9903269561a4..a01a34b9c23c 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/BaseHashJoinSegmentStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/BaseHashJoinSegmentStorageAdapterTest.java @@ -193,7 +193,7 @@ protected HashJoinSegmentStorageAdapter makeFactToCountrySegment() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); return new HashJoinSegmentStorageAdapter( diff --git a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java index c8460fe34108..2294180da6e3 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java @@ -309,7 +309,7 @@ public void test_makeCursors_factToCountryLeft() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -377,7 +377,7 @@ public void 
test_makeCursors_factToCountryInner() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -439,7 +439,7 @@ public void test_makeCursors_factToCountryInnerUsingLookup() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -503,7 +503,7 @@ public void test_makeCursors_factToCountryInnerUsingCountryNumber() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -573,7 +573,7 @@ public void test_makeCursors_factToCountryInnerUsingCountryNumberUsingLookup() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -639,7 +639,7 @@ public void test_makeCursors_factToCountryLeftWithFilterOnFacts() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -680,7 +680,7 @@ public void test_makeCursors_factToCountryRightWithFilterOnLeftIsNull() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -723,7 +723,7 @@ public void test_makeCursors_factToCountryFullWithFilterOnLeftIsNull() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -771,7 +771,7 @@ public void test_makeCursors_factToCountryRightWithFilterOnJoinable() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + 
QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -819,7 +819,7 @@ public void test_makeCursors_factToCountryLeftWithFilterOnJoinable() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -866,7 +866,7 @@ public void test_makeCursors_factToCountryLeftWithFilterOnJoinableUsingLookup() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -926,7 +926,7 @@ public void test_makeCursors_factToCountryInnerWithFilterInsteadOfRealJoinCondit true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -992,7 +992,7 @@ public void test_makeCursors_factToRegionToCountryLeft() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -1071,7 +1071,7 @@ public void test_makeCursors_factToCountryAlwaysTrue() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( new HashJoinSegmentStorageAdapter( @@ -1138,7 +1138,7 @@ public void test_makeCursors_factToCountryAlwaysFalse() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -1189,7 +1189,7 @@ public void test_makeCursors_factToCountryAlwaysTrueUsingLookup() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -1257,7 +1257,7 @@ public void 
test_makeCursors_factToCountryAlwaysFalseUsingLookup() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -1317,7 +1317,7 @@ public void test_makeCursors_factToCountryUsingVirtualColumn() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -1375,7 +1375,7 @@ public void test_makeCursors_factToCountryUsingExpression() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -1435,7 +1435,7 @@ public void test_makeCursors_factToRegionTheWrongWay() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( @@ -1495,7 +1495,7 @@ public void test_makeCursors_errorOnNonEquiJoin() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.readCursors( @@ -1541,7 +1541,7 @@ public void test_makeCursors_errorOnNonKeyBasedJoin() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.readCursors( @@ -1574,7 +1574,7 @@ public void test_makeCursors_factToCountryLeft_filterExcludesAllLeftRows() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); JoinTestHelper.verifyCursors( diff --git a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentTest.java b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentTest.java index 4df728fc75a3..40a4863e9590 100644 --- 
a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentTest.java @@ -88,7 +88,7 @@ public void setUp() throws IOException true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); hashJoinSegment = new HashJoinSegment( @@ -113,7 +113,7 @@ public void test_constructor_noClauses() true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); final HashJoinSegment ignored = new HashJoinSegment( diff --git a/processing/src/test/java/org/apache/druid/segment/join/JoinFilterAnalyzerTest.java b/processing/src/test/java/org/apache/druid/segment/join/JoinFilterAnalyzerTest.java index eb3b6fc64a8d..ae97b7ea33bc 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/JoinFilterAnalyzerTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/JoinFilterAnalyzerTest.java @@ -471,7 +471,7 @@ public void test_filterPushDown_factToRegionFilterOnRHSRegionNameExprVirtualColu true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); HashJoinSegmentStorageAdapter adapter = new HashJoinSegmentStorageAdapter( @@ -1476,7 +1476,7 @@ public void test_filterPushDown_factToRegionToCountryLeftFilterOnPageDisablePush false, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); HashJoinSegmentStorageAdapter adapter = new HashJoinSegmentStorageAdapter( factSegment.asStorageAdapter(), @@ -1548,7 +1548,7 @@ public void test_filterPushDown_factToRegionToCountryLeftEnablePushDownDisableRe true, false, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); 
HashJoinSegmentStorageAdapter adapter = new HashJoinSegmentStorageAdapter( factSegment.asStorageAdapter(), @@ -1752,7 +1752,7 @@ private static JoinFilterPreAnalysis simplePreAnalysis( true, true, true, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE ); } } diff --git a/processing/src/test/java/org/apache/druid/segment/join/JoinablesTest.java b/processing/src/test/java/org/apache/druid/segment/join/JoinablesTest.java index 002c6dd0bf81..4fa521dbcbc9 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/JoinablesTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/JoinablesTest.java @@ -102,7 +102,7 @@ public void test_createSegmentMapFn_noClauses() QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_PUSH_DOWN, QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE, QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_VALUE_COLUMN_FILTERS, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY, + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE, null, VirtualColumns.EMPTY ); @@ -131,7 +131,7 @@ public void test_createSegmentMapFn_unusableClause() QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_PUSH_DOWN, QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE, QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_VALUE_COLUMN_FILTERS, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY, + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE, null, VirtualColumns.EMPTY ); @@ -168,7 +168,7 @@ public void test_createSegmentMapFn_usableClause() QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_PUSH_DOWN, QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE, QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_VALUE_COLUMN_FILTERS, - QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE_KEY, + QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE, null, VirtualColumns.EMPTY ); From ee9f6e98deaa16ad927583d67b4fea4175efbd24 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 8 
Apr 2020 18:53:46 -0700 Subject: [PATCH 3/3] Update .spelling --- website/.spelling | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/website/.spelling b/website/.spelling index 14c8be59d2f2..0ee98ddccf1f 100644 --- a/website/.spelling +++ b/website/.spelling @@ -40,6 +40,7 @@ CORS CPUs CSVs Ceph +CNF ColumnDescriptor Corretto DDL @@ -307,6 +308,8 @@ pre-computation pre-compute pre-computing pre-configured +pre-filtered +pre-filtering pre-generated pre-made pre-processing @@ -1727,4 +1730,4 @@ regionName - ../docs/development/extensions-core/druid-ranger-security.md json metastore -UserGroupInformation \ No newline at end of file +UserGroupInformation