From 8c26d4ccc0eb9235bf20675f85139fe18999e085 Mon Sep 17 00:00:00 2001 From: Pranav Bhole Date: Fri, 14 Jul 2023 12:36:26 -0700 Subject: [PATCH 1/6] fix filtering bug in filtering unnest cols and dim cols --- .../druid/segment/UnnestStorageAdapter.java | 84 +++++-- .../segment/UnnestStorageAdapterTest.java | 49 ++++ .../sql/calcite/CalciteArraysQueryTest.java | 216 ++++++++++++++++++ 3 files changed, 324 insertions(+), 25 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index 048cd10f8efa..69825af591fc 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -38,6 +38,7 @@ import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.ListIndexed; +import org.apache.druid.segment.filter.AndFilter; import org.apache.druid.segment.filter.BoundFilter; import org.apache.druid.segment.filter.Filters; import org.apache.druid.segment.filter.LikeFilter; @@ -366,31 +367,19 @@ void addPreFilter(@Nullable final Filter filter) final FilterSplitter filterSplitter = new FilterSplitter(); if (queryFilter != null) { - List preFilterList = new ArrayList<>(); - final int origFilterSize; if (queryFilter.getRequiredColumns().contains(outputColumnName)) { // outside filter contains unnested column - // requires check for OR - if (queryFilter instanceof OrFilter) { - origFilterSize = ((OrFilter) queryFilter).getFilters().size(); - for (Filter filter : ((OrFilter) queryFilter).getFilters()) { - if (filter.getRequiredColumns().contains(outputColumnName)) { - final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible( - filter, - inputColumn, - inputColumnCapabilites - ); - if (newFilter != null) { - preFilterList.add(newFilter); - } - } else { - preFilterList.add(filter); - } - } - if (preFilterList.size() == origFilterSize) { - // there has been successful rewrites - final OrFilter preOrFilter = new OrFilter(preFilterList); - filterSplitter.addPreFilter(preOrFilter); + // requires check for OR and And filters, disqualify rewrite for non-unnest filters + if (queryFilter instanceof BooleanFilter) { + List preFilterList = recursiveRewriteOnUnnestFilters( + (BooleanFilter) queryFilter, + inputColumn, + inputColumnCapabilites + ); + if (queryFilter instanceof AndFilter) { + filterSplitter.addPreFilter(new AndFilter(preFilterList)); + } else if (queryFilter instanceof OrFilter) { + filterSplitter.addPreFilter(new OrFilter(preFilterList)); } // add the entire query filter to unnest filter to be used in Value matcher filterSplitter.addPostFilterWithPreFilterIfRewritePossible(queryFilter, true); @@ -412,7 +401,53 @@ void addPreFilter(@Nullable final Filter filter) ); } - + /** + * handles the nested rewrite for unnest columns in recursive way, + * it loops through all and/or filters and rewrite only required filters in the child and skip others + * + * @param queryFilter query filter passed to makeCursors + * @param inputColumn input column to unnest if it's a direct access; otherwise null + * @param inputColumnCapabilites input column capabilities if known; otherwise null + */ + private List recursiveRewriteOnUnnestFilters( + BooleanFilter queryFilter, + final String inputColumn, + final ColumnCapabilities inputColumnCapabilites + ) + { + final List preFilterList = new ArrayList<>(); + for (Filter filter : queryFilter.getFilters()) { + if (filter.getRequiredColumns().contains(outputColumnName)) { + if (filter instanceof AndFilter) { + preFilterList.add(new AndFilter(recursiveRewriteOnUnnestFilters( + (BooleanFilter) filter, + inputColumn, + inputColumnCapabilites + ))); + } else if (filter instanceof OrFilter) { + preFilterList.add(new OrFilter(recursiveRewriteOnUnnestFilters( + (BooleanFilter) filter, + inputColumn, + inputColumnCapabilites + ))); + } else { + final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible( + filter, + inputColumn, + inputColumnCapabilites + ); + if (newFilter != null) { + preFilterList.add(newFilter); + } else { + preFilterList.add(filter); + } + } + } else { + preFilterList.add(filter); + } + } + return preFilterList; + } /** * Returns the input of {@link #unnestColumn}, if it's a direct access; otherwise returns null. */ @@ -465,7 +500,6 @@ static boolean filterMapsOverMultiValueStrings(final Filter filter) return false; } } - return true; } else if (filter instanceof NotFilter) { return filterMapsOverMultiValueStrings(((NotFilter) filter).getBaseFilter()); diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index 480c86f51fd4..73eb65fa0314 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -316,7 +316,56 @@ public void test_pushdown_or_filters_unnested_and_original_dimension_with_unnest return null; }); } + @Test + public void test_nested_filters_unnested_and_original_dimension_with_unnest_adapters() + { + final UnnestStorageAdapter unnestStorageAdapter = new UnnestStorageAdapter( + new TestStorageAdapter(INCREMENTAL_INDEX), + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), + null + ); + + final VirtualColumn vc = unnestStorageAdapter.getUnnestColumn(); + + final String inputColumn = unnestStorageAdapter.getUnnestInputIfDirectAccess(vc); + + final OrFilter baseFilter = new OrFilter(ImmutableList.of( + new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null).toFilter(), + new AndFilter(ImmutableList.of( + new SelectorDimFilter(inputColumn, "2", null).toFilter(), + new SelectorDimFilter(OUTPUT_COLUMN_NAME, "10", null).toFilter() + )) + )); + + final OrFilter expectedPushDownFilter = new OrFilter(ImmutableList.of( + new SelectorDimFilter(inputColumn, "1", null).toFilter(), + new AndFilter(ImmutableList.of( + new SelectorDimFilter(inputColumn, "2", null).toFilter(), + new SelectorDimFilter(inputColumn, "10", null).toFilter() + )) + )); + + final Sequence cursorSequence = unnestStorageAdapter.makeCursors( + baseFilter, + unnestStorageAdapter.getInterval(), + VirtualColumns.EMPTY, + Granularities.ALL, + false, + null + ); + + final TestStorageAdapter base = (TestStorageAdapter) unnestStorageAdapter.getBaseAdapter(); + final Filter pushDownFilter = base.getPushDownFilter(); + Assert.assertEquals(expectedPushDownFilter, pushDownFilter); + cursorSequence.accumulate(null, (accumulated, cursor) -> { + Assert.assertEquals(cursor.getClass(), PostJoinCursor.class); + final Filter postFilter = ((PostJoinCursor) cursor).getPostJoinFilter(); + // OR-case so base filter should match the postJoinFilter + Assert.assertEquals(baseFilter, postFilter); + return null; + }); + } @Test public void test_pushdown_filters_unnested_dimension_with_unnest_adapters() { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index f5ea4ea60980..3bddac25ec7a 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -2880,6 +2880,222 @@ public void testUnnestTwiceWithFiltersAndExpressions() ); } + @Test + public void testUnnestThriceWithFiltersOnDimAndUnnestCol() + { + cannotVectorize(); + String sql = " SELECT dimZipf, dim3_unnest1, dim3_unnest2, dim3_unnest3 FROM \n" + + " ( SELECT * FROM \n" + + " ( SELECT * FROM lotsocolumns, UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest1) )" + + " ,UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest2) \n" + + " ), UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest3) " + + " WHERE dimZipf=27 AND dim3_unnest1='Baz'"; + testQuery( + sql, + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource( + UnnestDataSource.create( + UnnestDataSource.create( + UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE5), + expressionVirtualColumn( + "j0.unnest", + "\"dimMultivalEnumerated\"", + ColumnType.STRING + ), + null + ), + expressionVirtualColumn( + "_j0.unnest", + "\"dimMultivalEnumerated\"", + ColumnType.STRING + ), null + ), + expressionVirtualColumn( + "__j0.unnest", + "\"dimMultivalEnumerated\"", + ColumnType.STRING + ), + null + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters(and( + bound("dimZipf", "27", "27", false, false, null, StringComparators.NUMERIC), + new SelectorDimFilter("j0.unnest", "Baz", null) + )) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("__j0.unnest", "_j0.unnest", "dimZipf", "j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"27", "Baz", "Baz", "Baz"}, + new Object[]{"27", "Baz", "Baz", "Baz"}, + new Object[]{"27", "Baz", "Baz", "Hello"}, + new Object[]{"27", "Baz", "Baz", "World"}, + new Object[]{"27", "Baz", "Baz", "Baz"}, + new Object[]{"27", "Baz", "Baz", "Baz"}, + new Object[]{"27", "Baz", "Baz", "Hello"}, + new Object[]{"27", "Baz", "Baz", "World"}, + new Object[]{"27", "Baz", "Hello", "Baz"}, + new Object[]{"27", "Baz", "Hello", "Baz"}, + new Object[]{"27", "Baz", "Hello", "Hello"}, + new Object[]{"27", "Baz", "Hello", "World"}, + new Object[]{"27", "Baz", "World", "Baz"}, + new Object[]{"27", "Baz", "World", "Baz"}, + new Object[]{"27", "Baz", "World", "Hello"}, + new Object[]{"27", "Baz", "World", "World"}, + new Object[]{"27", "Baz", "Baz", "Baz"}, + new Object[]{"27", "Baz", "Baz", "Baz"}, + new Object[]{"27", "Baz", "Baz", "Hello"}, + new Object[]{"27", "Baz", "Baz", "World"}, + new Object[]{"27", "Baz", "Baz", "Baz"}, + new Object[]{"27", "Baz", "Baz", "Baz"}, + new Object[]{"27", "Baz", "Baz", "Hello"}, + new Object[]{"27", "Baz", "Baz", "World"}, + new Object[]{"27", "Baz", "Hello", "Baz"}, + new Object[]{"27", "Baz", "Hello", "Baz"}, + new Object[]{"27", "Baz", "Hello", "Hello"}, + new Object[]{"27", "Baz", "Hello", "World"}, + new Object[]{"27", "Baz", "World", "Baz"}, + new Object[]{"27", "Baz", "World", "Baz"}, + new Object[]{"27", "Baz", "World", "Hello"}, + new Object[]{"27", "Baz", "World", "World"} + ) + ); + } + @Test + public void testUnnestThriceWithFiltersOnDimAndAllUnnestColumns() + { + cannotVectorize(); + String sql = " SELECT dimZipf, dim3_unnest1, dim3_unnest2, dim3_unnest3 FROM \n" + + " ( SELECT * FROM \n" + + " ( SELECT * FROM lotsocolumns, UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest1) )" + + " ,UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest2) \n" + + " ), UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest3) " + + " WHERE dimZipf=27 AND dim3_unnest1='Baz' AND dim3_unnest2='Hello' AND dim3_unnest3='World'"; + testQuery( + sql, + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource( + UnnestDataSource.create( + UnnestDataSource.create( + UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE5), + expressionVirtualColumn( + "j0.unnest", + "\"dimMultivalEnumerated\"", + ColumnType.STRING + ), + null + ), + expressionVirtualColumn( + "_j0.unnest", + "\"dimMultivalEnumerated\"", + ColumnType.STRING + ), new SelectorDimFilter("_j0.unnest", "Hello", null) + ), + expressionVirtualColumn( + "__j0.unnest", + "\"dimMultivalEnumerated\"", + ColumnType.STRING + ), + new SelectorDimFilter("__j0.unnest", "World", null) + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters(and( + bound("dimZipf", "27", "27", false, false, null, StringComparators.NUMERIC), + new SelectorDimFilter("j0.unnest", "Baz", null) + )) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("__j0.unnest", "_j0.unnest", "dimZipf", "j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"27", "Baz", "Hello", "World"}, + new Object[]{"27", "Baz", "Hello", "World"} + ) + ); + } + + @Test + public void testUnnestThriceWithFiltersOnDimAndUnnestColumnsORCombinations() + { + cannotVectorize(); + String sql = " SELECT dimZipf, dim3_unnest1, dim3_unnest2, dim3_unnest3 FROM \n" + + " ( SELECT * FROM \n" + + " ( SELECT * FROM lotsocolumns, UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest1) )" + + " ,UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest2) \n" + + " ), UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest3) " + + " WHERE dimZipf=27 AND (dim3_unnest1='Baz' OR dim3_unnest2='Hello') AND dim3_unnest3='World'"; + testQuery( + sql, + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource( + UnnestDataSource.create( + UnnestDataSource.create( + UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE5), + expressionVirtualColumn( + "j0.unnest", + "\"dimMultivalEnumerated\"", + ColumnType.STRING + ), + null + ), + expressionVirtualColumn( + "_j0.unnest", + "\"dimMultivalEnumerated\"", + ColumnType.STRING + ), null + ), + expressionVirtualColumn( + "__j0.unnest", + "\"dimMultivalEnumerated\"", + ColumnType.STRING + ), + new SelectorDimFilter("__j0.unnest", "World", null) + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters(and( + bound("dimZipf", "27", "27", false, false, null, StringComparators.NUMERIC), + or( + new SelectorDimFilter("j0.unnest", "Baz", null), + new SelectorDimFilter("_j0.unnest", "Hello", null) + ) + )) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("__j0.unnest", "_j0.unnest", "dimZipf", "j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"27", "Baz", "Baz", "World"}, + new Object[]{"27", "Baz", "Baz", "World"}, + new Object[]{"27", "Baz", "Hello", "World"}, + new Object[]{"27", "Baz", "World", "World"}, + new Object[]{"27", "Baz", "Baz", "World"}, + new Object[]{"27", "Baz", "Baz", "World"}, + new Object[]{"27", "Baz", "Hello", "World"}, + new Object[]{"27", "Baz", "World", "World"}, + new Object[]{"27", "Hello", "Hello", "World"}, + new Object[]{"27", "World", "Hello", "World"} + ) + ); + } @Test public void testUnnestWithGroupBy() { From c408620120c07dbaf174ab8c90bf63b696d2583e Mon Sep 17 00:00:00 2001 From: Pranav Bhole Date: Sat, 29 Jul 2023 17:53:52 -0700 Subject: [PATCH 2/6] Refactor logic for unnest filters/query filters and add tests --- .../druid/segment/UnnestStorageAdapter.java | 174 +++++++++++------- .../apache/druid/segment/filter/Filters.java | 16 ++ .../segment/UnnestStorageAdapterTest.java | 138 ++++++++++++-- .../druid/segment/filter/FilterTestUtils.java | 18 ++ .../sql/calcite/CalciteArraysQueryTest.java | 32 +++- 5 files changed, 290 insertions(+), 88 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index 69825af591fc..bab1655a6bff 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -315,71 +315,30 @@ to generate filters to be passed to base cursor (filtersPushedDownToBaseCursor) filtersPushedDownToBaseCursor -> null (as the filter cannot be re-written due to presence of virtual columns) filtersForPostUnnestCursor -> d12 IN (a,b) or m1 < 10 */ - class FilterSplitter - { - final List filtersPushedDownToBaseCursor = new ArrayList<>(); - final List filtersForPostUnnestCursor = new ArrayList<>(); - - void addPostFilterWithPreFilterIfRewritePossible(@Nullable final Filter filter, boolean skipPreFilters) - { - if (filter == null) { - return; - } - if (!skipPreFilters) { - final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible(filter, inputColumn, inputColumnCapabilites); - if (newFilter != null) { - // Add the rewritten filter pre-unnest, so we get the benefit of any indexes, and so we avoid unnesting - // any rows that do not match this filter at all. - filtersPushedDownToBaseCursor.add(newFilter); - } - } - // Add original filter post-unnest no matter what: we need to filter out any extraneous unnested values. - filtersForPostUnnestCursor.add(filter); - } - - void addPreFilter(@Nullable final Filter filter) - { - if (filter == null) { - return; - } - - final Set requiredColumns = filter.getRequiredColumns(); - - // Run filter post-unnest if it refers to any virtual columns. This is a conservative judgement call - // that perhaps forces the code to use a ValueMatcher where an index would've been available, - // which can have real performance implications. This is an interim choice made to value correctness - // over performance. When we need to optimize this performance, we should be able to - // create a VirtualColumnDatasource that contains all the virtual columns, in which case the query - // itself would stop carrying them and everything should be able to be pushed down. - if (queryVirtualColumns.getVirtualColumns().length > 0) { - for (String column : requiredColumns) { - if (queryVirtualColumns.exists(column)) { - filtersForPostUnnestCursor.add(filter); - return; - } - } - } - filtersPushedDownToBaseCursor.add(filter); - - } - } - - final FilterSplitter filterSplitter = new FilterSplitter(); + final FilterSplitter filterSplitter = new FilterSplitter(inputColumn, inputColumnCapabilites, queryVirtualColumns); if (queryFilter != null) { if (queryFilter.getRequiredColumns().contains(outputColumnName)) { // outside filter contains unnested column // requires check for OR and And filters, disqualify rewrite for non-unnest filters if (queryFilter instanceof BooleanFilter) { + int originalFilterCount = Filters.countNumberOfFilters(queryFilter); + boolean isTopLevelAndFilter = queryFilter instanceof AndFilter; List preFilterList = recursiveRewriteOnUnnestFilters( (BooleanFilter) queryFilter, inputColumn, - inputColumnCapabilites + inputColumnCapabilites, + filterSplitter, + isTopLevelAndFilter ); - if (queryFilter instanceof AndFilter) { - filterSplitter.addPreFilter(new AndFilter(preFilterList)); - } else if (queryFilter instanceof OrFilter) { - filterSplitter.addPreFilter(new OrFilter(preFilterList)); + int preFilterSize = preFilterList.stream().map(f -> Filters.countNumberOfFilters(f)).mapToInt(Integer::intValue).sum(); + // If rewite on entire query filter is successful then add entire filter to preFilter else skip and only add to post filter. + if (originalFilterCount == preFilterSize) { + if (queryFilter instanceof AndFilter) { + filterSplitter.addPreFilter(new AndFilter(preFilterList)); + } else if (queryFilter instanceof OrFilter) { + filterSplitter.addPreFilter(new OrFilter(preFilterList)); + } } // add the entire query filter to unnest filter to be used in Value matcher filterSplitter.addPostFilterWithPreFilterIfRewritePossible(queryFilter, true); @@ -401,9 +360,85 @@ void addPreFilter(@Nullable final Filter filter) ); } + class FilterSplitter + { + private String inputColumn; + private ColumnCapabilities inputColumnCapabilites; + private VirtualColumns queryVirtualColumns; + + public FilterSplitter( + String inputColumn, + ColumnCapabilities inputColumnCapabilites, VirtualColumns queryVirtualColumns + ) + { + this.inputColumn = inputColumn; + this.inputColumnCapabilites = inputColumnCapabilites; + this.queryVirtualColumns = queryVirtualColumns; + } + + final List filtersPushedDownToBaseCursor = new ArrayList<>(); + final List filtersForPostUnnestCursor = new ArrayList<>(); + + void addPostFilterWithPreFilterIfRewritePossible(@Nullable final Filter filter, boolean skipPreFilters) + { + if (filter == null) { + return; + } + if (!skipPreFilters) { + final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible(filter, inputColumn, inputColumnCapabilites); + if (newFilter != null) { + // Add the rewritten filter pre-unnest, so we get the benefit of any indexes, and so we avoid unnesting + // any rows that do not match this filter at all. + filtersPushedDownToBaseCursor.add(newFilter); + } + } + // Add original filter post-unnest no matter what: we need to filter out any extraneous unnested values. + filtersForPostUnnestCursor.add(filter); + } + + void addPreFilter(@Nullable final Filter filter) + { + if (filter == null) { + return; + } + + final Set requiredColumns = filter.getRequiredColumns(); + + // Run filter post-unnest if it refers to any virtual columns. This is a conservative judgement call + // that perhaps forces the code to use a ValueMatcher where an index would've been available, + // which can have real performance implications. This is an interim choice made to value correctness + // over performance. When we need to optimize this performance, we should be able to + // create a VirtualColumnDatasource that contains all the virtual columns, in which case the query + // itself would stop carrying them and everything should be able to be pushed down. + if (queryVirtualColumns.getVirtualColumns().length > 0) { + for (String column : requiredColumns) { + if (queryVirtualColumns.exists(column)) { + filtersForPostUnnestCursor.add(filter); + return; + } + } + } + filtersPushedDownToBaseCursor.add(filter); + + } + } + /** * handles the nested rewrite for unnest columns in recursive way, - * it loops through all and/or filters and rewrite only required filters in the child and skip others + * it loops through all and/or filters and rewrite only required filters in the child and add it to preFilter if qualified + * or else skip adding it to preFilters. + * RULES: + * 1. Add to preFilters only when top level filter is AND. + * for example: a=1 and (b=2 or c=2) , In this case a=1 can be added as preFilters but we can not add b=2 as preFilters. + * 2. If Top level is OR filter then we can either choose to add entire top level OR filter to preFilter or skip it all together. + * for example: a=1 or (b=2 and c=2) + * 3. Filters on unnest column which is derived from Array or any other Expression can not be pushe down to base. + * for example: a=1 and vc=3 , lets say vc is ExpressionVirtualColumn, and vc=3 can not be push down to base even if top level is AND filter. + * A. Unnesting a single dimension e.g. select * from foo, UNNEST(MV_TO_ARRAY(dim3)) as u(d3) + * B. Unnesting an expression from multiple columns e.g. select * from foo, UNNEST(ARRAY[dim1,dim2]) as u(d12) + * In case A, d3 is a direct reference to dim3 so any filter using d3 can be rewritten using dim3 and added to pre filter + * while in case B, due to presence of the expression virtual column expressionVirtualColumn("j0.unnest", "array(\"dim1\",\"dim2\")", ColumnType.STRING_ARRAY) + * the filters on d12 cannot be pushed to the pre filters * * @param queryFilter query filter passed to makeCursors * @param inputColumn input column to unnest if it's a direct access; otherwise null @@ -412,7 +447,9 @@ void addPreFilter(@Nullable final Filter filter) private List recursiveRewriteOnUnnestFilters( BooleanFilter queryFilter, final String inputColumn, - final ColumnCapabilities inputColumnCapabilites + final ColumnCapabilities inputColumnCapabilites, + final FilterSplitter filterSplitter, + final boolean isTopLevelAndFilter ) { final List preFilterList = new ArrayList<>(); @@ -422,14 +459,20 @@ private List recursiveRewriteOnUnnestFilters( preFilterList.add(new AndFilter(recursiveRewriteOnUnnestFilters( (BooleanFilter) filter, inputColumn, - inputColumnCapabilites + inputColumnCapabilites, + filterSplitter, + isTopLevelAndFilter ))); } else if (filter instanceof OrFilter) { - preFilterList.add(new OrFilter(recursiveRewriteOnUnnestFilters( + // in case of Or Fiters, we set isTopLevelAndFilter to false that prevents pushing down any child filters to base + List orChildFilters = recursiveRewriteOnUnnestFilters( (BooleanFilter) filter, inputColumn, - inputColumnCapabilites - ))); + inputColumnCapabilites, + filterSplitter, + false + ); + preFilterList.add(new OrFilter(orChildFilters)); } else { final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible( filter, @@ -438,8 +481,13 @@ private List recursiveRewriteOnUnnestFilters( ); if (newFilter != null) { preFilterList.add(newFilter); - } else { - preFilterList.add(filter); + } + /* + Push down the filters to base only if top level is And Filter + we can not push down if top level filter is OR or unnestColumn is derived expression like arrays + */ + if (isTopLevelAndFilter && getUnnestInputIfDirectAccess(unnestColumn) != null) { + filterSplitter.addPreFilter(newFilter != null ? newFilter : filter); } } } else { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java index c522f8b4aa7a..117b8cd1de65 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java @@ -26,6 +26,7 @@ import org.apache.druid.query.DefaultBitmapResultFactory; import org.apache.druid.query.Query; import org.apache.druid.query.QueryContexts; +import org.apache.druid.query.filter.BooleanFilter; import org.apache.druid.query.filter.ColumnIndexSelector; import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.filter.DruidPredicateFactory; @@ -422,4 +423,19 @@ private static LinkedHashSet flattenOrChildren(final Collection return retVal; } + + public static int countNumberOfFilters(Filter filter) + { + if (filter == null) { + return 0; + } + if (filter instanceof BooleanFilter) { + return ((BooleanFilter) filter).getFilters() + .stream() + .map(f -> countNumberOfFilters(f)) + .mapToInt(Integer::intValue) + .sum(); + } + return 1; + } } diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index 73eb65fa0314..0aa3957ca045 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -22,6 +22,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.guava.Sequence; @@ -32,11 +33,14 @@ import org.apache.druid.query.filter.BoundDimFilter; import org.apache.druid.query.filter.EqualityFilter; import org.apache.druid.query.filter.Filter; +<<<<<<< HEAD import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.query.filter.LikeDimFilter; import org.apache.druid.query.filter.NullFilter; import org.apache.druid.query.filter.RangeFilter; import org.apache.druid.query.filter.SelectorDimFilter; +======= +>>>>>>> 27981e5e51 (Refactor logic for unnest filters/query filters and add tests) import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ValueType; @@ -72,6 +76,11 @@ import java.util.Arrays; import java.util.List; +import static org.apache.druid.segment.filter.FilterTestUtils.sdf; +import static org.apache.druid.segment.filter.FilterTestUtils.sdfd; +import static org.apache.druid.segment.filter.Filters.and; +import static org.apache.druid.segment.filter.Filters.or; + public class UnnestStorageAdapterTest extends InitializedNullHandlingTest { private static Closer CLOSER; @@ -286,13 +295,13 @@ public void test_pushdown_or_filters_unnested_and_original_dimension_with_unnest final String inputColumn = unnestStorageAdapter.getUnnestInputIfDirectAccess(vc); final OrFilter baseFilter = new OrFilter(ImmutableList.of( - new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null).toFilter(), - new SelectorDimFilter(inputColumn, "2", null).toFilter() + sdf(OUTPUT_COLUMN_NAME, "1", null), + sdf(inputColumn, "2", null) )); final OrFilter expectedPushDownFilter = new OrFilter(ImmutableList.of( - new SelectorDimFilter(inputColumn, "1", null).toFilter(), - new SelectorDimFilter(inputColumn, "2", null).toFilter() + sdf(inputColumn, "1", null), + sdf(inputColumn, "2", null) )); final Sequence cursorSequence = unnestStorageAdapter.makeCursors( @@ -330,18 +339,18 @@ public void test_nested_filters_unnested_and_original_dimension_with_unnest_adap final String inputColumn = unnestStorageAdapter.getUnnestInputIfDirectAccess(vc); final OrFilter baseFilter = new OrFilter(ImmutableList.of( - new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null).toFilter(), + sdf(OUTPUT_COLUMN_NAME, "1", null), new AndFilter(ImmutableList.of( - new SelectorDimFilter(inputColumn, "2", null).toFilter(), - new SelectorDimFilter(OUTPUT_COLUMN_NAME, "10", null).toFilter() + sdf(inputColumn, "2", null), + sdf(OUTPUT_COLUMN_NAME, "10", null) )) )); final OrFilter expectedPushDownFilter = new OrFilter(ImmutableList.of( - new SelectorDimFilter(inputColumn, "1", null).toFilter(), + sdf(inputColumn, "1", null), new AndFilter(ImmutableList.of( - new SelectorDimFilter(inputColumn, "2", null).toFilter(), - new SelectorDimFilter(inputColumn, "10", null).toFilter() + sdf(inputColumn, "2", null), + sdf(inputColumn, "10", null) )) )); @@ -367,12 +376,83 @@ public void test_nested_filters_unnested_and_original_dimension_with_unnest_adap }); } @Test + public void test_nested_filters_unnested_and_topLevel1And3filtersInOR() + { + final Filter testQueryFilter = and(ImmutableList.of( + sdf(OUTPUT_COLUMN_NAME, "3", null), + or(ImmutableList.of( + sdf("newcol", "2", null), + sdf(COLUMNNAME, "2", null), + sdf(OUTPUT_COLUMN_NAME, "1", null) + )) + )); + testComputeBaseAndPostUnnestFilters( + testQueryFilter, + "unnested-multi-string1 = 3", + "(unnested-multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 || unnested-multi-string1 = 1))" + ); + } + + @Test + public void test_nested_filters_unnested_and_topLevelORAnd3filtersInOR() + { + final Filter testQueryFilter = or(ImmutableList.of( + sdf(OUTPUT_COLUMN_NAME, "3", null), + and(ImmutableList.of( + sdf("newcol", "2", null), + sdf(COLUMNNAME, "2", null), + sdf(OUTPUT_COLUMN_NAME, "1", null) + )) + )); + testComputeBaseAndPostUnnestFilters( + testQueryFilter, + "", + "(unnested-multi-string1 = 3 || (newcol = 2 && multi-string1 = 2 && unnested-multi-string1 = 1))" + ); + } + + @Test + public void test_nested_filters_unnested_and_topLevelAND3filtersInORWithNestedOrs() + { + final Filter testQueryFilter = and(ImmutableList.of( + sdf(OUTPUT_COLUMN_NAME, "3", null), + or(ImmutableList.of( + sdf("newcol", "2", null), + sdf(COLUMNNAME, "2", null) + )), + or(ImmutableList.of( + sdf("newcol", "4", null), + sdf(COLUMNNAME, "8", null), + sdf(OUTPUT_COLUMN_NAME, "6", null) + )) + )); + testComputeBaseAndPostUnnestFilters( + testQueryFilter, + "unnested-multi-string1 = 3", + "(unnested-multi-string1 = 3 && (newcol = 2 || multi-string1 = 2) && (newcol = 4 || multi-string1 = 8 || unnested-multi-string1 = 6))" + ); + } + + @Test + public void test_nested_filters_unnested_and_topLevelAND2sdf() + { + final Filter testQueryFilter = and(ImmutableList.of( + sdf(OUTPUT_COLUMN_NAME, "3", null), + sdf(COLUMNNAME, "2", null) + )); + testComputeBaseAndPostUnnestFilters( + testQueryFilter, + "unnested-multi-string1 = 3", + "(unnested-multi-string1 = 3 && multi-string1 = 2)" + ); + } + @Test public void test_pushdown_filters_unnested_dimension_with_unnest_adapters() { final UnnestStorageAdapter unnestStorageAdapter = new UnnestStorageAdapter( new TestStorageAdapter(INCREMENTAL_INDEX), new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), - new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null) + sdfd(OUTPUT_COLUMN_NAME, "1", null) ); final VirtualColumn vc = unnestStorageAdapter.getUnnestColumn(); @@ -380,7 +460,7 @@ public void test_pushdown_filters_unnested_dimension_with_unnest_adapters() final String inputColumn = unnestStorageAdapter.getUnnestInputIfDirectAccess(vc); final Filter expectedPushDownFilter = - new SelectorDimFilter(inputColumn, "1", null).toFilter(); + sdf(inputColumn, "1", null); final Sequence cursorSequence = unnestStorageAdapter.makeCursors( @@ -426,10 +506,9 @@ public void test_pushdown_filters_unnested_dimension_outside() final String inputColumn = unnestStorageAdapter.getUnnestInputIfDirectAccess(vc); final Filter expectedPushDownFilter = - new SelectorDimFilter(inputColumn, "1", null).toFilter(); - + sdf(inputColumn, "1", null); - final Filter queryFilter = new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null).toFilter(); + final Filter queryFilter = sdf(OUTPUT_COLUMN_NAME, "1", null); final Sequence cursorSequence = unnestStorageAdapter.makeCursors( queryFilter, unnestStorageAdapter.getInterval(), @@ -458,6 +537,7 @@ public void test_pushdown_filters_unnested_dimension_outside() }); } +<<<<<<< HEAD @Test public void testAllowedFiltersForPushdown() { @@ -497,6 +577,34 @@ public void testAllowedFiltersForPushdown() Assert.assertTrue(UnnestStorageAdapter.filterMapsOverMultiValueStrings(new NotFilter(notAnd))); Assert.assertFalse( UnnestStorageAdapter.filterMapsOverMultiValueStrings(new NotFilter(new OrFilter(Arrays.asList(notAllowed)))) +======= + public void testComputeBaseAndPostUnnestFilters( + Filter testQueryFilter, + String expectedBasePushDown, + String expectedPostUnnest + ) + { + final String inputColumn = UNNEST_STORAGE_ADAPTER.getUnnestInputIfDirectAccess(UNNEST_STORAGE_ADAPTER.getUnnestColumn()); + final VirtualColumn vc = UNNEST_STORAGE_ADAPTER.getUnnestColumn(); + Pair filterPair = UNNEST_STORAGE_ADAPTER.computeBaseAndPostUnnestFilters( + testQueryFilter, + null, + VirtualColumns.EMPTY, + inputColumn, + vc.capabilities(inputColumn) + ); + Filter actualPushDownFilter = filterPair.lhs; + Filter actualPostUnnestFilter = filterPair.rhs; + Assert.assertEquals( + "Expects only top level child of And Filter to push down to base", + expectedBasePushDown, + actualPushDownFilter == null ? "" : actualPushDownFilter.toString() + ); + Assert.assertEquals( + "Should have post unnest filter", + expectedPostUnnest, + actualPostUnnestFilter == null ? "" : actualPostUnnestFilter.toString() +>>>>>>> 27981e5e51 (Refactor logic for unnest filters/query filters and add tests) ); } } diff --git a/processing/src/test/java/org/apache/druid/segment/filter/FilterTestUtils.java b/processing/src/test/java/org/apache/druid/segment/filter/FilterTestUtils.java index 003d8d8ef223..79ab5291df0a 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/FilterTestUtils.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/FilterTestUtils.java @@ -19,7 +19,10 @@ package org.apache.druid.segment.filter; +import org.apache.druid.query.extraction.ExtractionFn; +import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.filter.Filter; +import org.apache.druid.query.filter.SelectorDimFilter; import java.util.Arrays; @@ -44,4 +47,19 @@ public static SelectorFilter selector(final String fieldName, final String value { return new SelectorFilter(fieldName, value, null); } + + public static Filter sdf(String dimension, String value) + { + return new SelectorDimFilter(dimension, value, null).toFilter(); + } + + public static Filter sdf(String dimension, String value, ExtractionFn extractionFn) + { + return new SelectorDimFilter(dimension, value, extractionFn).toFilter(); + } + + public static DimFilter sdfd(String dimension, String value, ExtractionFn extractionFn) + { + return new SelectorDimFilter(dimension, value, extractionFn); + } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 3bddac25ec7a..cb4544e38e31 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -2924,8 +2924,10 @@ public void testUnnestThriceWithFiltersOnDimAndUnnestCol() .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .filters(and( - bound("dimZipf", "27", "27", false, false, null, StringComparators.NUMERIC), - new SelectorDimFilter("j0.unnest", "Baz", null) + NullHandling.sqlCompatible() + ? equality("dimZipf", "27", ColumnType.LONG) + : bound("dimZipf", "27", "27", false, false, null, StringComparators.NUMERIC), + equality("j0.unnest", "Baz", ColumnType.STRING) )) .legacy(false) .context(QUERY_CONTEXT_UNNEST) @@ -2999,21 +3001,23 @@ public void testUnnestThriceWithFiltersOnDimAndAllUnnestColumns() "_j0.unnest", "\"dimMultivalEnumerated\"", ColumnType.STRING - ), new SelectorDimFilter("_j0.unnest", "Hello", null) + ), equality("_j0.unnest", "Hello", ColumnType.STRING) ), expressionVirtualColumn( "__j0.unnest", "\"dimMultivalEnumerated\"", ColumnType.STRING ), - new SelectorDimFilter("__j0.unnest", "World", null) + equality("__j0.unnest", "World", ColumnType.STRING) ) ) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .filters(and( - bound("dimZipf", "27", "27", false, false, null, StringComparators.NUMERIC), - new SelectorDimFilter("j0.unnest", "Baz", null) + NullHandling.sqlCompatible() + ? equality("dimZipf", "27", ColumnType.LONG) + : bound("dimZipf", "27", "27", false, false, null, StringComparators.NUMERIC), + equality("j0.unnest", "Baz", ColumnType.STRING) )) .legacy(false) .context(QUERY_CONTEXT_UNNEST) @@ -3065,16 +3069,24 @@ public void testUnnestThriceWithFiltersOnDimAndUnnestColumnsORCombinations() "\"dimMultivalEnumerated\"", ColumnType.STRING ), - new SelectorDimFilter("__j0.unnest", "World", null) + equality("__j0.unnest", "World", ColumnType.STRING) ) ) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .filters(and( - bound("dimZipf", "27", "27", false, false, null, StringComparators.NUMERIC), + NullHandling.sqlCompatible() ? equality("dimZipf", "27", ColumnType.LONG) : bound( + "dimZipf", + "27", + "27", + false, + false, + null, + StringComparators.NUMERIC + ), or( - new SelectorDimFilter("j0.unnest", "Baz", null), - new SelectorDimFilter("_j0.unnest", "Hello", null) + equality("j0.unnest", "Baz", ColumnType.STRING), + equality("_j0.unnest", "Hello", ColumnType.STRING) ) )) .legacy(false) From 9a6e9115edcd8ac3bb28e3ef233e5a59fac0883a Mon Sep 17 00:00:00 2001 From: Pranav Bhole Date: Tue, 1 Aug 2023 16:41:09 -0700 Subject: [PATCH 3/6] addressing comments --- .../druid/segment/UnnestStorageAdapter.java | 3 +- .../segment/UnnestStorageAdapterTest.java | 172 ++++++++---------- .../druid/segment/filter/FilterTestUtils.java | 18 -- .../sql/calcite/CalciteArraysQueryTest.java | 4 +- 4 files changed, 84 insertions(+), 113 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index bab1655a6bff..063b5fa4d5bc 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -368,7 +368,8 @@ class FilterSplitter public FilterSplitter( String inputColumn, - ColumnCapabilities inputColumnCapabilites, VirtualColumns queryVirtualColumns + ColumnCapabilities inputColumnCapabilites, + VirtualColumns queryVirtualColumns ) { this.inputColumn = inputColumn; diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index 0aa3957ca045..54d6ecde279d 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -20,7 +20,6 @@ package org.apache.druid.segment; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.granularity.Granularities; @@ -30,29 +29,13 @@ import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.QueryMetrics; import org.apache.druid.query.dimension.DefaultDimensionSpec; -import org.apache.druid.query.filter.BoundDimFilter; -import org.apache.druid.query.filter.EqualityFilter; import org.apache.druid.query.filter.Filter; -<<<<<<< HEAD -import org.apache.druid.query.filter.InDimFilter; -import org.apache.druid.query.filter.LikeDimFilter; -import org.apache.druid.query.filter.NullFilter; -import org.apache.druid.query.filter.RangeFilter; import org.apache.druid.query.filter.SelectorDimFilter; -======= ->>>>>>> 27981e5e51 (Refactor logic for unnest filters/query filters and add tests) import org.apache.druid.segment.column.ColumnCapabilities; -import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.filter.AndFilter; -import org.apache.druid.segment.filter.BoundFilter; -import org.apache.druid.segment.filter.ColumnComparisonFilter; -import org.apache.druid.segment.filter.FalseFilter; -import org.apache.druid.segment.filter.LikeFilter; -import org.apache.druid.segment.filter.NotFilter; import org.apache.druid.segment.filter.OrFilter; import org.apache.druid.segment.filter.SelectorFilter; -import org.apache.druid.segment.filter.TrueFilter; import org.apache.druid.segment.generator.GeneratorBasicSchemas; import org.apache.druid.segment.generator.GeneratorSchemaInfo; import org.apache.druid.segment.generator.SegmentGenerator; @@ -76,8 +59,7 @@ import java.util.Arrays; import java.util.List; -import static org.apache.druid.segment.filter.FilterTestUtils.sdf; -import static org.apache.druid.segment.filter.FilterTestUtils.sdfd; +import static org.apache.druid.segment.filter.FilterTestUtils.selector; import static org.apache.druid.segment.filter.Filters.and; import static org.apache.druid.segment.filter.Filters.or; @@ -295,13 +277,13 @@ public void test_pushdown_or_filters_unnested_and_original_dimension_with_unnest final String inputColumn = unnestStorageAdapter.getUnnestInputIfDirectAccess(vc); final OrFilter baseFilter = new OrFilter(ImmutableList.of( - sdf(OUTPUT_COLUMN_NAME, "1", null), - sdf(inputColumn, "2", null) + selector(OUTPUT_COLUMN_NAME, "1"), + selector(inputColumn, "2") )); final OrFilter expectedPushDownFilter = new OrFilter(ImmutableList.of( - sdf(inputColumn, "1", null), - sdf(inputColumn, "2", null) + selector(inputColumn, "1"), + selector(inputColumn, "2") )); final Sequence cursorSequence = unnestStorageAdapter.makeCursors( @@ -339,18 +321,18 @@ public void test_nested_filters_unnested_and_original_dimension_with_unnest_adap final String inputColumn = unnestStorageAdapter.getUnnestInputIfDirectAccess(vc); final OrFilter baseFilter = new OrFilter(ImmutableList.of( - sdf(OUTPUT_COLUMN_NAME, "1", null), + selector(OUTPUT_COLUMN_NAME, "1"), new AndFilter(ImmutableList.of( - sdf(inputColumn, "2", null), - sdf(OUTPUT_COLUMN_NAME, "10", null) + selector(inputColumn, "2"), + selector(OUTPUT_COLUMN_NAME, "10") )) )); final OrFilter expectedPushDownFilter = new OrFilter(ImmutableList.of( - sdf(inputColumn, "1", null), + selector(inputColumn, "1"), new AndFilter(ImmutableList.of( - sdf(inputColumn, "2", null), - sdf(inputColumn, "10", null) + selector(inputColumn, "2"), + selector(inputColumn, "10") )) )); @@ -379,11 +361,11 @@ public void test_nested_filters_unnested_and_original_dimension_with_unnest_adap public void test_nested_filters_unnested_and_topLevel1And3filtersInOR() { final Filter testQueryFilter = and(ImmutableList.of( - sdf(OUTPUT_COLUMN_NAME, "3", null), + selector(OUTPUT_COLUMN_NAME, "3"), or(ImmutableList.of( - sdf("newcol", "2", null), - sdf(COLUMNNAME, "2", null), - sdf(OUTPUT_COLUMN_NAME, "1", null) + selector("newcol", "2"), + selector(COLUMNNAME, "2"), + selector(OUTPUT_COLUMN_NAME, "1") )) )); testComputeBaseAndPostUnnestFilters( @@ -392,16 +374,64 @@ public void test_nested_filters_unnested_and_topLevel1And3filtersInOR() "(unnested-multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 || unnested-multi-string1 = 1))" ); } - + @Test + public void test_nested_multiLevel_filters_unnested() + { + final Filter testQueryFilter = and(ImmutableList.of( + selector(OUTPUT_COLUMN_NAME, "3"), + or(ImmutableList.of( + or(ImmutableList.of( + selector("newcol", "2"), + selector(COLUMNNAME, "2"), + and(ImmutableList.of( + selector("newcol", "3"), + selector(COLUMNNAME, "7") + )) + )), + selector(OUTPUT_COLUMN_NAME, "1") + )) + )); + testComputeBaseAndPostUnnestFilters( + testQueryFilter, + "unnested-multi-string1 = 3", + "(unnested-multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 || (newcol = 3 && multi-string1 = 7) || unnested-multi-string1 = 1))" + ); + } + @Test + public void test_nested_multiLevel_filters_unnested5Level() + { + final Filter testQueryFilter = or(ImmutableList.of( + selector(OUTPUT_COLUMN_NAME, "3"), + or(ImmutableList.of( + or(ImmutableList.of( + selector("newcol", "2"), + selector(COLUMNNAME, "2"), + and(ImmutableList.of( + selector("newcol", "3"), + and(ImmutableList.of( + selector(COLUMNNAME, "7"), + selector("newcol_1", "10") + )) + )) + )), + selector(OUTPUT_COLUMN_NAME, "1") + )) + )); + testComputeBaseAndPostUnnestFilters( + testQueryFilter, + "", + "(unnested-multi-string1 = 3 || newcol = 2 || multi-string1 = 2 || (newcol = 3 && multi-string1 = 7 && newcol_1 = 10) || unnested-multi-string1 = 1)" + ); + } @Test public void test_nested_filters_unnested_and_topLevelORAnd3filtersInOR() { final Filter testQueryFilter = or(ImmutableList.of( - sdf(OUTPUT_COLUMN_NAME, "3", null), + selector(OUTPUT_COLUMN_NAME, "3"), and(ImmutableList.of( - sdf("newcol", "2", null), - sdf(COLUMNNAME, "2", null), - sdf(OUTPUT_COLUMN_NAME, "1", null) + selector("newcol", "2"), + selector(COLUMNNAME, "2"), + selector(OUTPUT_COLUMN_NAME, "1") )) )); testComputeBaseAndPostUnnestFilters( @@ -415,15 +445,15 @@ public void test_nested_filters_unnested_and_topLevelORAnd3filtersInOR() public void test_nested_filters_unnested_and_topLevelAND3filtersInORWithNestedOrs() { final Filter testQueryFilter = and(ImmutableList.of( - sdf(OUTPUT_COLUMN_NAME, "3", null), + selector(OUTPUT_COLUMN_NAME, "3"), or(ImmutableList.of( - sdf("newcol", "2", null), - sdf(COLUMNNAME, "2", null) + selector("newcol", "2"), + selector(COLUMNNAME, "2") )), or(ImmutableList.of( - sdf("newcol", "4", null), - sdf(COLUMNNAME, "8", null), - sdf(OUTPUT_COLUMN_NAME, "6", null) + selector("newcol", "4"), + selector(COLUMNNAME, "8"), + selector(OUTPUT_COLUMN_NAME, "6") )) )); testComputeBaseAndPostUnnestFilters( @@ -437,8 +467,8 @@ public void test_nested_filters_unnested_and_topLevelAND3filtersInORWithNestedOr public void test_nested_filters_unnested_and_topLevelAND2sdf() { final Filter testQueryFilter = and(ImmutableList.of( - sdf(OUTPUT_COLUMN_NAME, "3", null), - sdf(COLUMNNAME, "2", null) + selector(OUTPUT_COLUMN_NAME, "3"), + selector(COLUMNNAME, "2") )); testComputeBaseAndPostUnnestFilters( testQueryFilter, @@ -452,7 +482,7 @@ public void test_pushdown_filters_unnested_dimension_with_unnest_adapters() final UnnestStorageAdapter unnestStorageAdapter = new UnnestStorageAdapter( new TestStorageAdapter(INCREMENTAL_INDEX), new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), - sdfd(OUTPUT_COLUMN_NAME, "1", null) + new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null) ); final VirtualColumn vc = unnestStorageAdapter.getUnnestColumn(); @@ -460,7 +490,7 @@ public void test_pushdown_filters_unnested_dimension_with_unnest_adapters() final String inputColumn = unnestStorageAdapter.getUnnestInputIfDirectAccess(vc); final Filter expectedPushDownFilter = - sdf(inputColumn, "1", null); + selector(inputColumn, "1"); final Sequence cursorSequence = unnestStorageAdapter.makeCursors( @@ -506,9 +536,9 @@ public void test_pushdown_filters_unnested_dimension_outside() final String inputColumn = unnestStorageAdapter.getUnnestInputIfDirectAccess(vc); final Filter expectedPushDownFilter = - sdf(inputColumn, "1", null); + selector(inputColumn, "1"); - final Filter queryFilter = sdf(OUTPUT_COLUMN_NAME, "1", null); + final Filter queryFilter = new SelectorFilter(OUTPUT_COLUMN_NAME, "1", null); final Sequence cursorSequence = unnestStorageAdapter.makeCursors( queryFilter, unnestStorageAdapter.getInterval(), @@ -537,47 +567,6 @@ public void test_pushdown_filters_unnested_dimension_outside() }); } -<<<<<<< HEAD - @Test - public void testAllowedFiltersForPushdown() - { - Filter[] allowed = new Filter[] { - new SelectorFilter("column", "value"), - new InDimFilter("column", ImmutableSet.of("a", "b")), - new LikeFilter("column", null, LikeDimFilter.LikeMatcher.from("hello%", null), null), - new BoundFilter( - new BoundDimFilter("column", "a", "b", true, true, null, null, null) - ), - NullFilter.forColumn("column"), - new EqualityFilter("column", ColumnType.LONG, 1234L, null), - new RangeFilter("column", ColumnType.LONG, 0L, 1234L, true, false, null) - }; - // not exhaustive - Filter[] notAllowed = new Filter[] { - TrueFilter.instance(), - FalseFilter.instance(), - new ColumnComparisonFilter(ImmutableList.of(DefaultDimensionSpec.of("col1"), DefaultDimensionSpec.of("col2"))) - }; - - for (Filter f : allowed) { - Assert.assertTrue(UnnestStorageAdapter.filterMapsOverMultiValueStrings(f)); - } - for (Filter f : notAllowed) { - Assert.assertFalse(UnnestStorageAdapter.filterMapsOverMultiValueStrings(f)); - } - - Filter notAnd = new NotFilter( - new AndFilter( - Arrays.asList(allowed) - ) - ); - - Assert.assertTrue(UnnestStorageAdapter.filterMapsOverMultiValueStrings(notAnd)); - Assert.assertTrue(UnnestStorageAdapter.filterMapsOverMultiValueStrings(new OrFilter(Arrays.asList(allowed)))); - Assert.assertTrue(UnnestStorageAdapter.filterMapsOverMultiValueStrings(new NotFilter(notAnd))); - Assert.assertFalse( - UnnestStorageAdapter.filterMapsOverMultiValueStrings(new NotFilter(new OrFilter(Arrays.asList(notAllowed)))) -======= public void testComputeBaseAndPostUnnestFilters( Filter testQueryFilter, String expectedBasePushDown, @@ -604,7 +593,6 @@ public void testComputeBaseAndPostUnnestFilters( "Should have post unnest filter", expectedPostUnnest, actualPostUnnestFilter == null ? "" : actualPostUnnestFilter.toString() ->>>>>>> 27981e5e51 (Refactor logic for unnest filters/query filters and add tests) ); } } diff --git a/processing/src/test/java/org/apache/druid/segment/filter/FilterTestUtils.java b/processing/src/test/java/org/apache/druid/segment/filter/FilterTestUtils.java index 79ab5291df0a..003d8d8ef223 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/FilterTestUtils.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/FilterTestUtils.java @@ -19,10 +19,7 @@ package org.apache.druid.segment.filter; -import org.apache.druid.query.extraction.ExtractionFn; -import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.filter.Filter; -import org.apache.druid.query.filter.SelectorDimFilter; import java.util.Arrays; @@ -47,19 +44,4 @@ public static SelectorFilter selector(final String fieldName, final String value { return new SelectorFilter(fieldName, value, null); } - - public static Filter sdf(String dimension, String value) - { - return new SelectorDimFilter(dimension, value, null).toFilter(); - } - - public static Filter sdf(String dimension, String value, ExtractionFn extractionFn) - { - return new SelectorDimFilter(dimension, value, extractionFn).toFilter(); - } - - public static DimFilter sdfd(String dimension, String value, ExtractionFn extractionFn) - { - return new SelectorDimFilter(dimension, value, extractionFn); - } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index cb4544e38e31..ab8330b2c566 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -2925,7 +2925,7 @@ public void testUnnestThriceWithFiltersOnDimAndUnnestCol() .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .filters(and( NullHandling.sqlCompatible() - ? equality("dimZipf", "27", ColumnType.LONG) + ? numericEquality("dimZipf", "27", ColumnType.LONG) : bound("dimZipf", "27", "27", false, false, null, StringComparators.NUMERIC), equality("j0.unnest", "Baz", ColumnType.STRING) )) @@ -3015,7 +3015,7 @@ public void testUnnestThriceWithFiltersOnDimAndAllUnnestColumns() .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .filters(and( NullHandling.sqlCompatible() - ? equality("dimZipf", "27", ColumnType.LONG) + ? numericEquality("dimZipf", "27", ColumnType.LONG) : bound("dimZipf", "27", "27", false, false, null, StringComparators.NUMERIC), equality("j0.unnest", "Baz", ColumnType.STRING) )) From f8f969bbe181b1d0becad83eee46db85635325cd Mon Sep 17 00:00:00 2001 From: Pranav Bhole Date: Fri, 4 Aug 2023 15:58:09 -0700 Subject: [PATCH 4/6] Removing 2 parse for filter counts and addressing comments --- .../druid/segment/UnnestStorageAdapter.java | 35 +++++++++++++++++-- .../apache/druid/segment/filter/Filters.java | 2 +- .../segment/UnnestStorageAdapterTest.java | 14 ++++---- 3 files changed, 40 insertions(+), 11 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index 063b5fa4d5bc..7d88f45a917b 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -322,7 +322,6 @@ to generate filters to be passed to base cursor (filtersPushedDownToBaseCursor) // outside filter contains unnested column // requires check for OR and And filters, disqualify rewrite for non-unnest filters if (queryFilter instanceof BooleanFilter) { - int originalFilterCount = Filters.countNumberOfFilters(queryFilter); boolean isTopLevelAndFilter = queryFilter instanceof AndFilter; List preFilterList = recursiveRewriteOnUnnestFilters( (BooleanFilter) queryFilter, @@ -331,9 +330,8 @@ to generate filters to be passed to base cursor (filtersPushedDownToBaseCursor) filterSplitter, isTopLevelAndFilter ); - int preFilterSize = preFilterList.stream().map(f -> Filters.countNumberOfFilters(f)).mapToInt(Integer::intValue).sum(); // If rewite on entire query filter is successful then add entire filter to preFilter else skip and only add to post filter. - if (originalFilterCount == preFilterSize) { + if (filterSplitter.getPreFilterCount() == filterSplitter.getOriginalFilterCount()) { if (queryFilter instanceof AndFilter) { filterSplitter.addPreFilter(new AndFilter(preFilterList)); } else if (queryFilter instanceof OrFilter) { @@ -366,6 +364,9 @@ class FilterSplitter private ColumnCapabilities inputColumnCapabilites; private VirtualColumns queryVirtualColumns; + private int originalFilterCount = 0; + private int preFilterCount = 0; + public FilterSplitter( String inputColumn, ColumnCapabilities inputColumnCapabilites, @@ -422,6 +423,26 @@ void addPreFilter(@Nullable final Filter filter) filtersPushedDownToBaseCursor.add(filter); } + + public void addToOriginalFilterCount(int c) + { + originalFilterCount += c; + } + + public void addToPreFilterCount(int c) + { + preFilterCount += c; + } + + public int getOriginalFilterCount() + { + return originalFilterCount; + } + + public int getPreFilterCount() + { + return preFilterCount; + } } /** @@ -481,7 +502,9 @@ private List recursiveRewriteOnUnnestFilters( inputColumnCapabilites ); if (newFilter != null) { + // this is making sure that we are not pushing the unnest columns filters to base filter without rewriting. preFilterList.add(newFilter); + filterSplitter.addToPreFilterCount(1); } /* Push down the filters to base only if top level is And Filter @@ -489,10 +512,16 @@ private List recursiveRewriteOnUnnestFilters( */ if (isTopLevelAndFilter && getUnnestInputIfDirectAccess(unnestColumn) != null) { filterSplitter.addPreFilter(newFilter != null ? newFilter : filter); + filterSplitter.addToPreFilterCount(1); } + filterSplitter.addToOriginalFilterCount(1); } } else { preFilterList.add(filter); + // for filters on non unnest columns, we still need to count the nested filters if any as we are not traversing it in this method + int filterCount = Filters.countNumberOfFilters(filter); + filterSplitter.addToOriginalFilterCount(filterCount); + filterSplitter.addToPreFilterCount(filterCount); } } return preFilterList; diff --git a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java index 117b8cd1de65..14c2e2f770c5 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java @@ -424,7 +424,7 @@ private static LinkedHashSet flattenOrChildren(final Collection return retVal; } - public static int countNumberOfFilters(Filter filter) + public static int countNumberOfFilters(@Nullable Filter filter) { if (filter == null) { return 0; diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index 54d6ecde279d..8c7ef5643fb0 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -370,7 +370,7 @@ public void test_nested_filters_unnested_and_topLevel1And3filtersInOR() )); testComputeBaseAndPostUnnestFilters( testQueryFilter, - "unnested-multi-string1 = 3", + "multi-string1 = 3", "(unnested-multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 || unnested-multi-string1 = 1))" ); } @@ -393,7 +393,7 @@ public void test_nested_multiLevel_filters_unnested() )); testComputeBaseAndPostUnnestFilters( testQueryFilter, - "unnested-multi-string1 = 3", + "multi-string1 = 3", "(unnested-multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 || (newcol = 3 && multi-string1 = 7) || unnested-multi-string1 = 1))" ); } @@ -419,7 +419,7 @@ public void test_nested_multiLevel_filters_unnested5Level() )); testComputeBaseAndPostUnnestFilters( testQueryFilter, - "", + "(multi-string1 = 3 || newcol = 2 || multi-string1 = 2 || (newcol = 3 && multi-string1 = 7 && newcol_1 = 10) || multi-string1 = 1)", "(unnested-multi-string1 = 3 || newcol = 2 || multi-string1 = 2 || (newcol = 3 && multi-string1 = 7 && newcol_1 = 10) || unnested-multi-string1 = 1)" ); } @@ -436,7 +436,7 @@ public void test_nested_filters_unnested_and_topLevelORAnd3filtersInOR() )); testComputeBaseAndPostUnnestFilters( testQueryFilter, - "", + "(multi-string1 = 3 || (newcol = 2 && multi-string1 = 2 && multi-string1 = 1))", "(unnested-multi-string1 = 3 || (newcol = 2 && multi-string1 = 2 && unnested-multi-string1 = 1))" ); } @@ -458,7 +458,7 @@ public void test_nested_filters_unnested_and_topLevelAND3filtersInORWithNestedOr )); testComputeBaseAndPostUnnestFilters( testQueryFilter, - "unnested-multi-string1 = 3", + "multi-string1 = 3", "(unnested-multi-string1 = 3 && (newcol = 2 || multi-string1 = 2) && (newcol = 4 || multi-string1 = 8 || unnested-multi-string1 = 6))" ); } @@ -472,7 +472,7 @@ public void test_nested_filters_unnested_and_topLevelAND2sdf() )); testComputeBaseAndPostUnnestFilters( testQueryFilter, - "unnested-multi-string1 = 3", + "multi-string1 = 3", "(unnested-multi-string1 = 3 && multi-string1 = 2)" ); } @@ -580,7 +580,7 @@ public void testComputeBaseAndPostUnnestFilters( null, VirtualColumns.EMPTY, inputColumn, - vc.capabilities(inputColumn) + vc.capabilities(UNNEST_STORAGE_ADAPTER, inputColumn) ); Filter actualPushDownFilter = filterPair.lhs; Filter actualPostUnnestFilter = filterPair.rhs; From 0a1dece8046501f791c438d80ea219fb09278708 Mon Sep 17 00:00:00 2001 From: Pranav Bhole Date: Mon, 14 Aug 2023 18:04:50 -0700 Subject: [PATCH 5/6] Fixing the bug in the count and fix tests --- .../druid/segment/UnnestStorageAdapter.java | 1 - .../segment/UnnestStorageAdapterTest.java | 8 ++--- .../sql/calcite/CalciteArraysQueryTest.java | 35 +++++++++++++++++++ 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index 7d88f45a917b..71a1809a885c 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -512,7 +512,6 @@ private List recursiveRewriteOnUnnestFilters( */ if (isTopLevelAndFilter && getUnnestInputIfDirectAccess(unnestColumn) != null) { filterSplitter.addPreFilter(newFilter != null ? newFilter : filter); - filterSplitter.addToPreFilterCount(1); } filterSplitter.addToOriginalFilterCount(1); } diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index 8c7ef5643fb0..4257ac4c0723 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -370,7 +370,7 @@ public void test_nested_filters_unnested_and_topLevel1And3filtersInOR() )); testComputeBaseAndPostUnnestFilters( testQueryFilter, - "multi-string1 = 3", + "(multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 || multi-string1 = 1))", "(unnested-multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 || unnested-multi-string1 = 1))" ); } @@ -393,7 +393,7 @@ public void test_nested_multiLevel_filters_unnested() )); testComputeBaseAndPostUnnestFilters( testQueryFilter, - "multi-string1 = 3", + "(multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 || (newcol = 3 && multi-string1 = 7) || multi-string1 = 1))", "(unnested-multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 || (newcol = 3 && multi-string1 = 7) || unnested-multi-string1 = 1))" ); } @@ -458,7 +458,7 @@ public void test_nested_filters_unnested_and_topLevelAND3filtersInORWithNestedOr )); testComputeBaseAndPostUnnestFilters( testQueryFilter, - "multi-string1 = 3", + "(multi-string1 = 3 && (newcol = 2 || multi-string1 = 2) && (newcol = 4 || multi-string1 = 8 || multi-string1 = 6))", "(unnested-multi-string1 = 3 && (newcol = 2 || multi-string1 = 2) && (newcol = 4 || multi-string1 = 8 || unnested-multi-string1 = 6))" ); } @@ -472,7 +472,7 @@ public void test_nested_filters_unnested_and_topLevelAND2sdf() )); testComputeBaseAndPostUnnestFilters( testQueryFilter, - "multi-string1 = 3", + "(multi-string1 = 3 && multi-string1 = 2)", "(unnested-multi-string1 = 3 && multi-string1 = 2)" ); } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index ab8330b2c566..a595c101bfa1 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -3376,6 +3376,41 @@ public void testUnnestFirstQueryOnSelect() ); } + @Test + public void testUnnestVirtualWithColumns1() + { + // This tells the test to skip generating (vectorize = force) path + // Generates only 1 native query with vectorize = false + skipVectorize(); + // This tells that both vectorize = force and vectorize = false takes the same path of non vectorization + // Generates 2 native queries with 2 different values of vectorize + cannotVectorize(); + testQuery( + "SELECT strings, m1 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (strings) where (strings='a' and (m1<=10 or strings='b'))", + QUERY_CONTEXT_UNNEST, + ImmutableList.of(Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn( + "j0.unnest", + "\"dim3\"", + ColumnType.STRING + ), + equality("j0.unnest", "a", ColumnType.STRING) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .filters(or( + bound("m1", null, "10", false, false, null, StringComparators.NUMERIC), + equality("j0.unnest", "b", ColumnType.STRING) + )) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest", "m1")) + .build()), + ImmutableList.of(new Object[]{"a", 1.0f}) + ); + } @Test public void testUnnestWithFilters() { From 96a82081376171f025c6f86d896552db0bebbfbb Mon Sep 17 00:00:00 2001 From: Pranav Bhole Date: Tue, 15 Aug 2023 18:51:03 -0700 Subject: [PATCH 6/6] Fixing sql compat mode tests --- .../sql/calcite/CalciteArraysQueryTest.java | 279 +++++++++++------- 1 file changed, 176 insertions(+), 103 deletions(-) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index a595c101bfa1..9f25f4cd00f8 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -2890,14 +2890,12 @@ public void testUnnestThriceWithFiltersOnDimAndUnnestCol() + " ,UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest2) \n" + " ), UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest3) " + " WHERE dimZipf=27 AND dim3_unnest1='Baz'"; - testQuery( - sql, - QUERY_CONTEXT_UNNEST, - ImmutableList.of( - Druids.newScanQueryBuilder() - .dataSource( + List> expectedQuerySc = ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource( + UnnestDataSource.create( UnnestDataSource.create( - UnnestDataSource.create( + FilteredDataSource.create( UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE5), expressionVirtualColumn( @@ -2907,33 +2905,43 @@ public void testUnnestThriceWithFiltersOnDimAndUnnestCol() ), null ), - expressionVirtualColumn( - "_j0.unnest", - "\"dimMultivalEnumerated\"", - ColumnType.STRING - ), null + and( + NullHandling.sqlCompatible() + ? equality("dimZipf", "27", ColumnType.LONG) + : bound("dimZipf", "27", "27", false, false, null, StringComparators.NUMERIC), + equality("j0.unnest", "Baz", ColumnType.STRING) + ) ), expressionVirtualColumn( - "__j0.unnest", + "_j0.unnest", "\"dimMultivalEnumerated\"", ColumnType.STRING - ), - null - ) + ), null + ), + expressionVirtualColumn( + "__j0.unnest", + "\"dimMultivalEnumerated\"", + ColumnType.STRING + ), + null ) - .intervals(querySegmentSpec(Filtration.eternity())) - .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .filters(and( - NullHandling.sqlCompatible() - ? numericEquality("dimZipf", "27", ColumnType.LONG) - : bound("dimZipf", "27", "27", false, false, null, StringComparators.NUMERIC), - equality("j0.unnest", "Baz", ColumnType.STRING) - )) - .legacy(false) - .context(QUERY_CONTEXT_UNNEST) - .columns(ImmutableList.of("__j0.unnest", "_j0.unnest", "dimZipf", "j0.unnest")) - .build() - ), + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .virtualColumns(expressionVirtualColumn( + "v0", + "'Baz'", + ColumnType.STRING + )) + .columns(ImmutableList.of("__j0.unnest", "_j0.unnest", "dimZipf", "v0")) + .build() + ); + testQuery( + sql, + QUERY_CONTEXT_UNNEST, + expectedQuerySc, ImmutableList.of( new Object[]{"27", "Baz", "Baz", "Baz"}, new Object[]{"27", "Baz", "Baz", "Baz"}, @@ -2980,14 +2988,12 @@ public void testUnnestThriceWithFiltersOnDimAndAllUnnestColumns() + " ,UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest2) \n" + " ), UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest3) " + " WHERE dimZipf=27 AND dim3_unnest1='Baz' AND dim3_unnest2='Hello' AND dim3_unnest3='World'"; - testQuery( - sql, - QUERY_CONTEXT_UNNEST, - ImmutableList.of( - Druids.newScanQueryBuilder() - .dataSource( + List> expectedQuerySc = ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource( + UnnestDataSource.create( UnnestDataSource.create( - UnnestDataSource.create( + FilteredDataSource.create( UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE5), expressionVirtualColumn( @@ -2997,33 +3003,43 @@ public void testUnnestThriceWithFiltersOnDimAndAllUnnestColumns() ), null ), - expressionVirtualColumn( - "_j0.unnest", - "\"dimMultivalEnumerated\"", - ColumnType.STRING - ), equality("_j0.unnest", "Hello", ColumnType.STRING) + and( + NullHandling.sqlCompatible() + ? equality("dimZipf", "27", ColumnType.LONG) + : bound("dimZipf", "27", "27", false, false, null, StringComparators.NUMERIC), + equality("j0.unnest", "Baz", ColumnType.STRING) + ) ), expressionVirtualColumn( - "__j0.unnest", + "_j0.unnest", "\"dimMultivalEnumerated\"", ColumnType.STRING - ), - equality("__j0.unnest", "World", ColumnType.STRING) - ) + ), equality("_j0.unnest", "Hello", ColumnType.STRING) + ), + expressionVirtualColumn( + "__j0.unnest", + "\"dimMultivalEnumerated\"", + ColumnType.STRING + ), + equality("__j0.unnest", "World", ColumnType.STRING) ) - .intervals(querySegmentSpec(Filtration.eternity())) - .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .filters(and( - NullHandling.sqlCompatible() - ? numericEquality("dimZipf", "27", ColumnType.LONG) - : bound("dimZipf", "27", "27", false, false, null, StringComparators.NUMERIC), - equality("j0.unnest", "Baz", ColumnType.STRING) - )) - .legacy(false) - .context(QUERY_CONTEXT_UNNEST) - .columns(ImmutableList.of("__j0.unnest", "_j0.unnest", "dimZipf", "j0.unnest")) - .build() - ), + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .virtualColumns(expressionVirtualColumn( + "v0", + "'Baz'", + ColumnType.STRING + )) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("__j0.unnest", "_j0.unnest", "dimZipf", "v0")) + .build() + ); + testQuery( + sql, + QUERY_CONTEXT_UNNEST, + expectedQuerySc, ImmutableList.of( new Object[]{"27", "Baz", "Hello", "World"}, new Object[]{"27", "Baz", "Hello", "World"} @@ -3035,65 +3051,68 @@ public void testUnnestThriceWithFiltersOnDimAndAllUnnestColumns() public void testUnnestThriceWithFiltersOnDimAndUnnestColumnsORCombinations() { cannotVectorize(); + skipVectorize(); String sql = " SELECT dimZipf, dim3_unnest1, dim3_unnest2, dim3_unnest3 FROM \n" + " ( SELECT * FROM \n" + " ( SELECT * FROM lotsocolumns, UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest1) )" + " ,UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest2) \n" + " ), UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest3) " + " WHERE dimZipf=27 AND (dim3_unnest1='Baz' OR dim3_unnest2='Hello') AND dim3_unnest3='World'"; - testQuery( - sql, - QUERY_CONTEXT_UNNEST, - ImmutableList.of( - Druids.newScanQueryBuilder() - .dataSource( - UnnestDataSource.create( + List> expectedQuerySqlCom = ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource( + UnnestDataSource.create( + FilteredDataSource.create( UnnestDataSource.create( - UnnestDataSource.create( - new TableDataSource(CalciteTests.DATASOURCE5), - expressionVirtualColumn( - "j0.unnest", - "\"dimMultivalEnumerated\"", - ColumnType.STRING + FilteredDataSource.create( + UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE5), + expressionVirtualColumn( + "j0.unnest", + "\"dimMultivalEnumerated\"", + ColumnType.STRING + ), + null ), - null + NullHandling.sqlCompatible() ? equality("dimZipf", "27", ColumnType.LONG) : range( + "dimZipf", + ColumnType.LONG, + "27", + "27", + false, + false + ) ), expressionVirtualColumn( "_j0.unnest", "\"dimMultivalEnumerated\"", ColumnType.STRING - ), null - ), - expressionVirtualColumn( - "__j0.unnest", - "\"dimMultivalEnumerated\"", - ColumnType.STRING + ), + null ), - equality("__j0.unnest", "World", ColumnType.STRING) - ) - ) - .intervals(querySegmentSpec(Filtration.eternity())) - .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .filters(and( - NullHandling.sqlCompatible() ? equality("dimZipf", "27", ColumnType.LONG) : bound( - "dimZipf", - "27", - "27", - false, - false, - null, - StringComparators.NUMERIC + or( + equality("j0.unnest", "Baz", ColumnType.STRING), + equality("_j0.unnest", "Hello", ColumnType.STRING) + ) // (j0.unnest = Baz || _j0.unnest = Hello) ), - or( - equality("j0.unnest", "Baz", ColumnType.STRING), - equality("_j0.unnest", "Hello", ColumnType.STRING) - ) - )) - .legacy(false) - .context(QUERY_CONTEXT_UNNEST) - .columns(ImmutableList.of("__j0.unnest", "_j0.unnest", "dimZipf", "j0.unnest")) - .build() - ), + expressionVirtualColumn( + "__j0.unnest", + "\"dimMultivalEnumerated\"", + ColumnType.STRING + ), + equality("__j0.unnest", "World", ColumnType.STRING) + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("__j0.unnest", "_j0.unnest", "dimZipf", "j0.unnest")) + .build() + ); + testQuery( + sql, + QUERY_CONTEXT_UNNEST, expectedQuerySqlCom, ImmutableList.of( new Object[]{"27", "Baz", "Baz", "World"}, new Object[]{"27", "Baz", "Baz", "World"}, @@ -3402,7 +3421,17 @@ public void testUnnestVirtualWithColumns1() .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) .filters(or( - bound("m1", null, "10", false, false, null, StringComparators.NUMERIC), + NullHandling.sqlCompatible() + ? range("m1", ColumnType.LONG, null, "10", false, false) + : bound( + "m1", + null, + "10", + false, + false, + null, + StringComparators.NUMERIC + ), equality("j0.unnest", "b", ColumnType.STRING) )) .context(QUERY_CONTEXT_UNNEST) @@ -3411,6 +3440,50 @@ public void testUnnestVirtualWithColumns1() ImmutableList.of(new Object[]{"a", 1.0f}) ); } + + @Test + public void testUnnestVirtualWithColumns2() + { + // This tells the test to skip generating (vectorize = force) path + // Generates only 1 native query with vectorize = false + skipVectorize(); + // This tells that both vectorize = force and vectorize = false takes the same path of non vectorization + // Generates 2 native queries with 2 different values of vectorize + cannotVectorize(); + testQuery( + "SELECT strings, m1 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (strings) where (strings='a' or (m1=2 and strings='b'))", + QUERY_CONTEXT_UNNEST, + ImmutableList.of(Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn( + "j0.unnest", + "\"dim3\"", + ColumnType.STRING + ), + null + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) // (j0.unnest = a || (m1 = 2 && j0.unnest = b)) + .filters(or( + equality("j0.unnest", "a", ColumnType.STRING), + and( + NullHandling.sqlCompatible() + ? equality("m1", "2", ColumnType.FLOAT) + : equality("m1", "2", ColumnType.STRING), + equality("j0.unnest", "b", ColumnType.STRING) + ) + )) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest", "m1")) + .build()), + ImmutableList.of( + new Object[]{"a", 1.0f}, + new Object[]{"b", 2.0f} + ) + ); + } @Test public void testUnnestWithFilters() {